1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "createns"
18 #include <log/log.h>
19
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <inttypes.h>
23 #include <sched.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mount.h>
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <unistd.h>
30
31 #include <limits>
32 #include <string>
33 #include <vector>
34
// Directory prefix (trailing slash included) under which one file per
// network namespace is created; the namespace name is appended to it.
static constexpr char kNamespacePath[] = "/data/vendor/var/run/netns/";
// procfs entry referring to the calling process' own network namespace.
static constexpr char kProcNsNet[] = "/proc/self/ns/net";
37
// Owning wrapper around a file descriptor: the descriptor handed to the
// constructor is closed when the wrapper is destroyed, unless it is -1.
// Copying is disabled so that a descriptor has exactly one owner.
class Fd {
public:
    explicit Fd(int fd) : mFd(fd) {}

    // Non-copyable.
    Fd(const Fd&) = delete;
    Fd& operator=(const Fd&) = delete;

    ~Fd() {
        if (mFd == -1) {
            // Nothing is owned, nothing to release.
            return;
        }
        ::close(mFd);
        mFd = -1;
    }

    // Returns the wrapped descriptor without giving up ownership.
    int get() const { return mFd; }

private:
    int mFd;
};
54
usage(const char * program)55 static void usage(const char* program) {
56 ALOGE("%s <namespace>", program);
57 }
58
removeFile(const char * file)59 static bool removeFile(const char* file) {
60 if (::unlink(file) == -1) {
61 ALOGE("Failed to unlink file '%s': %s", file, strerror(errno));
62 return false;
63 }
64 return true;
65 }
66
getNamespacePath(const char * name)67 static std::string getNamespacePath(const char* name) {
68 size_t len = strlen(name);
69 if (len == 0) {
70 ALOGE("Must provide a namespace argument that is not empty");
71 return std::string();
72 }
73
74 if (std::numeric_limits<size_t>::max() - sizeof(kNamespacePath) < len) {
75 // The argument is so big the resulting string can't fit in size_t
76 ALOGE("Namespace argument too long");
77 return std::string();
78 }
79
80 std::vector<char> nsPath(sizeof(kNamespacePath) + len);
81 size_t totalSize = strlcpy(nsPath.data(), kNamespacePath, nsPath.size());
82 if (totalSize >= nsPath.size()) {
83 // The resulting string had to be concatenated to fit, this is a logic
84 // error in the code above that determines the size of the data.
85 ALOGE("Could not create namespace path");
86 return std::string();
87 }
88 totalSize = strlcat(nsPath.data(), name, nsPath.size());
89 if (totalSize >= nsPath.size()) {
90 // The resulting string had to be concatenated to fit, this is a logic
91 // error in the code above that determines the size of the data.
92 ALOGE("Could not append to namespace path");
93 return std::string();
94 }
95 return nsPath.data();
96 }
97
writeNamespacePid(const char * name,pid_t pid)98 static bool writeNamespacePid(const char* name, pid_t pid) {
99 std::string path = getNamespacePath(name);
100 if (path.empty()) {
101 return false;
102 }
103 path += ".pid";
104
105 Fd fd(::open(path.c_str(),
106 O_CREAT | O_TRUNC | O_WRONLY | O_CLOEXEC,
107 S_IRUSR | S_IWUSR | S_IRGRP));
108 if (fd.get() == -1) {
109 ALOGE("Unable to create file '%s': %s", path.c_str(), strerror(errno));
110 return false;
111 }
112
113 // In order to safely print a pid_t we use int64_t with a known format
114 // specifier. Ensure that a pid_t will fit in a pid_t. According to POSIX
115 // pid_t is signed.
116 static_assert(sizeof(pid_t) <= sizeof(int64_t),
117 "pid_t is larger than int64_t");
118 char pidString[32];
119 int printed = snprintf(pidString,
120 sizeof(pidString),
121 "%" PRId64,
122 static_cast<int64_t>(pid));
123 if (printed <= 0) {
124 ALOGE("Unabled to created PID string for writing");
125 removeFile(path.c_str());
126 return false;
127 }
128
129 const char* toPrint = pidString;
130 int remaining = printed;
131 for (;;) {
132 int result = ::write(fd.get(), toPrint, remaining);
133 if (result < 0) {
134 if (errno == EINTR) {
135 continue;
136 }
137 ALOGE("Unable to write pid to file %s: %s",
138 path.c_str(), strerror(errno));
139 removeFile(path.c_str());
140 return false;
141 } else if (result < printed) {
142 remaining -= result;
143 toPrint += result;
144 } else {
145 break;
146 }
147 }
148 return true;
149 }
150
daemonize(int fd)151 static pid_t daemonize(int fd) {
152 // This convoluted way of demonizing the process is described in
153 // man (7) daemon.
154
155 // (1) Close all files, we don't have any open files at this point
156 // (2) Reset all signal handlers to default, they already are
157 // (3) Reset the signal mask, we never changed it
158 // (4) Sanitize environment block, we didn't change the environment
159 // (5) Call fork
160 pid_t pid = ::fork();
161 if (pid != 0) {
162 // In the parent, nothing more to do
163 return pid;
164 }
165
166 // (6) Acquire a new session to detach from terminal
167 ::setsid();
168
169 // (7) Fork again to avoid the daemon being attached to a terminal again
170 pid = ::fork();
171 if (pid != 0) {
172 // (8) This is the first child, needs to call exit
173 exit(0);
174 return pid;
175 }
176 // (9) Connect /dev/null to stdin, stdout, stderr
177 ::close(STDIN_FILENO);
178 ::close(STDOUT_FILENO);
179 ::close(STDERR_FILENO);
180 // Since open will always reuse the lowest available fd and we have closed
181 // every single fd at this point we can just open them in the correct order.
182 if (::open("/dev/null", O_RDONLY) == -1) {
183 ALOGE("Unable to open /dev/null as stdin");
184 }
185 if (::open("/dev/null", O_WRONLY) == -1) {
186 ALOGE("Unable to open /dev/null as stdout");
187 }
188 if (::open("/dev/null", O_WRONLY) == -1) {
189 ALOGE("Unable to open /dev/null as stderr");
190 }
191 // (10) Reset umask to zero
192 ::umask(0);
193 // (11) Change directory to root (/)
194 if (::chdir("/") != 0) {
195 ALOGE("Failed to set working directory to root: %s", strerror(errno));
196 }
197 // (12) Write the pid of the daemon to a file, we're passing this to
198 // the process that starts the daemon to ensure that the pid file exists
199 // once that process exits. Atomicity is guaranteed by that write requiring
200 // that the pid file does not exist to begin with.
201 pid = ::getpid();
202 if (::write(fd, &pid, sizeof(pid)) != sizeof(pid)) {
203 ALOGE("Unable to write pid to pipe: %s", strerror(errno));
204 ::close(fd);
205 exit(1);
206 }
207 ::close(fd);
208 // (13) Drop privileges, doing this causes problems for execns when it's
209 // trying to open the proc/ns/net file of this process so we can't do that.
210 // (14) Notify the starting process that the daemon is running, this is done
211 // in step (12) above.
212 // (15) Exit starting process happens in main where it returns.
213 return 0;
214 }
215
main(int argc,char * argv[])216 int main(int argc, char* argv[]) {
217 if (argc != 2) {
218 usage(argv[0]);
219 return 1;
220 }
221 int fds[2];
222 if (::pipe2(fds, O_CLOEXEC) != 0) {
223 ALOGE("Failed to create pipe: %s", strerror(errno));
224 return 1;
225 }
226
227 Fd readPipe(fds[0]);
228 Fd writePipe(fds[1]);
229
230 if (::unshare(CLONE_NEWNET) != 0) {
231 ALOGE("Failed to create network namespace '%s': %s",
232 argv[1],
233 strerror(errno));
234 return 1;
235 }
236
237 std::string path = getNamespacePath(argv[1]);
238 if (path.empty()) {
239 return 1;
240 }
241 {
242 // Open and then immediately close the fd
243 Fd fd(::open(path.c_str(), O_CREAT | O_TRUNC | O_RDONLY | O_CLOEXEC,
244 S_IRUSR | S_IWUSR | S_IRGRP));
245 if (fd.get() == -1) {
246 ALOGE("Failed to open file %s: %s", path.c_str(), strerror(errno));
247 return 1;
248 }
249 }
250 if (::mount(kProcNsNet, path.c_str(), nullptr, MS_BIND, nullptr) != 0) {
251 ALOGE("Failed to bind %s to %s: %s",
252 kProcNsNet,
253 path.c_str(),
254 strerror(errno));
255 // Clean up on failure
256 removeFile(path.c_str());
257 return 1;
258 }
259
260 // At this point we fork. This way we keep a process in the namespace alive
261 // without this command being blocking. This is valuable because it allows
262 // us to write the pid to a file before we exit. That way we can guarantee
263 // that after this command completes there is a pid to be read, there is no
264 // asynchronous behavior going on.
265 pid_t pid = daemonize(writePipe.get());
266 if (pid == 0) {
267 // In the child
268 for (;;) {
269 pause();
270 }
271 } else {
272 // In the parent, read the pid of the daemon from the pipe and write it
273 // to a file.
274 pid_t child = 0;
275 if (::read(readPipe.get(), &child, sizeof(child)) != sizeof(child)) {
276 ALOGE("Failed to read child PID from pipe: %s", strerror(errno));
277 return 1;
278 }
279 if (!writeNamespacePid(argv[1], child)) {
280 return 1;
281 }
282 }
283
284 return 0;
285 }
286
287