1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "createns"
18 #include <log/log.h>
19 
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <inttypes.h>
23 #include <sched.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mount.h>
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <unistd.h>
30 
31 #include <limits>
32 #include <string>
33 #include <vector>
34 
// Directory prefix (including the trailing slash) under which the file for
// each network namespace, and its accompanying ".pid" file, is created.
static constexpr char kNamespacePath[] = "/data/vendor/var/run/netns/";
// The proc entry for the network namespace of the calling process. It is
// bind-mounted onto the namespace file to make the namespace reachable by
// path.
static constexpr char kProcNsNet[] = "/proc/self/ns/net";
37 
// Scoped owner of a file descriptor. The descriptor handed to the constructor
// is closed when the object is destroyed, unless it is -1 which denotes "no
// descriptor". Instances can not be copied, so a descriptor has exactly one
// owner.
class Fd {
public:
    explicit Fd(int fd) : mFd(fd) { }
    ~Fd() {
        if (mFd == -1) {
            // Nothing is owned, nothing to close
            return;
        }
        ::close(mFd);
        mFd = -1;
    }

    // Copying would result in the same descriptor being closed twice
    Fd(const Fd&) = delete;
    Fd& operator=(const Fd&) = delete;

    // The owned descriptor, or -1 if there is none. Ownership is not
    // transferred to the caller.
    int get() const { return mFd; }

private:
    int mFd;
};
54 
usage(const char * program)55 static void usage(const char* program) {
56     ALOGE("%s <namespace>", program);
57 }
58 
removeFile(const char * file)59 static bool removeFile(const char* file) {
60     if (::unlink(file) == -1) {
61         ALOGE("Failed to unlink file '%s': %s", file, strerror(errno));
62         return false;
63     }
64     return true;
65 }
66 
getNamespacePath(const char * name)67 static std::string getNamespacePath(const char* name) {
68     size_t len = strlen(name);
69     if (len == 0) {
70         ALOGE("Must provide a namespace argument that is not empty");
71         return std::string();
72     }
73 
74     if (std::numeric_limits<size_t>::max() - sizeof(kNamespacePath) < len) {
75         // The argument is so big the resulting string can't fit in size_t
76         ALOGE("Namespace argument too long");
77         return std::string();
78     }
79 
80     std::vector<char> nsPath(sizeof(kNamespacePath) + len);
81     size_t totalSize = strlcpy(nsPath.data(), kNamespacePath, nsPath.size());
82     if (totalSize >= nsPath.size()) {
83         // The resulting string had to be concatenated to fit, this is a logic
84         // error in the code above that determines the size of the data.
85         ALOGE("Could not create namespace path");
86         return std::string();
87     }
88     totalSize = strlcat(nsPath.data(), name, nsPath.size());
89     if (totalSize >= nsPath.size()) {
90         // The resulting string had to be concatenated to fit, this is a logic
91         // error in the code above that determines the size of the data.
92         ALOGE("Could not append to namespace path");
93         return std::string();
94     }
95     return nsPath.data();
96 }
97 
writeNamespacePid(const char * name,pid_t pid)98 static bool writeNamespacePid(const char* name, pid_t pid) {
99     std::string path = getNamespacePath(name);
100     if (path.empty()) {
101         return false;
102     }
103     path += ".pid";
104 
105     Fd fd(::open(path.c_str(),
106                  O_CREAT | O_TRUNC | O_WRONLY | O_CLOEXEC,
107                  S_IRUSR | S_IWUSR | S_IRGRP));
108     if (fd.get() == -1) {
109         ALOGE("Unable to create file '%s': %s", path.c_str(), strerror(errno));
110         return false;
111     }
112 
113     // In order to safely print a pid_t we use int64_t with a known format
114     // specifier. Ensure that a pid_t will fit in a pid_t. According to POSIX
115     // pid_t is signed.
116     static_assert(sizeof(pid_t) <= sizeof(int64_t),
117                   "pid_t is larger than int64_t");
118     char pidString[32];
119     int printed = snprintf(pidString,
120                            sizeof(pidString),
121                            "%" PRId64,
122                            static_cast<int64_t>(pid));
123     if (printed <= 0) {
124         ALOGE("Unabled to created PID string for writing");
125         removeFile(path.c_str());
126         return false;
127     }
128 
129     const char* toPrint = pidString;
130     int remaining = printed;
131     for (;;) {
132         int result = ::write(fd.get(), toPrint, remaining);
133         if (result < 0) {
134             if (errno == EINTR) {
135                 continue;
136             }
137             ALOGE("Unable to write pid to file %s: %s",
138                   path.c_str(), strerror(errno));
139             removeFile(path.c_str());
140             return false;
141         } else if (result < printed) {
142             remaining -= result;
143             toPrint += result;
144         } else {
145             break;
146         }
147     }
148     return true;
149 }
150 
daemonize(int fd)151 static pid_t daemonize(int fd) {
152     // This convoluted way of demonizing the process is described in
153     // man (7) daemon.
154 
155     // (1) Close all files, we don't have any open files at this point
156     // (2) Reset all signal handlers to default, they already are
157     // (3) Reset the signal mask, we never changed it
158     // (4) Sanitize environment block, we didn't change the environment
159     // (5) Call fork
160     pid_t pid = ::fork();
161     if (pid != 0) {
162         // In the parent, nothing more to do
163         return pid;
164     }
165 
166     // (6) Acquire a new session to detach from terminal
167     ::setsid();
168 
169     // (7) Fork again to avoid the daemon being attached to a terminal again
170     pid = ::fork();
171     if (pid != 0) {
172         // (8) This is the first child, needs to call exit
173         exit(0);
174         return pid;
175     }
176     // (9) Connect /dev/null to stdin, stdout, stderr
177     ::close(STDIN_FILENO);
178     ::close(STDOUT_FILENO);
179     ::close(STDERR_FILENO);
180     // Since open will always reuse the lowest available fd and we have closed
181     // every single fd at this point we can just open them in the correct order.
182     if (::open("/dev/null", O_RDONLY) == -1) {
183         ALOGE("Unable to open /dev/null as stdin");
184     }
185     if (::open("/dev/null", O_WRONLY) == -1) {
186         ALOGE("Unable to open /dev/null as stdout");
187     }
188     if (::open("/dev/null", O_WRONLY) == -1) {
189         ALOGE("Unable to open /dev/null as stderr");
190     }
191     // (10) Reset umask to zero
192     ::umask(0);
193     // (11) Change directory to root (/)
194     if (::chdir("/") != 0) {
195         ALOGE("Failed to set working directory to root: %s", strerror(errno));
196     }
197     // (12) Write the pid of the daemon to a file, we're passing this to
198     // the process that starts the daemon to ensure that the pid file exists
199     // once that process exits. Atomicity is guaranteed by that write requiring
200     // that the pid file does not exist to begin with.
201     pid = ::getpid();
202     if (::write(fd, &pid, sizeof(pid)) != sizeof(pid)) {
203         ALOGE("Unable to write pid to pipe: %s", strerror(errno));
204         ::close(fd);
205         exit(1);
206     }
207     ::close(fd);
208     // (13) Drop privileges, doing this causes problems for execns when it's
209     // trying to open the proc/ns/net file of this process so we can't do that.
210     // (14) Notify the starting process that the daemon is running, this is done
211     // in step (12) above.
212     // (15) Exit starting process happens in main where it returns.
213     return 0;
214 }
215 
main(int argc,char * argv[])216 int main(int argc, char* argv[]) {
217     if (argc != 2) {
218         usage(argv[0]);
219         return 1;
220     }
221     int fds[2];
222     if (::pipe2(fds, O_CLOEXEC) != 0) {
223         ALOGE("Failed to create pipe: %s", strerror(errno));
224         return 1;
225     }
226 
227     Fd readPipe(fds[0]);
228     Fd writePipe(fds[1]);
229 
230     if (::unshare(CLONE_NEWNET) != 0) {
231         ALOGE("Failed to create network namespace '%s': %s",
232               argv[1],
233               strerror(errno));
234         return 1;
235     }
236 
237     std::string path = getNamespacePath(argv[1]);
238     if (path.empty()) {
239         return 1;
240     }
241     {
242         // Open and then immediately close the fd
243         Fd fd(::open(path.c_str(), O_CREAT | O_TRUNC | O_RDONLY | O_CLOEXEC,
244                      S_IRUSR | S_IWUSR | S_IRGRP));
245         if (fd.get() == -1) {
246             ALOGE("Failed to open file %s: %s", path.c_str(), strerror(errno));
247             return 1;
248         }
249     }
250     if (::mount(kProcNsNet, path.c_str(), nullptr, MS_BIND, nullptr) != 0) {
251         ALOGE("Failed to bind %s to %s: %s",
252               kProcNsNet,
253               path.c_str(),
254               strerror(errno));
255         // Clean up on failure
256         removeFile(path.c_str());
257         return 1;
258     }
259 
260     // At this point we fork. This way we keep a process in the namespace alive
261     // without this command being blocking. This is valuable because it allows
262     // us to write the pid to a file before we exit. That way we can guarantee
263     // that after this command completes there is a pid to be read, there is no
264     // asynchronous behavior going on.
265     pid_t pid = daemonize(writePipe.get());
266     if (pid == 0) {
267         // In the child
268         for (;;) {
269             pause();
270         }
271     } else {
272         // In the parent, read the pid of the daemon from the pipe and write it
273         // to a file.
274         pid_t child = 0;
275         if (::read(readPipe.get(), &child, sizeof(child)) != sizeof(child)) {
276             ALOGE("Failed to read child PID from pipe: %s", strerror(errno));
277             return 1;
278         }
279         if (!writeNamespacePid(argv[1], child)) {
280             return 1;
281         }
282     }
283 
284     return 0;
285 }
286 
287