1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "service_utils.h"
18
19 #include <fcntl.h>
20 #include <grp.h>
21 #include <sys/mount.h>
22 #include <sys/prctl.h>
23 #include <sys/wait.h>
24 #include <unistd.h>
25
26 #include <android-base/file.h>
27 #include <android-base/logging.h>
28 #include <android-base/properties.h>
29 #include <android-base/stringprintf.h>
30 #include <android-base/strings.h>
31 #include <cutils/android_get_control_file.h>
32 #include <cutils/sockets.h>
33 #include <processgroup/processgroup.h>
34
35 #include "mount_namespace.h"
36 #include "util.h"
37
38 using android::base::GetProperty;
39 using android::base::StartsWith;
40 using android::base::StringPrintf;
41 using android::base::unique_fd;
42 using android::base::WriteStringToFile;
43
44 namespace android {
45 namespace init {
46
47 namespace {
48
EnterNamespace(int nstype,const char * path)49 Result<void> EnterNamespace(int nstype, const char* path) {
50 auto fd = unique_fd{open(path, O_RDONLY | O_CLOEXEC)};
51 if (fd == -1) {
52 return ErrnoError() << "Could not open namespace at " << path;
53 }
54 if (setns(fd, nstype) == -1) {
55 return ErrnoError() << "Could not setns() namespace at " << path;
56 }
57 return {};
58 }
59
SetUpMountNamespace(bool remount_proc,bool remount_sys)60 Result<void> SetUpMountNamespace(bool remount_proc, bool remount_sys) {
61 constexpr unsigned int kSafeFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID;
62
63 // Recursively remount / as MS_SLAVE like zygote does so that
64 // unmounting and mounting /proc doesn't interfere with the parent
65 // namespace's /proc mount. This will also prevent any other
66 // mounts/unmounts initiated by the service from interfering with the
67 // parent namespace but will still allow mount events from the parent
68 // namespace to propagate to the child.
69 if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
70 return ErrnoError() << "Could not remount(/) recursively as MS_SLAVE";
71 }
72
73 // umount() then mount() /proc and/or /sys
74 // Note that it is not sufficient to mount with MS_REMOUNT.
75 if (remount_proc) {
76 if (umount("/proc") == -1) {
77 return ErrnoError() << "Could not umount(/proc)";
78 }
79 if (mount("", "/proc", "proc", kSafeFlags, "") == -1) {
80 return ErrnoError() << "Could not mount(/proc)";
81 }
82 }
83 if (remount_sys) {
84 if (umount2("/sys", MNT_DETACH) == -1) {
85 return ErrnoError() << "Could not umount(/sys)";
86 }
87 if (mount("", "/sys", "sysfs", kSafeFlags, "") == -1) {
88 return ErrnoError() << "Could not mount(/sys)";
89 }
90 }
91 return {};
92 }
93
SetUpPidNamespace(const char * name)94 Result<void> SetUpPidNamespace(const char* name) {
95 if (prctl(PR_SET_NAME, name) == -1) {
96 return ErrnoError() << "Could not set name";
97 }
98
99 pid_t child_pid = fork();
100 if (child_pid == -1) {
101 return ErrnoError() << "Could not fork init inside the PID namespace";
102 }
103
104 if (child_pid > 0) {
105 // So that we exit with the right status.
106 static int init_exitstatus = 0;
107 signal(SIGTERM, [](int) { _exit(init_exitstatus); });
108
109 pid_t waited_pid;
110 int status;
111 while ((waited_pid = wait(&status)) > 0) {
112 // This loop will end when there are no processes left inside the
113 // PID namespace or when the init process inside the PID namespace
114 // gets a signal.
115 if (waited_pid == child_pid) {
116 init_exitstatus = status;
117 }
118 }
119 if (!WIFEXITED(init_exitstatus)) {
120 _exit(EXIT_FAILURE);
121 }
122 _exit(WEXITSTATUS(init_exitstatus));
123 }
124 return {};
125 }
126
SetupStdio(bool stdio_to_kmsg)127 void SetupStdio(bool stdio_to_kmsg) {
128 auto fd = unique_fd{open("/dev/null", O_RDWR | O_CLOEXEC)};
129 dup2(fd, STDIN_FILENO);
130 if (stdio_to_kmsg) {
131 fd.reset(open("/dev/kmsg_debug", O_WRONLY | O_CLOEXEC));
132 if (fd == -1) fd.reset(open("/dev/null", O_WRONLY | O_CLOEXEC));
133 }
134 dup2(fd, STDOUT_FILENO);
135 dup2(fd, STDERR_FILENO);
136 }
137
OpenConsole(const std::string & console)138 void OpenConsole(const std::string& console) {
139 auto fd = unique_fd{open(console.c_str(), O_RDWR | O_CLOEXEC)};
140 if (fd == -1) fd.reset(open("/dev/null", O_RDWR | O_CLOEXEC));
141 ioctl(fd, TIOCSCTTY, 0);
142 dup2(fd, 0);
143 dup2(fd, 1);
144 dup2(fd, 2);
145 }
146
147 } // namespace
148
Publish() const149 void Descriptor::Publish() const {
150 auto published_name = name_;
151
152 for (auto& c : published_name) {
153 c = isalnum(c) ? c : '_';
154 }
155
156 int fd = fd_.get();
157 // For safety, the FD is created as CLOEXEC, so that must be removed before publishing.
158 auto fd_flags = fcntl(fd, F_GETFD);
159 fd_flags &= ~FD_CLOEXEC;
160 if (fcntl(fd, F_SETFD, fd_flags) != 0) {
161 PLOG(ERROR) << "Failed to remove CLOEXEC from '" << published_name << "'";
162 }
163
164 std::string val = std::to_string(fd);
165 setenv(published_name.c_str(), val.c_str(), 1);
166 }
167
Create(const std::string & global_context) const168 Result<Descriptor> SocketDescriptor::Create(const std::string& global_context) const {
169 const auto& socket_context = context.empty() ? global_context : context;
170 auto result = CreateSocket(name, type | SOCK_CLOEXEC, passcred, perm, uid, gid, socket_context);
171 if (!result.ok()) {
172 return result.error();
173 }
174
175 return Descriptor(ANDROID_SOCKET_ENV_PREFIX + name, unique_fd(*result));
176 }
177
Create() const178 Result<Descriptor> FileDescriptor::Create() const {
179 int flags = (type == "r") ? O_RDONLY : (type == "w") ? O_WRONLY : O_RDWR;
180
181 // Make sure we do not block on open (eg: devices can chose to block on carrier detect). Our
182 // intention is never to delay launch of a service for such a condition. The service can
183 // perform its own blocking on carrier detect.
184 unique_fd fd(TEMP_FAILURE_RETRY(open(name.c_str(), flags | O_NONBLOCK | O_CLOEXEC)));
185
186 if (fd < 0) {
187 return ErrnoError() << "Failed to open file '" << name << "'";
188 }
189
190 // Fixup as we set O_NONBLOCK for open, the intent for fd is to block reads.
191 fcntl(fd, F_SETFL, flags);
192
193 LOG(INFO) << "Opened file '" << name << "', flags " << flags;
194
195 return Descriptor(ANDROID_FILE_ENV_PREFIX + name, std::move(fd));
196 }
197
EnterNamespaces(const NamespaceInfo & info,const std::string & name,std::optional<MountNamespace> override_mount_namespace)198 Result<void> EnterNamespaces(const NamespaceInfo& info, const std::string& name,
199 std::optional<MountNamespace> override_mount_namespace) {
200 for (const auto& [nstype, path] : info.namespaces_to_enter) {
201 if (auto result = EnterNamespace(nstype, path.c_str()); !result.ok()) {
202 return result;
203 }
204 }
205
206 #if defined(__ANDROID__)
207 if (override_mount_namespace.has_value()) {
208 if (auto result = SwitchToMountNamespaceIfNeeded(override_mount_namespace.value());
209 !result.ok()) {
210 return result;
211 }
212 }
213 #endif
214
215 if (info.flags & CLONE_NEWNS) {
216 bool remount_proc = info.flags & CLONE_NEWPID;
217 bool remount_sys =
218 std::any_of(info.namespaces_to_enter.begin(), info.namespaces_to_enter.end(),
219 [](const auto& entry) { return entry.first == CLONE_NEWNET; });
220 if (auto result = SetUpMountNamespace(remount_proc, remount_sys); !result.ok()) {
221 return result;
222 }
223 }
224
225 if (info.flags & CLONE_NEWPID) {
226 // This will fork again to run an init process inside the PID namespace.
227 if (auto result = SetUpPidNamespace(name.c_str()); !result.ok()) {
228 return result;
229 }
230 }
231
232 return {};
233 }
234
SetProcessAttributes(const ProcessAttributes & attr)235 Result<void> SetProcessAttributes(const ProcessAttributes& attr) {
236 if (attr.ioprio_class != IoSchedClass_NONE) {
237 if (android_set_ioprio(getpid(), attr.ioprio_class, attr.ioprio_pri)) {
238 PLOG(ERROR) << "failed to set pid " << getpid() << " ioprio=" << attr.ioprio_class
239 << "," << attr.ioprio_pri;
240 }
241 }
242
243 if (!attr.console.empty()) {
244 setsid();
245 OpenConsole(attr.console);
246 } else {
247 if (setpgid(0, getpid()) == -1) {
248 return ErrnoError() << "setpgid failed";
249 }
250 SetupStdio(attr.stdio_to_kmsg);
251 }
252
253 for (const auto& rlimit : attr.rlimits) {
254 if (setrlimit(rlimit.first, &rlimit.second) == -1) {
255 return ErrnoErrorf("setrlimit({}, {{rlim_cur={}, rlim_max={}}}) failed", rlimit.first,
256 rlimit.second.rlim_cur, rlimit.second.rlim_max);
257 }
258 }
259
260 if (attr.gid) {
261 if (setgid(attr.gid) != 0) {
262 return ErrnoError() << "setgid failed";
263 }
264 }
265 if (setgroups(attr.supp_gids.size(), const_cast<gid_t*>(&attr.supp_gids[0])) != 0) {
266 return ErrnoError() << "setgroups failed";
267 }
268 if (attr.uid) {
269 if (setuid(attr.uid) != 0) {
270 return ErrnoError() << "setuid failed";
271 }
272 }
273
274 if (attr.priority != 0) {
275 if (setpriority(PRIO_PROCESS, 0, attr.priority) != 0) {
276 return ErrnoError() << "setpriority failed";
277 }
278 }
279 return {};
280 }
281
WritePidToFiles(std::vector<std::string> * files)282 Result<void> WritePidToFiles(std::vector<std::string>* files) {
283 // See if there were "writepid" instructions to write to files under cpuset path.
284 std::string cpuset_path;
285 if (CgroupGetControllerPath("cpuset", &cpuset_path)) {
286 auto cpuset_predicate = [&cpuset_path](const std::string& path) {
287 return StartsWith(path, cpuset_path + "/");
288 };
289 auto iter = std::find_if(files->begin(), files->end(), cpuset_predicate);
290 if (iter == files->end()) {
291 // There were no "writepid" instructions for cpusets, check if the system default
292 // cpuset is specified to be used for the process.
293 std::string default_cpuset = GetProperty("ro.cpuset.default", "");
294 if (!default_cpuset.empty()) {
295 // Make sure the cpuset name starts and ends with '/'.
296 // A single '/' means the 'root' cpuset.
297 if (default_cpuset.front() != '/') {
298 default_cpuset.insert(0, 1, '/');
299 }
300 if (default_cpuset.back() != '/') {
301 default_cpuset.push_back('/');
302 }
303 files->push_back(
304 StringPrintf("%s%stasks", cpuset_path.c_str(), default_cpuset.c_str()));
305 }
306 }
307 } else {
308 LOG(ERROR) << "cpuset cgroup controller is not mounted!";
309 }
310 std::string pid_str = std::to_string(getpid());
311 for (const auto& file : *files) {
312 if (!WriteStringToFile(pid_str, file)) {
313 return ErrnoError() << "couldn't write " << pid_str << " to " << file;
314 }
315 }
316 return {};
317 }
318
319 } // namespace init
320 } // namespace android
321