1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "IptablesRestoreController.h"
18 
19 #include <poll.h>
20 #include <signal.h>
21 #include <sys/wait.h>
22 #include <unistd.h>
23 
24 #define LOG_TAG "IptablesRestoreController"
25 #include <android-base/logging.h>
26 #include <android-base/file.h>
27 #include <netdutils/Syscalls.h>
28 
29 #include "Controllers.h"
30 
31 using android::netdutils::StatusOr;
32 using android::netdutils::sSyscalls;
33 
// Absolute paths of the two restore binaries that forkAndExec() executes.
constexpr char IPTABLES_RESTORE_PATH[]  = "/system/bin/iptables-restore";
constexpr char IP6TABLES_RESTORE_PATH[] = "/system/bin/ip6tables-restore";

// Marker line written to the child after every command. The child is started in verbose mode so
// that this line is echoed on its stdout, where it serves as the acknowledgement.
constexpr char PING[] = "#PING\n";

// Number of characters in PING, not counting the terminating NUL.
constexpr size_t PING_SIZE = (sizeof(PING) / sizeof(PING[0])) - 1;
40 
41 // Not compile-time constants because they are changed by the unit tests.
42 int IptablesRestoreController::MAX_RETRIES = 50;
43 int IptablesRestoreController::POLL_TIMEOUT_MS = 100;
44 
45 class IptablesProcess {
46 public:
IptablesProcess(pid_t pid,int stdIn,int stdOut,int stdErr)47     IptablesProcess(pid_t pid, int stdIn, int stdOut, int stdErr) :
48         pid(pid),
49         stdIn(stdIn),
50         processTerminated(false) {
51 
52         pollFds[STDOUT_IDX] = { .fd = stdOut, .events = POLLIN };
53         pollFds[STDERR_IDX] = { .fd = stdErr, .events = POLLIN };
54     }
55 
~IptablesProcess()56     ~IptablesProcess() {
57         close(stdIn);
58         close(pollFds[STDOUT_IDX].fd);
59         close(pollFds[STDERR_IDX].fd);
60     }
61 
outputReady()62     bool outputReady() {
63         struct pollfd pollfd = { .fd = stdIn, .events = POLLOUT };
64         int ret = poll(&pollfd, 1, 0);
65         if (ret == -1) {
66             ALOGE("outputReady poll failed: %s", strerror(errno));
67             return false;
68         }
69         return (ret == 1) && !(pollfd.revents & POLLERR);
70     }
71 
stop()72     void stop() {
73         if (processTerminated) return;
74 
75         // This can be called by drainAndWaitForAck (after a POLLHUP) or by sendCommand (if the
76         // process was killed by something else on the system). In both cases, it's safe to send the
77         // PID a SIGTERM, because the PID continues to exist until its parent (i.e., us) calls
78         // waitpid on it, so there's no risk that the PID is reused.
79         int err = kill(pid, SIGTERM);
80         if (err) {
81             err = errno;
82         }
83 
84         if (err == ESRCH) {
85             // This means that someone else inside netd but outside this class called waitpid(),
86             // which is a programming error. There's no point in calling waitpid() here since we
87             // know that the process is gone.
88             ALOGE("iptables child process %d unexpectedly disappeared", pid);
89             processTerminated = true;
90             return;
91         }
92 
93         if (err) {
94             ALOGE("Error killing iptables child process %d: %s", pid, strerror(err));
95         }
96 
97         int status;
98         if (waitpid(pid, &status, 0) == -1) {
99             ALOGE("Error waiting for iptables child process %d: %s", pid, strerror(errno));
100         } else {
101             ALOGW("iptables-restore process %d terminated status=%d", pid, status);
102         }
103 
104         processTerminated = true;
105     }
106 
107     const pid_t pid;  // NOLINT(misc-non-private-member-variables-in-classes)
108     const int stdIn;  // NOLINT(misc-non-private-member-variables-in-classes)
109 
110     struct pollfd pollFds[2];
111     std::string errBuf;
112 
113     std::atomic_bool processTerminated;
114 
115     static constexpr size_t STDOUT_IDX = 0;
116     static constexpr size_t STDERR_IDX = 1;
117 };
118 
IptablesRestoreController()119 IptablesRestoreController::IptablesRestoreController() {
120     Init();
121 }
122 
~IptablesRestoreController()123 IptablesRestoreController::~IptablesRestoreController() {
124 }
125 
Init()126 void IptablesRestoreController::Init() {
127     // We cannot fork these in parallel or a child process could inherit the pipe fds intended for
128     // use by the other child process. see https://android-review.googlesource.com/469559 for what
129     // breaks. This does not cause a latency hit, because the parent only has to wait for
130     // forkAndExec, which is sub-millisecond, and the child processes then call exec() in parallel.
131     mIpRestore.reset(forkAndExec(IPTABLES_PROCESS));
132     mIp6Restore.reset(forkAndExec(IP6TABLES_PROCESS));
133 }
134 
135 /* static */
forkAndExec(const IptablesProcessType type)136 IptablesProcess* IptablesRestoreController::forkAndExec(const IptablesProcessType type) {
137     const char* const cmd = (type == IPTABLES_PROCESS) ?
138         IPTABLES_RESTORE_PATH : IP6TABLES_RESTORE_PATH;
139 
140     // Create the pipes we'll use for communication with the child
141     // process. One each for the child's in, out and err files.
142     int stdin_pipe[2];
143     int stdout_pipe[2];
144     int stderr_pipe[2];
145 
146     if (pipe2(stdin_pipe,  O_CLOEXEC) == -1 ||
147         pipe2(stdout_pipe, O_NONBLOCK | O_CLOEXEC) == -1 ||
148         pipe2(stderr_pipe, O_NONBLOCK | O_CLOEXEC) == -1) {
149 
150         ALOGE("pipe2() failed: %s", strerror(errno));
151         return nullptr;
152     }
153 
154     const auto& sys = sSyscalls.get();
155     StatusOr<pid_t> child_pid = sys.fork();
156     if (!isOk(child_pid)) {
157         ALOGE("fork() failed: %s", strerror(child_pid.status().code()));
158         return nullptr;
159     }
160 
161     if (child_pid.value() == 0) {
162         // The child process. Reads from stdin, writes to stderr and stdout.
163 
164         // stdin_pipe[0] : The read end of the stdin pipe.
165         // stdout_pipe[1] : The write end of the stdout pipe.
166         // stderr_pipe[1] : The write end of the stderr pipe.
167         if (dup2(stdin_pipe[0], 0) == -1 ||
168             dup2(stdout_pipe[1], 1) == -1 ||
169             dup2(stderr_pipe[1], 2) == -1) {
170             ALOGE("dup2() failed: %s", strerror(errno));
171             abort();
172         }
173 
174         if (execl(cmd,
175                   cmd,
176                   "--noflush",  // Don't flush the whole table.
177                   "-w",         // Wait instead of failing if the lock is held.
178                   "-v",         // Verbose mode, to make sure our ping is echoed
179                                 // back to us.
180                   nullptr) == -1) {
181             ALOGE("execl(%s, ...) failed: %s", cmd, strerror(errno));
182             abort();
183         }
184 
185         // This statement is unreachable. We abort() upon error, and execl
186         // if everything goes well.
187         return nullptr;
188     }
189 
190     // The parent process. Writes to stdout and stderr and reads from stdin.
191     // stdin_pipe[0] : The read end of the stdin pipe.
192     // stdout_pipe[1] : The write end of the stdout pipe.
193     // stderr_pipe[1] : The write end of the stderr pipe.
194     if (close(stdin_pipe[0]) == -1 ||
195         close(stdout_pipe[1]) == -1 ||
196         close(stderr_pipe[1]) == -1) {
197         ALOGW("close() failed: %s", strerror(errno));
198     }
199 
200     return new IptablesProcess(child_pid.value(), stdin_pipe[1], stdout_pipe[0], stderr_pipe[0]);
201 }
202 
203 // TODO: Return -errno on failure instead of -1.
204 // TODO: Maybe we should keep a rotating buffer of the last N commands
205 // so that they can be dumped on dumpsys.
sendCommand(const IptablesProcessType type,const std::string & command,std::string * output)206 int IptablesRestoreController::sendCommand(const IptablesProcessType type,
207                                            const std::string& command,
208                                            std::string *output) {
209    std::unique_ptr<IptablesProcess> *process =
210            (type == IPTABLES_PROCESS) ? &mIpRestore : &mIp6Restore;
211 
212 
213     // We might need to fork a new process if we haven't forked one yet, or
214     // if the forked process terminated.
215     //
216     // NOTE: For a given command, this is the last point at which we try to
217     // recover from a child death. If the child dies at some later point during
218     // the execution of this method, we will receive an EPIPE and return an
219     // error. The command will then need to be retried at a higher level.
220     IptablesProcess *existingProcess = process->get();
221     if (existingProcess != nullptr && !existingProcess->outputReady()) {
222         existingProcess->stop();
223         existingProcess = nullptr;
224     }
225 
226     if (existingProcess == nullptr) {
227         // Fork a new iptables[6]-restore process.
228         IptablesProcess *newProcess = IptablesRestoreController::forkAndExec(type);
229         if (newProcess == nullptr) {
230             LOG(ERROR) << "Unable to fork ip[6]tables-restore, type: " << type;
231             return -1;
232         }
233 
234         process->reset(newProcess);
235     }
236 
237     if (!android::base::WriteFully((*process)->stdIn, command.data(), command.length())) {
238         ALOGE("Unable to send command: %s", strerror(errno));
239         return -1;
240     }
241 
242     if (!android::base::WriteFully((*process)->stdIn, PING, PING_SIZE)) {
243         ALOGE("Unable to send ping command: %s", strerror(errno));
244         return -1;
245     }
246 
247     if (!drainAndWaitForAck(*process, command, output)) {
248         // drainAndWaitForAck has already logged an error.
249         return -1;
250     }
251 
252     return 0;
253 }
254 
maybeLogStderr(const std::unique_ptr<IptablesProcess> & process,const std::string & command)255 void IptablesRestoreController::maybeLogStderr(const std::unique_ptr<IptablesProcess> &process,
256                                                const std::string& command) {
257     if (process->errBuf.empty()) {
258         return;
259     }
260 
261     ALOGE("iptables error:\n");
262     ALOGE("------- COMMAND -------\n");
263     ALOGE("%s\n", command.c_str());
264     ALOGE("-------  ERROR -------\n");
265     ALOGE("%s", process->errBuf.c_str());
266     ALOGE("----------------------\n");
267     process->errBuf.clear();
268 }
269 
270 /* static */
drainAndWaitForAck(const std::unique_ptr<IptablesProcess> & process,const std::string & command,std::string * output)271 bool IptablesRestoreController::drainAndWaitForAck(const std::unique_ptr<IptablesProcess> &process,
272                                                    const std::string& command,
273                                                    std::string *output) {
274     bool receivedAck = false;
275     int timeout = 0;
276     while (!receivedAck && (timeout++ < MAX_RETRIES)) {
277         int numEvents = TEMP_FAILURE_RETRY(
278             poll(process->pollFds, ARRAY_SIZE(process->pollFds), POLL_TIMEOUT_MS));
279         if (numEvents == -1) {
280             ALOGE("Poll failed: %s", strerror(errno));
281             return false;
282         }
283 
284         // We've timed out, which means something has gone wrong - we know that stdout should have
285         // become available to read with the ACK message, or that stderr should have been available
286         // to read with an error message.
287         if (numEvents == 0) {
288             continue;
289         }
290 
291         char buffer[PIPE_BUF];
292         for (size_t i = 0; i < ARRAY_SIZE(process->pollFds); ++i) {
293             const struct pollfd &pollfd = process->pollFds[i];
294             if (pollfd.revents & POLLIN) {
295                 ssize_t size;
296                 do {
297                     size = TEMP_FAILURE_RETRY(read(pollfd.fd, buffer, sizeof(buffer)));
298 
299                     if (size == -1) {
300                         if (errno != EAGAIN) {
301                             ALOGE("Unable to read from descriptor: %s", strerror(errno));
302                         }
303                         break;
304                     }
305 
306                     if (i == IptablesProcess::STDOUT_IDX) {
307                         // i == STDOUT_IDX: accumulate stdout into *output, and look
308                         // for the ping response.
309                         output->append(buffer, size);
310                         size_t pos = output->find(PING);
311                         if (pos != std::string::npos) {
312                             if (output->size() > pos + PING_SIZE) {
313                                 size_t extra = output->size() - (pos + PING_SIZE);
314                                 ALOGW("%zd extra characters after iptables response: '%s...'",
315                                       extra, output->substr(pos + PING_SIZE, 128).c_str());
316                             }
317                             output->resize(pos);
318                             receivedAck = true;
319                         }
320                     } else {
321                         // i == STDERR_IDX: accumulate stderr into errBuf.
322                         process->errBuf.append(buffer, size);
323                     }
324                 } while (size > 0);
325             }
326             if (pollfd.revents & POLLHUP) {
327                 // The pipe was closed. This likely means the subprocess is exiting, since
328                 // iptables-restore only closes stdin on error.
329                 process->stop();
330                 break;
331             }
332         }
333     }
334 
335     if (!receivedAck && !process->processTerminated) {
336         ALOGE("Timed out waiting for response from iptables process %d", process->pid);
337         // Kill the process so that if it eventually recovers, we don't misinterpret the ping
338         // response (or any output) of the command we just sent as coming from future commands.
339         process->stop();
340     }
341 
342     maybeLogStderr(process, command);
343 
344     return receivedAck;
345 }
346 
execute(const IptablesTarget target,const std::string & command,std::string * output)347 int IptablesRestoreController::execute(const IptablesTarget target, const std::string& command,
348                                        std::string *output) {
349     std::lock_guard lock(mLock);
350 
351     std::string buffer;
352     if (output == nullptr) {
353         output = &buffer;
354     } else {
355         output->clear();
356     }
357 
358     int res = 0;
359     if (target == V4 || target == V4V6) {
360         res |= sendCommand(IPTABLES_PROCESS, command, output);
361     }
362     if (target == V6 || target == V4V6) {
363         res |= sendCommand(IP6TABLES_PROCESS, command, output);
364     }
365     return res;
366 }
367 
getIpRestorePid(const IptablesProcessType type)368 int IptablesRestoreController::getIpRestorePid(const IptablesProcessType type) {
369     return type == IPTABLES_PROCESS ? mIpRestore->pid : mIp6Restore->pid;
370 }
371