1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 #include <signal.h>
19 #include <stdint.h>
20 #include <sys/prctl.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23 #include <sys/wait.h>
24 #include <unistd.h>
25 
26 #include <chrono>
27 #include <iostream>
28 #include <string>
29 
30 #include <android-base/properties.h>
31 #include <gtest/gtest.h>
32 #include <log/log_time.h>  // for MS_PER_SEC and US_PER_SEC
33 
34 #include "llkd.h"
35 
36 using namespace std::chrono;
37 using namespace std::chrono_literals;
38 
39 namespace {
40 
GetUintProperty(const std::string & key,milliseconds def)41 milliseconds GetUintProperty(const std::string& key, milliseconds def) {
42     return milliseconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
43                                                        static_cast<uint64_t>(def.max().count())));
44 }
45 
GetUintProperty(const std::string & key,seconds def)46 seconds GetUintProperty(const std::string& key, seconds def) {
47     return seconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
48                                                   static_cast<uint64_t>(def.max().count())));
49 }
50 
// GTEST_LOG_(WARNING) output is noisy; these macros produce much less noise.
// Each one expands to std::cerr with a bracketed tag already streamed, so the
// caller appends its message (and the trailing newline) with operator<<.
// TODO: look into fixing googletest to produce output that matches the style
//       of all the other status messages, and can switch off __line__ and
//       __function__ noise
#define GTEST_LOG_WARNING std::cerr << "[ WARNING  ] "
#define GTEST_LOG_INFO std::cerr << "[   INFO   ] "
57 
// Properties is _not_ a high performance ABI!
// Blocks the caller for 200 milliseconds; used in this file after each
// property or service command is issued.
void rest() {
    constexpr std::chrono::microseconds kPause = std::chrono::milliseconds(200);
    usleep(kPause.count());
}
62 
// Runs `command` through system(). When the caller is root the command is
// tried as-is first; if the caller is not root, or that attempt returns
// non-zero, the command is run again prefixed with "su root ". The result of
// the "su root" attempt is not checked.
void execute(const char* command) {
    // Short-circuit keeps the plain attempt from running for non-root callers.
    const bool ranAsIs = (getuid() == 0) && (system(command) == 0);
    if (ranAsIs) {
        return;
    }
    const std::string elevated = std::string("su root ") + command;
    system(elevated.c_str());
}
68 
llkdSleepPeriod(char state)69 seconds llkdSleepPeriod(char state) {
70     auto default_eng = android::base::GetProperty(LLK_ENABLE_PROPERTY, "eng") == "eng";
71     auto default_enable = LLK_ENABLE_DEFAULT;
72     if (!LLK_ENABLE_DEFAULT && default_eng &&
73         android::base::GetBoolProperty("ro.debuggable", false)) {
74         default_enable = true;
75     }
76     default_enable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, default_enable);
77     if (default_eng) {
78         GTEST_LOG_INFO << LLK_ENABLE_PROPERTY " defaults to \"eng\" thus "
79                        << (default_enable ? "true" : "false") << "\n";
80     }
81     // Hail Mary hope is unconfigured.
82     if ((GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, LLK_TIMEOUT_MS_DEFAULT) !=
83          duration_cast<milliseconds>(120s)) ||
84         (GetUintProperty(LLK_CHECK_MS_PROPERTY,
85                          LLK_TIMEOUT_MS_DEFAULT / LLK_CHECKS_PER_TIMEOUT_DEFAULT) !=
86          duration_cast<milliseconds>(10s))) {
87         execute("stop llkd-0");
88         execute("stop llkd-1");
89         rest();
90         std::string setprop("setprop ");
91         // Manually check that SyS_openat is _added_ to the list when restarted
92         // 4.19+ kernels report __arm64_sys_openat b/147486902
93         execute((setprop + LLK_CHECK_STACK_PROPERTY + " ,SyS_openat,__arm64_sys_openat").c_str());
94         rest();
95         execute((setprop + LLK_ENABLE_WRITEABLE_PROPERTY + " false").c_str());
96         rest();
97         execute((setprop + LLK_TIMEOUT_MS_PROPERTY + " 120000").c_str());
98         rest();
99         execute((setprop + KHT_TIMEOUT_PROPERTY + " 130").c_str());
100         rest();
101         execute((setprop + LLK_CHECK_MS_PROPERTY + " 10000").c_str());
102         rest();
103         if (!default_enable) {
104             execute((setprop + LLK_ENABLE_PROPERTY + " true").c_str());
105             rest();
106         }
107         execute((setprop + LLK_ENABLE_WRITEABLE_PROPERTY + " true").c_str());
108         rest();
109     }
110     default_enable = LLK_ENABLE_DEFAULT;
111     if (!LLK_ENABLE_DEFAULT && (android::base::GetProperty(LLK_ENABLE_PROPERTY, "eng") == "eng") &&
112         android::base::GetBoolProperty("ro.debuggable", false)) {
113         default_enable = true;
114     }
115     default_enable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, default_enable);
116     if (default_enable) {
117         execute("start llkd-1");
118         rest();
119         GTEST_LOG_INFO << "llkd enabled\n";
120     } else {
121         GTEST_LOG_WARNING << "llkd disabled\n";
122     }
123 
124     /* KISS follows llk_init() */
125     milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;
126     seconds khtTimeout = duration_cast<seconds>(
127         llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) / LLK_CHECKS_PER_TIMEOUT_DEFAULT);
128     khtTimeout = GetUintProperty(KHT_TIMEOUT_PROPERTY, khtTimeout);
129     llkTimeoutMs =
130         khtTimeout * LLK_CHECKS_PER_TIMEOUT_DEFAULT / (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT);
131     llkTimeoutMs = GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
132     if (llkTimeoutMs < LLK_TIMEOUT_MS_MINIMUM) {
133         llkTimeoutMs = LLK_TIMEOUT_MS_MINIMUM;
134     }
135     milliseconds llkCheckMs = llkTimeoutMs / LLK_CHECKS_PER_TIMEOUT_DEFAULT;
136     auto timeout = GetUintProperty((state == 'Z') ? LLK_Z_TIMEOUT_MS_PROPERTY
137                                                   : (state == 'S') ? LLK_STACK_TIMEOUT_MS_PROPERTY
138                                                                    : LLK_D_TIMEOUT_MS_PROPERTY,
139                                    llkTimeoutMs);
140     if (timeout < LLK_TIMEOUT_MS_MINIMUM) {
141         timeout = LLK_TIMEOUT_MS_MINIMUM;
142     }
143 
144     if (llkCheckMs > timeout) {
145         llkCheckMs = timeout;
146     }
147     llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs);
148     timeout += llkCheckMs;
149     auto sec = duration_cast<seconds>(timeout);
150     if (sec == 0s) {
151         ++sec;
152     } else if (sec > 59s) {
153         GTEST_LOG_WARNING << "llkd is configured for about " << duration_cast<minutes>(sec).count()
154                           << " minutes to react\n";
155     }
156 
157     // 33% margin for the test to naturally timeout waiting for llkd to respond
158     return (sec * 4 + 2s) / 3;
159 }
160 
waitForPid(pid_t child_pid)161 inline void waitForPid(pid_t child_pid) {
162     int wstatus;
163     ASSERT_LE(0, waitpid(child_pid, &wstatus, 0));
164     EXPECT_FALSE(WIFEXITED(wstatus)) << "[   INFO   ] exit=" << WEXITSTATUS(wstatus);
165     ASSERT_TRUE(WIFSIGNALED(wstatus));
166     ASSERT_EQ(WTERMSIG(wstatus), SIGKILL);
167 }
168 
checkKill(const char * reason)169 bool checkKill(const char* reason) {
170     if (android::base::GetBoolProperty(LLK_KILLTEST_PROPERTY, LLK_KILLTEST_DEFAULT)) {
171         return false;
172     }
173     auto bootreason = android::base::GetProperty("sys.boot.reason", "nothing");
174     if (bootreason == reason) {
175         GTEST_LOG_INFO << "Expected test result confirmed " << reason << "\n";
176         return true;
177     }
178     GTEST_LOG_WARNING << "Expected test result is " << reason << "\n";
179 
180     // apct adjustment if needed (set LLK_KILLTEST_PROPERTY to "off" to allow test)
181     //
182     // if (android::base::GetProperty(LLK_KILLTEST_PROPERTY, "") == "false") {
183     //     GTEST_LOG_WARNING << "Bypassing test\n";
184     //     return true;
185     // }
186 
187     return false;
188 }
189 
190 }  // namespace
191 
192 // The tests that use this helper are to simulate processes stuck in 'D'
193 // state that are experiencing forward scheduled progress. As such the
194 // expectation is that llkd will _not_ perform any mitigations. The sleepfor
195 // argument helps us set the amount of forward scheduler progress.
llkd_driver_ABA(const microseconds sleepfor)196 static void llkd_driver_ABA(const microseconds sleepfor) {
197     const auto period = llkdSleepPeriod('D');
198     if (period <= sleepfor) {
199         GTEST_LOG_WARNING << "llkd configuration too short for "
200                           << duration_cast<milliseconds>(sleepfor).count() << "ms work cycle\n";
201         return;
202     }
203 
204     auto child_pid = fork();
205     ASSERT_LE(0, child_pid);
206     int wstatus;
207     if (!child_pid) {
208         auto ratio = period / sleepfor;
209         ASSERT_LT(0, ratio);
210         // vfork() parent is uninterruptable D state waiting for child to exec()
211         while (--ratio > 0) {
212             auto driver_pid = vfork();
213             ASSERT_LE(0, driver_pid);
214             if (driver_pid) {  // parent
215                 waitpid(driver_pid, &wstatus, 0);
216                 if (!WIFEXITED(wstatus)) {
217                     exit(42);
218                 }
219                 if (WEXITSTATUS(wstatus) != 42) {
220                     exit(42);
221                 }
222             } else {
223                 usleep(sleepfor.count());
224                 exit(42);
225             }
226         }
227         exit(0);
228     }
229     ASSERT_LE(0, waitpid(child_pid, &wstatus, 0));
230     EXPECT_TRUE(WIFEXITED(wstatus));
231     if (WIFEXITED(wstatus)) {
232         EXPECT_EQ(0, WEXITSTATUS(wstatus));
233     }
234     ASSERT_FALSE(WIFSIGNALED(wstatus)) << "[   INFO   ] signo=" << WTERMSIG(wstatus);
235 }
236 
// Runs the ABA driver scenario with a 5 millisecond work cycle.
TEST(llkd, driver_ABA_fast) {
    constexpr microseconds kWorkCycle = 5ms;
    llkd_driver_ABA(kWorkCycle);
}
240 
// Runs the ABA driver scenario with a 1 second work cycle.
TEST(llkd, driver_ABA_slow) {
    constexpr microseconds kWorkCycle = 1s;
    llkd_driver_ABA(kWorkCycle);
}
244 
// Runs the ABA driver scenario with a 1 minute work cycle.
TEST(llkd, driver_ABA_glacial) {
    constexpr microseconds kWorkCycle = 1min;
    llkd_driver_ABA(kWorkCycle);
}
248 
249 // Following tests must be last in this file to capture possible errant
250 // kernel_panic mitigation failure.
251 
// The following tests simulate processes stuck in 'Z' or 'D' state with
// no forward scheduling progress, but interruptible. As such the expectation
// is that llkd will perform kill mitigation and not progress to kernel_panic.
255 
// Forks a child that itself forks a short-lived grandchild and never reaps
// it, then sleeps for the llkd reaction period. The test expects the child to
// be terminated by SIGKILL (see waitForPid) before that sleep ends.
//
// Child exit status: 42 when the sleep ran to completion, 43 when the
// grandchild could not be forked.
TEST(llkd, zombie) {
    if (checkKill("kernel_panic,sysrq,livelock,zombie")) {
        return;
    }

    const auto period = llkdSleepPeriod('Z');

    /* Create a Persistent Zombie Process */
    pid_t child_pid = fork();
    ASSERT_LE(0, child_pid);
    if (!child_pid) {
        auto zombie_pid = fork();
        if (zombie_pid < 0) {
            // ASSERT_* would only return from the test body and let this
            // forked copy continue running the remaining tests; fail the
            // child through its exit status instead.
            exit(43);
        }
        if (!zombie_pid) {
            sleep(1);
            exit(0);
        }
        sleep(period.count());
        exit(42);
    }

    waitForPid(child_pid);
}
279 
// Forks a child that calls vfork() and whose vfork() child then sleeps for
// the llkd reaction period, keeping the forked child blocked inside vfork()
// for that long. The test expects the forked child to be terminated by
// SIGKILL (see waitForPid).
//
// Forked child exit status: 42 when it outlived both sleeps, 43 when vfork()
// failed.
TEST(llkd, driver) {
    if (checkKill("kernel_panic,sysrq,livelock,driver")) {
        return;
    }

    const auto period = llkdSleepPeriod('D');

    /* Create a Persistent Device Process */
    auto child_pid = fork();
    ASSERT_LE(0, child_pid);
    if (!child_pid) {
        // vfork() parent is uninterruptable D state waiting for child to exec()
        auto driver_pid = vfork();
        if (driver_pid < 0) {
            // ASSERT_* would only return from the test body and let this
            // forked copy continue running the remaining tests; fail the
            // child through its exit status instead.
            exit(43);
        }
        sleep(period.count());
        if (!driver_pid) {
            // A vfork() child shares the parent's address space and must
            // leave with _exit(), not exit().
            _exit(0);
        }
        exit(42);
    }

    waitForPid(child_pid);
}
300 
// Creates a FIFO and forks a child that opens it write-only while no reader
// exists until the grandchild opens it after sleeping for the llkd reaction
// period. The test expects the child to be terminated by SIGKILL (see
// waitForPid) and removes the FIFO afterwards.
//
// Child exit status: 42 when its open() returned, 43 when the grandchild
// could not be forked.
TEST(llkd, sleep) {
    if (checkKill("kernel_panic,sysrq,livelock,sleeping")) {
        return;
    }
    if (!android::base::GetBoolProperty("ro.debuggable", false)) {
        // Warning only: the test still runs.
        GTEST_LOG_WARNING << "Features not available on user builds\n";
    }

    const auto period = llkdSleepPeriod('S');

    /* Create a Persistent SyS_openat for single-ended pipe */
    static constexpr char stack_pipe_file[] = "/dev/stack_pipe_file";
    unlink(stack_pipe_file);
    auto pipe_ret = mknod(stack_pipe_file, S_IFIFO | 0666, 0);
    ASSERT_LE(0, pipe_ret);

    auto child_pid = fork();
    ASSERT_LE(0, child_pid);
    if (!child_pid) {
        child_pid = fork();
        if (child_pid < 0) {
            // ASSERT_* would only return from the test body and let this
            // forked copy continue running the remaining tests; fail the
            // child through its exit status instead.
            exit(43);
        }
        if (!child_pid) {
            sleep(period.count());
            auto fd = open(stack_pipe_file, O_RDONLY | O_CLOEXEC);
            close(fd);
            exit(0);
        } else {
            auto fd = open(stack_pipe_file, O_WRONLY | O_CLOEXEC);
            close(fd);
            exit(42);
        }
    }

    waitForPid(child_pid);

    unlink(stack_pipe_file);
}
338 
339 // b/120983740
TEST(llkd,adbd_and_setsid)340 TEST(llkd, adbd_and_setsid) {
341     if (checkKill("kernel_panic,sysrq,livelock,zombie")) {
342         return;
343     }
344     const auto period = llkdSleepPeriod('S');
345 
346     // expect llkd.zombie to trigger, but not for adbd&[setsid]
347     // Create a Persistent Zombie setsid Process
348     pid_t child_pid = fork();
349     ASSERT_LE(0, child_pid);
350     if (!child_pid) {
351         prctl(PR_SET_NAME, "adbd");
352         auto zombie_pid = fork();
353         ASSERT_LE(0, zombie_pid);
354         if (!zombie_pid) {
355             prctl(PR_SET_NAME, "setsid");
356             sleep(1);
357             exit(0);
358         }
359         sleep(period.count());
360         exit(42);
361     }
362 
363     // Reverse of waitForPid, do _not_ expect kill
364     int wstatus;
365     ASSERT_LE(0, waitpid(child_pid, &wstatus, 0));
366     EXPECT_TRUE(WIFEXITED(wstatus));
367     if (WIFEXITED(wstatus)) {
368         EXPECT_EQ(42, WEXITSTATUS(wstatus));
369     }
370     ASSERT_FALSE(WIFSIGNALED(wstatus)) << "[   INFO   ] signo=" << WTERMSIG(wstatus);
371 }
372