1 /*
2 * Copyright (c) 2015, The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google, Inc. nor the names of its contributors
15 * may be used to endorse or promote products derived from this
16 * software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <binder/IBinder.h>
33 #include <binder/IServiceManager.h>
34 #include <binder/Parcel.h>
35
36 #include <ctime>
37 #include <cutils/properties.h>
38 #include <signal.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42
43 #include <sys/resource.h>
44 #include <sys/stat.h>
45 #include <sys/time.h>
46 #include <sys/types.h>
47 #include <unistd.h>
48
49 #include <utils/Log.h>
50 #include <utils/String8.h>
51 #include <utils/Trace.h>
52 #include <zlib.h>
53
54 using namespace android;
55
56 #ifdef LOG_TAG
57 #undef LOG_TAG
58 #endif
59
60 #define LOG_TAG "anrdaemon"
61
62 static const int check_period = 1; // in sec
63 static const int tracing_check_period = 500000; // in micro sec
64 static const int cpu_stat_entries = 7; // number of cpu stat entries
65 static const int min_buffer_size = 16;
66 static const int max_buffer_size = 2048;
67 static const char *min_buffer_size_str = "16";
68 static const char *max_buffer_size_str = "2048";
69 static const int time_buf_size = 20;
70 static const int path_buf_size = 60;
71
72 typedef struct cpu_stat {
73 unsigned long utime, ntime, stime, itime;
74 unsigned long iowtime, irqtime, sirqtime, steal;
75 unsigned long total;
76 } cpu_stat_t;
77
78 /*
79 * Logging on/off threshold.
80 * Uint: 0.01%; default to 99.90% cpu.
81 */
82 static int idle_threshold = 10;
83
84 static bool quit = false;
85 static bool suspend= false;
86 static bool dump_requested = false;
87 static bool err = false;
88 static char err_msg[100];
89 static bool tracing = false;
90
91 static const char *buf_size_kb = "2048";
92 static const char *apps = "";
93 static uint64_t tag = 0;
94
95 static cpu_stat_t new_cpu;
96 static cpu_stat_t old_cpu;
97
98 /* Log certain kernel activity when enabled */
99 static bool log_sched = false;
100 static bool log_stack = false;
101 static bool log_irq = false;
102 static bool log_sync = false;
103 static bool log_workq = false;
104
105 /* Paths for debugfs controls*/
106 static const char* dfs_trace_output_path =
107 "/d/tracing/trace";
108 static const char* dfs_irq_path =
109 "/d/tracing/events/irq/enable";
110 static const char* dfs_sync_path =
111 "/d/tracing/events/sync/enable";
112 static const char* dfs_workq_path =
113 "/d/tracing/events/workqueue/enable";
114 static const char* dfs_stack_path =
115 "/d/tracing/options/stacktrace";
116 static const char* dfs_sched_switch_path =
117 "/d/tracing/events/sched/sched_switch/enable";
118 static const char* dfs_sched_wakeup_path =
119 "/d/tracing/events/sched/sched_wakeup/enable";
120 static const char* dfs_control_path =
121 "/d/tracing/tracing_on";
122 static const char* dfs_buffer_size_path =
123 "/d/tracing/buffer_size_kb";
124 static const char* dfs_tags_property = "debug.atrace.tags.enableflags";
125 static const char* dfs_apps_property = "debug.atrace.app_cmdlines";
126
127 /*
128 * Read accumulated cpu data from /proc/stat
129 */
get_cpu_stat(cpu_stat_t * cpu)130 static void get_cpu_stat(cpu_stat_t *cpu) {
131 FILE *fp = NULL;
132 const char *params = "cpu %lu %lu %lu %lu %lu %lu %lu %*d %*d %*d\n";
133
134 if ((fp = fopen("/proc/stat", "r")) == NULL) {
135 err = true;
136 snprintf(err_msg, sizeof(err_msg), "can't read from /proc/stat with errno %d", errno);
137 } else {
138 if (fscanf(fp, params, &cpu->utime, &cpu->ntime,
139 &cpu->stime, &cpu->itime, &cpu->iowtime, &cpu->irqtime,
140 &cpu->sirqtime) != cpu_stat_entries) {
141 /*
142 * If failed in getting status, new_cpu won't be updated and
143 * is_heavy_loaded() will return false.
144 */
145 ALOGE("Error in getting cpu status. Skipping this check.");
146 fclose(fp);
147 return;
148 }
149
150 cpu->total = cpu->utime + cpu->ntime + cpu->stime + cpu->itime
151 + cpu->iowtime + cpu->irqtime + cpu->sirqtime;
152
153 fclose(fp);
154 }
155 }
156
157 /*
158 * Calculate cpu usage in the past interval.
159 * If tracing is on, increase the idle threshold by 1.00% so that we do not
160 * turn on and off tracing frequently when the cpu load is right close to
161 * threshold.
162 */
is_heavy_load(void)163 static bool is_heavy_load(void) {
164 unsigned long diff_idle, diff_total;
165 int threshold = idle_threshold + (tracing?100:0);
166 get_cpu_stat(&new_cpu);
167 diff_idle = new_cpu.itime - old_cpu.itime;
168 diff_total = new_cpu.total - old_cpu.total;
169 old_cpu = new_cpu;
170 return (diff_idle * 10000 < diff_total * threshold);
171 }
172
173 /*
174 * Force the userland processes to refresh their property for logging.
175 */
dfs_poke_binder(void)176 static void dfs_poke_binder(void) {
177 sp<IServiceManager> sm = defaultServiceManager();
178 Vector<String16> services = sm->listServices();
179 for (size_t i = 0; i < services.size(); i++) {
180 sp<IBinder> obj = sm->checkService(services[i]);
181 if (obj != NULL) {
182 Parcel data;
183 obj->transact(IBinder::SYSPROPS_TRANSACTION, data, NULL, 0);
184 }
185 }
186 }
187
188 /*
189 * Enable/disable a debugfs property by writing 0/1 to its path.
190 */
dfs_enable(bool enable,const char * path)191 static int dfs_enable(bool enable, const char* path) {
192 int fd = open(path, O_WRONLY);
193 if (fd == -1) {
194 err = true;
195 snprintf(err_msg, sizeof(err_msg), "Can't open %s. Error: %d", path, errno);
196 return -1;
197 }
198 const char* control = (enable?"1":"0");
199 ssize_t len = strlen(control);
200 int max_try = 10; // Fail if write was interrupted for 10 times
201 while (write(fd, control, len) != len) {
202 if (errno == EINTR && max_try-- > 0) {
203 usleep(100);
204 continue;
205 }
206
207 err = true;
208 snprintf(err_msg, sizeof(err_msg), "Error %d in writing to %s.", errno, path);
209 }
210 close(fd);
211 return (err?-1:0);
212 }
213
214 /*
215 * Set the userland tracing properties.
216 */
dfs_set_property(uint64_t mtag,const char * mapp,bool enable)217 static void dfs_set_property(uint64_t mtag, const char* mapp, bool enable) {
218 char buf[64];
219 snprintf(buf, sizeof(buf), "%#" PRIx64, mtag);
220 if (property_set(dfs_tags_property, buf) < 0) {
221 err = true;
222 snprintf(err_msg, sizeof(err_msg), "Failed to set debug tags system properties.");
223 }
224
225 if (strlen(mapp) > 0
226 && property_set(dfs_apps_property, mapp) < 0) {
227 err = true;
228 snprintf(err_msg, sizeof(err_msg), "Failed to set debug applications.");
229 }
230
231 if (log_sched) {
232 dfs_enable(enable, dfs_sched_switch_path);
233 dfs_enable(enable, dfs_sched_wakeup_path);
234 }
235 if (log_stack) {
236 dfs_enable(enable, dfs_stack_path);
237 }
238 if (log_irq) {
239 dfs_enable(enable, dfs_irq_path);
240 }
241 if (log_sync) {
242 dfs_enable(enable, dfs_sync_path);
243 }
244 if (log_workq) {
245 dfs_enable(enable, dfs_workq_path);
246 }
247 }
248
249 /*
250 * Dump the log in a compressed format for systrace to visualize.
251 * Create a dump file "dump_of_anrdaemon.<current_time>" under /data/misc/anrd
252 */
dump_trace()253 static void dump_trace()
254 {
255 time_t now = time(0);
256 struct tm tstruct;
257 char time_buf[time_buf_size];
258 char path_buf[path_buf_size];
259 const char* header = " done\nTRACE:\n";
260 ssize_t header_len = strlen(header);
261
262 ALOGI("Started to dump ANRdaemon trace.");
263
264 tstruct = *localtime(&now);
265 strftime(time_buf, time_buf_size, "%Y-%m-%d.%X", &tstruct);
266 snprintf(path_buf, path_buf_size, "/data/misc/anrd/dump_of_anrdaemon.%s", time_buf);
267 int output_fd = creat(path_buf, S_IRWXU);
268 if (output_fd == -1) {
269 ALOGE("Failed to create %s. Dump aborted.", path_buf);
270 return;
271 }
272
273 if (write(output_fd, header, strlen(header)) != header_len) {
274 ALOGE("Failed to write the header.");
275 close(output_fd);
276 return;
277 }
278
279 int trace_fd = open(dfs_trace_output_path, O_RDWR);
280 if (trace_fd == -1) {
281 ALOGE("Failed to open %s. Dump aborted.", dfs_trace_output_path);
282 close(output_fd);
283 return;
284 }
285
286 z_stream zs;
287 uint8_t *in, *out;
288 int result, flush;
289
290 memset(&zs, 0, sizeof(zs));
291 result = deflateInit(&zs, Z_DEFAULT_COMPRESSION);
292 if (result != Z_OK) {
293 ALOGE("error initializing zlib: %d\n", result);
294 close(trace_fd);
295 close(output_fd);
296 return;
297 }
298
299 const size_t bufSize = 64*1024;
300 in = (uint8_t*)malloc(bufSize);
301 out = (uint8_t*)malloc(bufSize);
302 flush = Z_NO_FLUSH;
303
304 zs.next_out = out;
305 zs.avail_out = bufSize;
306
307 do {
308 if (zs.avail_in == 0) {
309 result = read(trace_fd, in, bufSize);
310 if (result < 0) {
311 ALOGE("error reading trace: %s", strerror(errno));
312 result = Z_STREAM_END;
313 break;
314 } else if (result == 0) {
315 flush = Z_FINISH;
316 } else {
317 zs.next_in = in;
318 zs.avail_in = result;
319 }
320 }
321
322 if (zs.avail_out == 0) {
323 result = write(output_fd, out, bufSize);
324 if ((size_t)result < bufSize) {
325 ALOGE("error writing deflated trace: %s", strerror(errno));
326 result = Z_STREAM_END;
327 zs.avail_out = bufSize;
328 break;
329 }
330 zs.next_out = out;
331 zs.avail_out = bufSize;
332 }
333
334 } while ((result = deflate(&zs, flush)) == Z_OK);
335
336 if (result != Z_STREAM_END) {
337 ALOGE("error deflating trace: %s\n", zs.msg);
338 }
339
340 if (zs.avail_out < bufSize) {
341 size_t bytes = bufSize - zs.avail_out;
342 result = write(output_fd, out, bytes);
343 if ((size_t)result < bytes) {
344 ALOGE("error writing deflated trace: %s", strerror(errno));
345 }
346 }
347
348 result = deflateEnd(&zs);
349 if (result != Z_OK) {
350 ALOGE("error cleaning up zlib: %d\n", result);
351 }
352
353 free(in);
354 free(out);
355
356 close(trace_fd);
357 close(output_fd);
358
359 ALOGI("Finished dump. Output file stored at: %s", path_buf);
360 }
361
362 /*
363 * Start logging when cpu usage is high. Meanwhile, moniter the cpu usage and
364 * stop logging when it drops down.
365 */
start_tracing(void)366 static void start_tracing(void) {
367 ALOGD("High cpu usage, start logging.");
368
369 if (dfs_enable(true, dfs_control_path) != 0) {
370 ALOGE("Failed to start tracing.");
371 return;
372 }
373 tracing = true;
374
375 /* Stop logging when cpu usage drops or the daemon is suspended.*/
376 do {
377 usleep(tracing_check_period);
378 } while (!suspend && !dump_requested && is_heavy_load());
379
380 if (dfs_enable(false, dfs_control_path) != 0) {
381 ALOGE("Failed to stop tracing.");
382 err = true;
383 return;
384 }
385 tracing = false;
386
387 if (suspend) {
388 ALOGI("trace stopped due to suspend. Send SIGCONT to resume.");
389 } else if (dump_requested) {
390 ALOGI("trace stopped due to dump request.");
391 dump_trace();
392 dump_requested = false;
393 } else {
394 ALOGD("Usage back to low, stop logging.");
395 }
396 }
397
398 /*
399 * Set the tracing log buffer size.
400 * Note the actual buffer size will be buf_size_kb * number of cores.
401 */
set_tracing_buffer_size(void)402 static int set_tracing_buffer_size(void) {
403 int fd = open(dfs_buffer_size_path, O_WRONLY);
404 if (fd == -1) {
405 err = true;
406 snprintf(err_msg, sizeof(err_msg), "Can't open atrace buffer size file under /d/tracing.");
407 return -1;
408 }
409 ssize_t len = strlen(buf_size_kb);
410 if (write(fd, buf_size_kb, len) != len) {
411 err = true;
412 snprintf(err_msg, sizeof(err_msg), "Error in writing to atrace buffer size file.");
413 }
414 close(fd);
415 return (err?-1:0);
416
417 }
418
419 /*
420 * Main loop to moniter the cpu usage and decided whether to start logging.
421 */
start(void)422 static void start(void) {
423 if ((set_tracing_buffer_size()) != 0)
424 return;
425
426 dfs_set_property(tag, apps, true);
427 dfs_poke_binder();
428
429 get_cpu_stat(&old_cpu);
430 sleep(check_period);
431
432 while (!quit && !err) {
433 if (!suspend && is_heavy_load()) {
434 /*
435 * Increase process priority to make sure we can stop logging when
436 * necessary and do not overwrite the buffer
437 */
438 setpriority(PRIO_PROCESS, 0, -20);
439 start_tracing();
440 setpriority(PRIO_PROCESS, 0, 0);
441 }
442 sleep(check_period);
443 }
444 return;
445 }
446
447 /*
448 * If trace is not running, dump trace right away.
449 * If trace is running, request to dump trace.
450 */
request_dump_trace()451 static void request_dump_trace()
452 {
453 if (!tracing) {
454 dump_trace();
455 } else if (!dump_requested) {
456 dump_requested = true;
457 }
458 }
459
handle_signal(int signo)460 static void handle_signal(int signo)
461 {
462 switch (signo) {
463 case SIGQUIT:
464 suspend = true;
465 quit = true;
466 break;
467 case SIGSTOP:
468 suspend = true;
469 break;
470 case SIGCONT:
471 suspend = false;
472 break;
473 case SIGUSR1:
474 request_dump_trace();
475 }
476 }
477
478 /*
479 * Set the signal handler:
480 * SIGQUIT: Reset debugfs and tracing property and terminate the daemon.
481 * SIGSTOP: Stop logging and suspend the daemon.
482 * SIGCONT: Resume the daemon as normal.
483 * SIGUSR1: Dump the logging to a compressed format for systrace to visualize.
484 */
register_sighandler(void)485 static void register_sighandler(void)
486 {
487 struct sigaction sa;
488 sigset_t block_mask;
489
490 sigemptyset(&block_mask);
491 sigaddset (&block_mask, SIGQUIT);
492 sigaddset (&block_mask, SIGSTOP);
493 sigaddset (&block_mask, SIGCONT);
494 sigaddset (&block_mask, SIGUSR1);
495
496 sa.sa_flags = 0;
497 sa.sa_mask = block_mask;
498 sa.sa_handler = handle_signal;
499 sigaction(SIGQUIT, &sa, NULL);
500 sigaction(SIGSTOP, &sa, NULL);
501 sigaction(SIGCONT, &sa, NULL);
502 sigaction(SIGUSR1, &sa, NULL);
503 }
504
show_help(void)505 static void show_help(void) {
506
507 fprintf(stderr, "usage: ANRdaemon [options] [categoris...]\n");
508 fprintf(stdout, "Options includes:\n"
509 " -a appname enable app-level tracing for a comma "
510 "separated list of cmdlines\n"
511 " -t N cpu threshold for logging to start "
512 "(uint = 0.01%%, min = 5000, max = 9999, default = 9990)\n"
513 " -s N use a trace buffer size of N KB "
514 "default to 2048KB\n"
515 " -h show helps\n");
516 fprintf(stdout, "Categoris includes:\n"
517 " am - activity manager\n"
518 " sm - sync manager\n"
519 " input - input\n"
520 " dalvik - dalvik VM\n"
521 " audio - Audio\n"
522 " gfx - Graphics\n"
523 " rs - RenderScript\n"
524 " hal - Hardware Modules\n"
525 " irq - kernel irq events\n"
526 " sched - kernel scheduler activity\n"
527 " stack - kernel stack\n"
528 " sync - kernel sync activity\n"
529 " workq - kernel work queues\n");
530 fprintf(stdout, "Control includes:\n"
531 " SIGQUIT: terminate the process\n"
532 " SIGSTOP: suspend all function of the daemon\n"
533 " SIGCONT: resume the normal function\n"
534 " SIGUSR1: dump the current logging in a compressed form\n");
535 exit(0);
536 }
537
get_options(int argc,char * argv[])538 static int get_options(int argc, char *argv[]) {
539 int opt = 0;
540 int threshold;
541 while ((opt = getopt(argc, argv, "a:s:t:h")) >= 0) {
542 switch(opt) {
543 case 'a':
544 apps = optarg;
545 break;
546 case 's':
547 if (atoi(optarg) > max_buffer_size)
548 buf_size_kb = max_buffer_size_str;
549 else if (atoi(optarg) < min_buffer_size)
550 buf_size_kb = min_buffer_size_str;
551 else
552 buf_size_kb = optarg;
553 break;
554 case 't':
555 threshold = atoi(optarg);
556 if (threshold > 9999 || threshold < 5000) {
557 fprintf(stderr, "logging threshold should be 5000-9999\n");
558 return 1;
559 }
560 idle_threshold = 10000 - threshold;
561 break;
562 case 'h':
563 show_help();
564 break;
565 default:
566 fprintf(stderr, "Error in getting options.\n"
567 "run \"%s -h\" for usage.\n", argv[0]);
568 return 1;
569 }
570 }
571
572 for (int i = optind; i < argc; i++) {
573 if (strcmp(argv[i], "am") == 0) {
574 tag |= ATRACE_TAG_ACTIVITY_MANAGER;
575 } else if (strcmp(argv[i], "input") == 0) {
576 tag |= ATRACE_TAG_INPUT;
577 } else if (strcmp(argv[i], "sm") == 0) {
578 tag |= ATRACE_TAG_SYNC_MANAGER;
579 } else if (strcmp(argv[i], "dalvik") == 0) {
580 tag |= ATRACE_TAG_DALVIK;
581 } else if (strcmp(argv[i], "gfx") == 0) {
582 tag |= ATRACE_TAG_GRAPHICS;
583 } else if (strcmp(argv[i], "audio") == 0) {
584 tag |= ATRACE_TAG_AUDIO;
585 } else if (strcmp(argv[i], "hal") == 0) {
586 tag |= ATRACE_TAG_HAL;
587 } else if (strcmp(argv[i], "rs") == 0) {
588 tag |= ATRACE_TAG_RS;
589 } else if (strcmp(argv[i], "sched") == 0) {
590 log_sched = true;
591 } else if (strcmp(argv[i], "stack") == 0) {
592 log_stack = true;
593 } else if (strcmp(argv[i], "workq") == 0) {
594 log_workq = true;
595 } else if (strcmp(argv[i], "irq") == 0) {
596 log_irq = true;
597 } else if (strcmp(argv[i], "sync") == 0) {
598 log_sync = true;
599 } else {
600 fprintf(stderr, "invalid category: %s\n"
601 "run \"%s -h\" for usage.\n", argv[i], argv[0]);
602 return 1;
603 }
604 }
605
606 /* If nothing is enabled, don't run */
607 if (!tag && !log_sched && !log_stack && !log_workq && !log_irq && !log_sync) {
608 ALOGE("Specify at least one category to trace.");
609 return 1;
610 }
611
612 return 0;
613 }
614
main(int argc,char * argv[])615 int main(int argc, char *argv[])
616 {
617 if(get_options(argc, argv) != 0)
618 return 1;
619
620 if (daemon(0, 0) != 0)
621 return 1;
622
623 register_sighandler();
624
625 /* Clear any the trace log file by overwrite it with a new file */
626 int fd = creat(dfs_trace_output_path, 0);
627 if (fd == -1) {
628 ALOGE("Faield to open and cleaup previous log");
629 return 1;
630 }
631 close(fd);
632
633 ALOGI("ANRdaemon starting");
634 start();
635
636 if (err)
637 ALOGE("ANRdaemon stopped due to Error: %s", err_msg);
638
639 ALOGI("ANRdaemon terminated.");
640
641 return (err?1:0);
642 }
643