1 /*
2 * Copyright (C) 2012-2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 // for manual checking of stale entries during ChattyLogBuffer::erase()
17 //#define DEBUG_CHECK_FOR_STALE_ENTRIES
18
19 #include "ChattyLogBuffer.h"
20
21 #include <ctype.h>
22 #include <endian.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <sys/cdefs.h>
27 #include <sys/user.h>
28 #include <time.h>
29 #include <unistd.h>
30
31 #include <limits>
32 #include <unordered_map>
33 #include <utility>
34
35 #include <private/android_logger.h>
36
37 #include "LogUtils.h"
38
// Fallback definition of the "branch is unlikely" hint, used only when no earlier header has
// already provided __predict_false.
#if !defined(__predict_false)
#define __predict_false(exp) __builtin_expect((exp) != 0, 0)
#endif
42
ChattyLogBuffer(LogReaderList * reader_list,LogTags * tags,PruneList * prune,LogStatistics * stats)43 ChattyLogBuffer::ChattyLogBuffer(LogReaderList* reader_list, LogTags* tags, PruneList* prune,
44 LogStatistics* stats)
45 : SimpleLogBuffer(reader_list, tags, stats), prune_(prune) {}
46
~ChattyLogBuffer()47 ChattyLogBuffer::~ChattyLogBuffer() {}
48
// Result of comparing a new log element with the previously logged one (see Identical()).
enum match_type {
    DIFFERENT,    // not a repeat; must be logged on its own
    SAME,         // a repeat of the previous element
    SAME_LIBLOG,  // a repeated liblog event whose integer payloads get summed
};
50
Identical(const LogBufferElement & elem,const LogBufferElement & last)51 static enum match_type Identical(const LogBufferElement& elem, const LogBufferElement& last) {
52 ssize_t lenl = elem.msg_len();
53 if (lenl <= 0) return DIFFERENT; // value if this represents a chatty elem
54 ssize_t lenr = last.msg_len();
55 if (lenr <= 0) return DIFFERENT; // value if this represents a chatty elem
56 if (elem.uid() != last.uid()) return DIFFERENT;
57 if (elem.pid() != last.pid()) return DIFFERENT;
58 if (elem.tid() != last.tid()) return DIFFERENT;
59
60 // last is more than a minute old, stop squashing identical messages
61 if (elem.realtime().nsec() > (last.realtime().nsec() + 60 * NS_PER_SEC)) return DIFFERENT;
62
63 // Identical message
64 const char* msgl = elem.msg();
65 const char* msgr = last.msg();
66 if (lenl == lenr) {
67 if (!fastcmp<memcmp>(msgl, msgr, lenl)) return SAME;
68 // liblog tagged messages (content gets summed)
69 if (elem.log_id() == LOG_ID_EVENTS && lenl == sizeof(android_log_event_int_t) &&
70 !fastcmp<memcmp>(msgl, msgr, sizeof(android_log_event_int_t) - sizeof(int32_t)) &&
71 elem.GetTag() == LIBLOG_LOG_TAG) {
72 return SAME_LIBLOG;
73 }
74 }
75
76 // audit message (except sequence number) identical?
77 if (IsBinary(last.log_id()) &&
78 lenl > static_cast<ssize_t>(sizeof(android_log_event_string_t)) &&
79 lenr > static_cast<ssize_t>(sizeof(android_log_event_string_t))) {
80 if (fastcmp<memcmp>(msgl, msgr, sizeof(android_log_event_string_t) - sizeof(int32_t))) {
81 return DIFFERENT;
82 }
83 msgl += sizeof(android_log_event_string_t);
84 lenl -= sizeof(android_log_event_string_t);
85 msgr += sizeof(android_log_event_string_t);
86 lenr -= sizeof(android_log_event_string_t);
87 }
88 static const char avc[] = "): avc: ";
89 const char* avcl = android::strnstr(msgl, lenl, avc);
90 if (!avcl) return DIFFERENT;
91 lenl -= avcl - msgl;
92 const char* avcr = android::strnstr(msgr, lenr, avc);
93 if (!avcr) return DIFFERENT;
94 lenr -= avcr - msgr;
95 if (lenl != lenr) return DIFFERENT;
96 if (fastcmp<memcmp>(avcl + strlen(avc), avcr + strlen(avc), lenl - strlen(avc))) {
97 return DIFFERENT;
98 }
99 return SAME;
100 }
101
LogInternal(LogBufferElement && elem)102 void ChattyLogBuffer::LogInternal(LogBufferElement&& elem) {
103 // b/137093665: don't coalesce security messages.
104 if (elem.log_id() == LOG_ID_SECURITY) {
105 SimpleLogBuffer::LogInternal(std::move(elem));
106 return;
107 }
108 int log_id = elem.log_id();
109
110 // Initialize last_logged_elements_ to a copy of elem if logging the first element for a log_id.
111 if (!last_logged_elements_[log_id]) {
112 last_logged_elements_[log_id].emplace(elem);
113 SimpleLogBuffer::LogInternal(std::move(elem));
114 return;
115 }
116
117 LogBufferElement& current_last = *last_logged_elements_[log_id];
118 enum match_type match = Identical(elem, current_last);
119
120 if (match == DIFFERENT) {
121 if (duplicate_elements_[log_id]) {
122 // If we previously had 3+ identical messages, log the chatty message.
123 if (duplicate_elements_[log_id]->dropped_count() > 0) {
124 SimpleLogBuffer::LogInternal(std::move(*duplicate_elements_[log_id]));
125 }
126 duplicate_elements_[log_id].reset();
127 // Log the saved copy of the last identical message seen.
128 SimpleLogBuffer::LogInternal(std::move(current_last));
129 }
130 last_logged_elements_[log_id].emplace(elem);
131 SimpleLogBuffer::LogInternal(std::move(elem));
132 return;
133 }
134
135 // 2 identical message: set duplicate_elements_ appropriately.
136 if (!duplicate_elements_[log_id]) {
137 duplicate_elements_[log_id].emplace(std::move(current_last));
138 last_logged_elements_[log_id].emplace(std::move(elem));
139 return;
140 }
141
142 // 3+ identical LIBLOG event messages: coalesce them into last_logged_elements_.
143 if (match == SAME_LIBLOG) {
144 const android_log_event_int_t* current_last_event =
145 reinterpret_cast<const android_log_event_int_t*>(current_last.msg());
146 int64_t current_last_count = current_last_event->payload.data;
147 android_log_event_int_t* elem_event =
148 reinterpret_cast<android_log_event_int_t*>(const_cast<char*>(elem.msg()));
149 int64_t elem_count = elem_event->payload.data;
150
151 int64_t total = current_last_count + elem_count;
152 if (total > std::numeric_limits<int32_t>::max()) {
153 SimpleLogBuffer::LogInternal(std::move(current_last));
154 last_logged_elements_[log_id].emplace(std::move(elem));
155 return;
156 }
157 stats()->AddTotal(current_last.log_id(), current_last.msg_len());
158 elem_event->payload.data = total;
159 last_logged_elements_[log_id].emplace(std::move(elem));
160 return;
161 }
162
163 // 3+ identical messages (not LIBLOG) messages: increase the drop count.
164 uint16_t dropped_count = duplicate_elements_[log_id]->dropped_count();
165 if (dropped_count == std::numeric_limits<uint16_t>::max()) {
166 SimpleLogBuffer::LogInternal(std::move(*duplicate_elements_[log_id]));
167 dropped_count = 0;
168 }
169 // We're dropping the current_last log so add its stats to the total.
170 stats()->AddTotal(current_last.log_id(), current_last.msg_len());
171 // Use current_last for tracking the dropped count to always use the latest timestamp.
172 current_last.SetDropped(dropped_count + 1);
173 duplicate_elements_[log_id].emplace(std::move(current_last));
174 last_logged_elements_[log_id].emplace(std::move(elem));
175 }
176
Erase(LogBufferElementCollection::iterator it,bool coalesce)177 LogBufferElementCollection::iterator ChattyLogBuffer::Erase(LogBufferElementCollection::iterator it,
178 bool coalesce) {
179 LogBufferElement& element = *it;
180 log_id_t id = element.log_id();
181
182 // Remove iterator references in the various lists that will become stale
183 // after the element is erased from the main logging list.
184
185 { // start of scope for found iterator
186 int key = (id == LOG_ID_EVENTS || id == LOG_ID_SECURITY) ? element.GetTag() : element.uid();
187 LogBufferIteratorMap::iterator found = mLastWorst[id].find(key);
188 if ((found != mLastWorst[id].end()) && (it == found->second)) {
189 mLastWorst[id].erase(found);
190 }
191 }
192
193 { // start of scope for pid found iterator
194 // element->uid() may not be AID_SYSTEM for next-best-watermark.
195 // will not assume id != LOG_ID_EVENTS or LOG_ID_SECURITY for KISS and
196 // long term code stability, find() check should be fast for those ids.
197 LogBufferPidIteratorMap::iterator found = mLastWorstPidOfSystem[id].find(element.pid());
198 if (found != mLastWorstPidOfSystem[id].end() && it == found->second) {
199 mLastWorstPidOfSystem[id].erase(found);
200 }
201 }
202
203 #ifdef DEBUG_CHECK_FOR_STALE_ENTRIES
204 LogBufferElementCollection::iterator bad = it;
205 int key = (id == LOG_ID_EVENTS || id == LOG_ID_SECURITY) ? element->GetTag() : element->uid();
206 #endif
207
208 if (coalesce) {
209 stats()->Erase(element.ToLogStatisticsElement());
210 } else {
211 stats()->Subtract(element.ToLogStatisticsElement());
212 }
213
214 it = SimpleLogBuffer::Erase(it);
215
216 #ifdef DEBUG_CHECK_FOR_STALE_ENTRIES
217 log_id_for_each(i) {
218 for (auto b : mLastWorst[i]) {
219 if (bad == b.second) {
220 LOG(ERROR) << StringPrintf("stale mLastWorst[%d] key=%d mykey=%d", i, b.first, key);
221 }
222 }
223 for (auto b : mLastWorstPidOfSystem[i]) {
224 if (bad == b.second) {
225 LOG(ERROR) << StringPrintf("stale mLastWorstPidOfSystem[%d] pid=%d", i, b.first);
226 }
227 }
228 }
229 #endif
230 return it;
231 }
232
233 // Define a temporary mechanism to report the last LogBufferElement pointer
234 // for the specified uid, pid and tid. Used below to help merge-sort when
235 // pruning for worst UID.
236 class LogBufferElementLast {
237 typedef std::unordered_map<uint64_t, LogBufferElement*> LogBufferElementMap;
238 LogBufferElementMap map;
239
240 public:
coalesce(LogBufferElement * element,uint16_t dropped)241 bool coalesce(LogBufferElement* element, uint16_t dropped) {
242 uint64_t key = LogBufferElementKey(element->uid(), element->pid(), element->tid());
243 LogBufferElementMap::iterator it = map.find(key);
244 if (it != map.end()) {
245 LogBufferElement* found = it->second;
246 uint16_t moreDropped = found->dropped_count();
247 if ((dropped + moreDropped) > USHRT_MAX) {
248 map.erase(it);
249 } else {
250 found->SetDropped(dropped + moreDropped);
251 return true;
252 }
253 }
254 return false;
255 }
256
add(LogBufferElement * element)257 void add(LogBufferElement* element) {
258 uint64_t key = LogBufferElementKey(element->uid(), element->pid(), element->tid());
259 map[key] = element;
260 }
261
clear()262 void clear() { map.clear(); }
263
clear(LogBufferElement * element)264 void clear(LogBufferElement* element) {
265 uint64_t current = element->realtime().nsec() - (EXPIRE_RATELIMIT * NS_PER_SEC);
266 for (LogBufferElementMap::iterator it = map.begin(); it != map.end();) {
267 LogBufferElement* mapElement = it->second;
268 if (mapElement->dropped_count() >= EXPIRE_THRESHOLD &&
269 current > mapElement->realtime().nsec()) {
270 it = map.erase(it);
271 } else {
272 ++it;
273 }
274 }
275 }
276
277 private:
LogBufferElementKey(uid_t uid,pid_t pid,pid_t tid)278 uint64_t LogBufferElementKey(uid_t uid, pid_t pid, pid_t tid) {
279 return uint64_t(uid) << 32 | uint64_t(pid) << 16 | uint64_t(tid);
280 }
281 };
282
283 // prune "pruneRows" of type "id" from the buffer.
284 //
285 // This garbage collection task is used to expire log entries. It is called to
286 // remove all logs (clear), all UID logs (unprivileged clear), or every
287 // 256 or 10% of the total logs (whichever is less) to prune the logs.
288 //
289 // First there is a prep phase where we discover the reader region lock that
290 // acts as a backstop to any pruning activity to stop there and go no further.
291 //
292 // There are three major pruning loops that follow. All expire from the oldest
293 // entries. Since there are multiple log buffers, the Android logging facility
294 // will appear to drop entries 'in the middle' when looking at multiple log
295 // sources and buffers. This effect is slightly more prominent when we prune
296 // the worst offender by logging source. Thus the logs slowly lose content
297 // and value as you move back in time. This is preferred since chatty sources
298 // invariably move the logs value down faster as less chatty sources would be
299 // expired in the noise.
300 //
301 // The first pass prunes elements that match 3 possible rules:
302 // 1) A high priority prune rule, for example ~100/20, which indicates elements from UID 100 and PID
303 // 20 should be pruned in this first pass.
304 // 2) The default chatty pruning rule, ~!. This rule sums the total size spent on log messages for
305 // each UID this log buffer. If the highest sum consumes more than 12.5% of the log buffer, then
306 // these elements from that UID are pruned.
307 // 3) The default AID_SYSTEM pruning rule, ~1000/!. This rule is a special case to 2), if
308 // AID_SYSTEM is the top consumer of the log buffer, then this rule sums the total size spent on
309 // log messages for each PID in AID_SYSTEM in this log buffer and prunes elements from the PID
310 // with the highest sum.
311 // This pass reevaluates the sums for rules 2) and 3) for every log message pruned. It creates
312 // 'chatty' entries for the elements that it prunes and merges related chatty entries together. It
313 // completes when one of three conditions have been met:
314 // 1) The requested element count has been pruned.
315 // 2) There are no elements that match any of these rules.
316 // 3) A reader is referencing the oldest element that would match these rules.
317 //
318 // The second pass prunes elements starting from the beginning of the log. It skips elements that
319 // match any low priority prune rules. It completes when one of three conditions have been met:
320 // 1) The requested element count has been pruned.
321 // 2) All elements except those matching low priority prune rules have been pruned.
322 // 3) A reader is referencing the oldest element that would match these rules.
323 //
324 // The final pass only happens if there are any low priority prune rules and if the first two passes
325 // were unable to prune the requested number of elements. It prunes elements all starting from the
326 // beginning of the log, regardless of if they match any low priority prune rules.
327 //
328 // If the requested number of logs was unable to be pruned, KickReader() is called to mitigate the
329 // situation before the next call to Prune() and the function returns false. Otherwise, if the
330 // requested number of logs or all logs present in the buffer are pruned, in the case of Clear(),
331 // it returns true.
Prune(log_id_t id,unsigned long pruneRows,uid_t caller_uid)332 bool ChattyLogBuffer::Prune(log_id_t id, unsigned long pruneRows, uid_t caller_uid) {
333 LogReaderThread* oldest = nullptr;
334 bool clearAll = pruneRows == ULONG_MAX;
335
336 auto reader_threads_lock = std::lock_guard{reader_list()->reader_threads_lock()};
337
338 // Region locked?
339 for (const auto& reader_thread : reader_list()->reader_threads()) {
340 if (!reader_thread->IsWatching(id)) {
341 continue;
342 }
343 if (!oldest || oldest->start() > reader_thread->start() ||
344 (oldest->start() == reader_thread->start() &&
345 reader_thread->deadline().time_since_epoch().count() != 0)) {
346 oldest = reader_thread.get();
347 }
348 }
349
350 LogBufferElementCollection::iterator it;
351
352 if (__predict_false(caller_uid != AID_ROOT)) { // unlikely
353 // Only here if clear all request from non system source, so chatty
354 // filter logistics is not required.
355 it = GetOldest(id);
356 while (it != logs().end()) {
357 LogBufferElement& element = *it;
358
359 if (element.log_id() != id || element.uid() != caller_uid) {
360 ++it;
361 continue;
362 }
363
364 if (oldest && oldest->start() <= element.sequence()) {
365 KickReader(oldest, id, pruneRows);
366 return false;
367 }
368
369 it = Erase(it);
370 if (--pruneRows == 0) {
371 return true;
372 }
373 }
374 return true;
375 }
376
377 // First prune pass.
378 bool check_high_priority = id != LOG_ID_SECURITY && prune_->HasHighPriorityPruneRules();
379 while (!clearAll && (pruneRows > 0)) {
380 // recalculate the worst offender on every batched pass
381 int worst = -1; // not valid for uid() or getKey()
382 size_t worst_sizes = 0;
383 size_t second_worst_sizes = 0;
384 pid_t worstPid = 0; // POSIX guarantees PID != 0
385
386 if (worstUidEnabledForLogid(id) && prune_->worst_uid_enabled()) {
387 // Calculate threshold as 12.5% of available storage
388 size_t threshold = max_size(id) / 8;
389
390 if (id == LOG_ID_EVENTS || id == LOG_ID_SECURITY) {
391 stats()->WorstTwoTags(threshold, &worst, &worst_sizes, &second_worst_sizes);
392 // per-pid filter for AID_SYSTEM sources is too complex
393 } else {
394 stats()->WorstTwoUids(id, threshold, &worst, &worst_sizes, &second_worst_sizes);
395
396 if (worst == AID_SYSTEM && prune_->worst_pid_of_system_enabled()) {
397 stats()->WorstTwoSystemPids(id, worst_sizes, &worstPid, &second_worst_sizes);
398 }
399 }
400 }
401
402 // skip if we have neither a worst UID or high priority prune rules
403 if (worst == -1 && !check_high_priority) {
404 break;
405 }
406
407 bool kick = false;
408 bool leading = true; // true if starting from the oldest log entry, false if starting from
409 // a specific chatty entry.
410 // Perform at least one mandatory garbage collection cycle in following
411 // - clear leading chatty tags
412 // - coalesce chatty tags
413 // - check age-out of preserved logs
414 bool gc = pruneRows <= 1;
415 if (!gc && (worst != -1)) {
416 { // begin scope for worst found iterator
417 LogBufferIteratorMap::iterator found = mLastWorst[id].find(worst);
418 if (found != mLastWorst[id].end() && found->second != logs().end()) {
419 leading = false;
420 it = found->second;
421 }
422 }
423 if (worstPid) { // begin scope for pid worst found iterator
424 // FYI: worstPid only set if !LOG_ID_EVENTS and
425 // !LOG_ID_SECURITY, not going to make that assumption ...
426 LogBufferPidIteratorMap::iterator found = mLastWorstPidOfSystem[id].find(worstPid);
427 if (found != mLastWorstPidOfSystem[id].end() && found->second != logs().end()) {
428 leading = false;
429 it = found->second;
430 }
431 }
432 }
433 if (leading) {
434 it = GetOldest(id);
435 }
436 static const log_time too_old{EXPIRE_HOUR_THRESHOLD * 60 * 60, 0};
437 LogBufferElementCollection::iterator lastt;
438 lastt = logs().end();
439 --lastt;
440 LogBufferElementLast last;
441 while (it != logs().end()) {
442 LogBufferElement& element = *it;
443
444 if (oldest && oldest->start() <= element.sequence()) {
445 // Do not let chatty eliding trigger any reader mitigation
446 break;
447 }
448
449 if (element.log_id() != id) {
450 ++it;
451 continue;
452 }
453 // below this point element->log_id() == id
454
455 uint16_t dropped = element.dropped_count();
456
457 // remove any leading drops
458 if (leading && dropped) {
459 it = Erase(it);
460 continue;
461 }
462
463 if (dropped && last.coalesce(&element, dropped)) {
464 it = Erase(it, true);
465 continue;
466 }
467
468 int key = (id == LOG_ID_EVENTS || id == LOG_ID_SECURITY) ? element.GetTag()
469 : element.uid();
470
471 if (check_high_priority && prune_->IsHighPriority(&element)) {
472 last.clear(&element);
473 it = Erase(it);
474 if (dropped) {
475 continue;
476 }
477
478 pruneRows--;
479 if (pruneRows == 0) {
480 break;
481 }
482
483 if (key == worst) {
484 kick = true;
485 if (worst_sizes < second_worst_sizes) {
486 break;
487 }
488 worst_sizes -= element.msg_len();
489 }
490 continue;
491 }
492
493 if (element.realtime() < (lastt->realtime() - too_old) ||
494 element.realtime() > lastt->realtime()) {
495 break;
496 }
497
498 if (dropped) {
499 last.add(&element);
500 if (worstPid && ((!gc && element.pid() == worstPid) ||
501 mLastWorstPidOfSystem[id].find(element.pid()) ==
502 mLastWorstPidOfSystem[id].end())) {
503 // element->uid() may not be AID_SYSTEM, next best
504 // watermark if current one empty. id is not LOG_ID_EVENTS
505 // or LOG_ID_SECURITY because of worstPid check.
506 mLastWorstPidOfSystem[id][element.pid()] = it;
507 }
508 if ((!gc && !worstPid && (key == worst)) ||
509 (mLastWorst[id].find(key) == mLastWorst[id].end())) {
510 mLastWorst[id][key] = it;
511 }
512 ++it;
513 continue;
514 }
515
516 if (key != worst || (worstPid && element.pid() != worstPid)) {
517 leading = false;
518 last.clear(&element);
519 ++it;
520 continue;
521 }
522 // key == worst below here
523 // If worstPid set, then element->pid() == worstPid below here
524
525 pruneRows--;
526 if (pruneRows == 0) {
527 break;
528 }
529
530 kick = true;
531
532 uint16_t len = element.msg_len();
533
534 // do not create any leading drops
535 if (leading) {
536 it = Erase(it);
537 } else {
538 stats()->Drop(element.ToLogStatisticsElement());
539 element.SetDropped(1);
540 if (last.coalesce(&element, 1)) {
541 it = Erase(it, true);
542 } else {
543 last.add(&element);
544 if (worstPid && (!gc || mLastWorstPidOfSystem[id].find(worstPid) ==
545 mLastWorstPidOfSystem[id].end())) {
546 // element->uid() may not be AID_SYSTEM, next best
547 // watermark if current one empty. id is not
548 // LOG_ID_EVENTS or LOG_ID_SECURITY because of worstPid.
549 mLastWorstPidOfSystem[id][worstPid] = it;
550 }
551 if ((!gc && !worstPid) || mLastWorst[id].find(worst) == mLastWorst[id].end()) {
552 mLastWorst[id][worst] = it;
553 }
554 ++it;
555 }
556 }
557 if (worst_sizes < second_worst_sizes) {
558 break;
559 }
560 worst_sizes -= len;
561 }
562 last.clear();
563
564 if (!kick || !prune_->worst_uid_enabled()) {
565 break; // the following loop will ask bad clients to skip/drop
566 }
567 }
568
569 // Second prune pass.
570 bool skipped_low_priority_prune = false;
571 bool check_low_priority =
572 id != LOG_ID_SECURITY && prune_->HasLowPriorityPruneRules() && !clearAll;
573 it = GetOldest(id);
574 while (pruneRows > 0 && it != logs().end()) {
575 LogBufferElement& element = *it;
576
577 if (element.log_id() != id) {
578 it++;
579 continue;
580 }
581
582 if (oldest && oldest->start() <= element.sequence()) {
583 if (!skipped_low_priority_prune) KickReader(oldest, id, pruneRows);
584 break;
585 }
586
587 if (check_low_priority && !element.dropped_count() && prune_->IsLowPriority(&element)) {
588 skipped_low_priority_prune = true;
589 it++;
590 continue;
591 }
592
593 it = Erase(it);
594 pruneRows--;
595 }
596
597 // Third prune pass.
598 if (skipped_low_priority_prune && pruneRows > 0) {
599 it = GetOldest(id);
600 while (it != logs().end() && pruneRows > 0) {
601 LogBufferElement& element = *it;
602
603 if (element.log_id() != id) {
604 ++it;
605 continue;
606 }
607
608 if (oldest && oldest->start() <= element.sequence()) {
609 KickReader(oldest, id, pruneRows);
610 break;
611 }
612
613 it = Erase(it);
614 pruneRows--;
615 }
616 }
617
618 return pruneRows == 0 || it == logs().end();
619 }
620