1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include <inttypes.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/prctl.h>

#include <algorithm>
#include <chrono>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <vector>

#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>

#include "cmd_stat_impl.h"
#include "command.h"
#include "environment.h"
#include "event_attr.h"
#include "event_fd.h"
#include "event_selection_set.h"
#include "event_type.h"
#include "IOEventLoop.h"
#include "utils.h"
#include "workload.h"
46
47 using android::base::Split;
48 using namespace simpleperf;
49
50 namespace simpleperf {
51
// Event types counted when no event is selected on the command line. Entries that can't be
// found or aren't supported are skipped by StatCommand::AddDefaultMeasuredEventTypes().
// The list is only ever read, so it is const.
static const std::vector<std::string> default_measured_event_types{
    "cpu-cycles",   "stalled-cycles-frontend", "stalled-cycles-backend",
    "instructions", "branch-instructions",     "branch-misses",
    "task-clock",   "context-switches",        "page-faults",
};
57
// Shape shared by the rate tables below:
//   numerator event name -> {denominator event name, description of the resulting ratio}.
using EventRateMap =
    std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>>;

// Ratios that GetRateComment() looks up regardless of the build architecture.
static const EventRateMap COMMON_EVENT_RATE_MAP = {
    {"cache-misses", {"cache-references", "miss rate"}},
    {"branch-misses", {"branch-instructions", "miss rate"}},
};

// Ratios that GetRateComment() only looks up when built for ARM or ARM64.
static const EventRateMap ARM_EVENT_RATE_MAP = {
    // Refer to "D6.10.5 Meaningful ratios between common microarchitectural events" in ARMv8
    // specification.
    {"raw-l1i-cache-refill", {"raw-l1i-cache", "level 1 instruction cache refill rate"}},
    {"raw-l1i-tlb-refill", {"raw-l1i-tlb", "level 1 instruction TLB refill rate"}},
    {"raw-l1d-cache-refill", {"raw-l1d-cache", "level 1 data or unified cache refill rate"}},
    {"raw-l1d-tlb-refill", {"raw-l1d-tlb", "level 1 data or unified TLB refill rate"}},
    {"raw-l2d-cache-refill", {"raw-l2d-cache", "level 2 data or unified cache refill rate"}},
    {"raw-l2i-cache-refill", {"raw-l2i-cache", "level 2 instruction cache refill rate"}},
    {"raw-l3d-cache-refill", {"raw-l3d-cache", "level 3 data or unified cache refill rate"}},
    {"raw-l2d-tlb-refill", {"raw-l2d-tlb", "level 2 data or unified TLB refill rate"}},
    {"raw-l2i-tlb-refill", {"raw-l2i-tlb", "level 2 instruction TLB refill rate"}},
    {"raw-bus-access", {"raw-bus-cycles", "bus accesses per cycle"}},
    {"raw-ll-cache-miss", {"raw-ll-cache", "last level data or unified cache refill rate"}},
    {"raw-dtlb-walk", {"raw-l1d-tlb", "data TLB miss rate"}},
    {"raw-itlb-walk", {"raw-l1i-tlb", "instruction TLB miss rate"}},
    {"raw-ll-cache-miss-rd", {"raw-ll-cache-rd", "memory read operation miss rate"}},
    {"raw-remote-access-rd",
     {"raw-remote-access", "read accesses to another socket in a multi-socket system"}},
    // Refer to "Table K3-2 Relationship between REFILL events and associated access events" in
    // ARMv8 specification.
    {"raw-l1d-cache-refill-rd", {"raw-l1d-cache-rd", "level 1 cache refill rate, read"}},
    {"raw-l1d-cache-refill-wr", {"raw-l1d-cache-wr", "level 1 cache refill rate, write"}},
    {"raw-l1d-tlb-refill-rd", {"raw-l1d-tlb-rd", "level 1 TLB refill rate, read"}},
    {"raw-l1d-tlb-refill-wr", {"raw-l1d-tlb-wr", "level 1 TLB refill rate, write"}},
    {"raw-l2d-cache-refill-rd", {"raw-l2d-cache-rd", "level 2 data cache refill rate, read"}},
    {"raw-l2d-cache-refill-wr", {"raw-l2d-cache-wr", "level 2 data cache refill rate, write"}},
    {"raw-l2d-tlb-refill-rd", {"raw-l2d-tlb-rd", "level 2 data TLB refill rate, read"}},
};
94
FindSummary(const std::string & type_name,const std::string & modifier,const ThreadInfo * thread,int cpu)95 const CounterSummary* CounterSummaries::FindSummary(const std::string& type_name,
96 const std::string& modifier,
97 const ThreadInfo* thread, int cpu) {
98 for (const auto& s : summaries_) {
99 if (s.type_name == type_name && s.modifier == modifier && s.thread == thread && s.cpu == cpu) {
100 return &s;
101 }
102 }
103 return nullptr;
104 }
105
AutoGenerateSummaries()106 void CounterSummaries::AutoGenerateSummaries() {
107 for (size_t i = 0; i < summaries_.size(); ++i) {
108 const CounterSummary& s = summaries_[i];
109 if (s.modifier == "u") {
110 const CounterSummary* other = FindSummary(s.type_name, "k", s.thread, s.cpu);
111 if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
112 if (FindSummary(s.type_name, "", s.thread, s.cpu) == nullptr) {
113 summaries_.emplace_back(s.type_name, "", s.group_id, s.thread, s.cpu,
114 s.count + other->count, s.runtime_in_ns, s.scale, true, csv_);
115 }
116 }
117 }
118 }
119 }
120
GenerateComments(double duration_in_sec)121 void CounterSummaries::GenerateComments(double duration_in_sec) {
122 for (auto& s : summaries_) {
123 s.comment = GetCommentForSummary(s, duration_in_sec);
124 }
125 }
126
Show(FILE * fp)127 void CounterSummaries::Show(FILE* fp) {
128 bool show_thread = !summaries_.empty() && summaries_[0].thread != nullptr;
129 bool show_cpu = !summaries_.empty() && summaries_[0].cpu != -1;
130 if (csv_) {
131 ShowCSV(fp, show_thread, show_cpu);
132 } else {
133 ShowText(fp, show_thread, show_cpu);
134 }
135 }
136
ShowCSV(FILE * fp,bool show_thread,bool show_cpu)137 void CounterSummaries::ShowCSV(FILE* fp, bool show_thread, bool show_cpu) {
138 for (auto& s : summaries_) {
139 if (show_thread) {
140 fprintf(fp, "%s,%d,%d,", s.thread->name.c_str(), s.thread->pid, s.thread->tid);
141 }
142 if (show_cpu) {
143 fprintf(fp, "%d,", s.cpu);
144 }
145 fprintf(fp, "%s,%s,%s,(%.0f%%)%s\n", s.readable_count.c_str(), s.Name().c_str(),
146 s.comment.c_str(), 1.0 / s.scale * 100, (s.auto_generated ? " (generated)," : ","));
147 }
148 }
149
ShowText(FILE * fp,bool show_thread,bool show_cpu)150 void CounterSummaries::ShowText(FILE* fp, bool show_thread, bool show_cpu) {
151 std::vector<std::string> titles;
152
153 if (show_thread) {
154 titles = {"thread_name", "pid", "tid"};
155 }
156 if (show_cpu) {
157 titles.emplace_back("cpu");
158 }
159 titles.emplace_back("count");
160 titles.emplace_back("event_name");
161 titles.emplace_back(" # count / runtime, runtime / enabled_time");
162
163 std::vector<size_t> width(titles.size(), 0);
164
165 auto adjust_width = [](size_t& w, size_t size) { w = std::max(w, size); };
166
167 // The last title is too long. Don't include it for width adjustment.
168 for (size_t i = 0; i + 1 < titles.size(); i++) {
169 adjust_width(width[i], titles[i].size());
170 }
171
172 for (auto& s : summaries_) {
173 size_t i = 0;
174 if (show_thread) {
175 adjust_width(width[i++], s.thread->name.size());
176 adjust_width(width[i++], std::to_string(s.thread->pid).size());
177 adjust_width(width[i++], std::to_string(s.thread->tid).size());
178 }
179 if (show_cpu) {
180 adjust_width(width[i++], std::to_string(s.cpu).size());
181 }
182 adjust_width(width[i++], s.readable_count.size());
183 adjust_width(width[i++], s.Name().size());
184 adjust_width(width[i++], s.comment.size());
185 }
186
187 fprintf(fp, "# ");
188 for (size_t i = 0; i < titles.size(); i++) {
189 if (titles[i] == "count") {
190 fprintf(fp, "%*s", static_cast<int>(width[i]), titles[i].c_str());
191 } else {
192 fprintf(fp, "%-*s", static_cast<int>(width[i]), titles[i].c_str());
193 }
194 if (i + 1 < titles.size()) {
195 fprintf(fp, " ");
196 }
197 }
198 fprintf(fp, "\n");
199
200 for (auto& s : summaries_) {
201 size_t i = 0;
202 if (show_thread) {
203 fprintf(fp, " %-*s", static_cast<int>(width[i++]), s.thread->name.c_str());
204 fprintf(fp, " %-*d", static_cast<int>(width[i++]), s.thread->pid);
205 fprintf(fp, " %-*d", static_cast<int>(width[i++]), s.thread->tid);
206 }
207 if (show_cpu) {
208 fprintf(fp, " %-*d", static_cast<int>(width[i++]), s.cpu);
209 }
210 fprintf(fp, " %*s %-*s # %-*s (%.0f%%)%s\n", static_cast<int>(width[i]),
211 s.readable_count.c_str(), static_cast<int>(width[i + 1]), s.Name().c_str(),
212 static_cast<int>(width[i + 2]), s.comment.c_str(), 1.0 / s.scale * 100,
213 (s.auto_generated ? " (generated)" : ""));
214 }
215 }
216
GetCommentForSummary(const CounterSummary & s,double duration_in_sec)217 std::string CounterSummaries::GetCommentForSummary(const CounterSummary& s,
218 double duration_in_sec) {
219 char sap_mid;
220 if (csv_) {
221 sap_mid = ',';
222 } else {
223 sap_mid = ' ';
224 }
225 if (s.type_name == "task-clock") {
226 double run_sec = s.count / 1e9;
227 double used_cpus = run_sec / duration_in_sec;
228 return android::base::StringPrintf("%f%ccpus used", used_cpus, sap_mid);
229 }
230 if (s.type_name == "cpu-clock") {
231 return "";
232 }
233 if (s.type_name == "cpu-cycles") {
234 if (s.runtime_in_ns == 0) {
235 return "";
236 }
237 double ghz = static_cast<double>(s.count) / s.runtime_in_ns;
238 return android::base::StringPrintf("%f%cGHz", ghz, sap_mid);
239 }
240 if (s.type_name == "instructions" && s.count != 0) {
241 const CounterSummary* other = FindSummary("cpu-cycles", s.modifier, s.thread, s.cpu);
242 if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
243 double cpi = static_cast<double>(other->count) / s.count;
244 return android::base::StringPrintf("%f%ccycles per instruction", cpi, sap_mid);
245 }
246 }
247 std::string rate_comment = GetRateComment(s, sap_mid);
248 if (!rate_comment.empty()) {
249 return rate_comment;
250 }
251 if (s.runtime_in_ns == 0) {
252 return "";
253 }
254 double runtime_in_sec = static_cast<double>(s.runtime_in_ns) / 1e9;
255 double rate = s.count / runtime_in_sec;
256 if (rate >= 1e9 - 1e5) {
257 return android::base::StringPrintf("%.3f%cG/sec", rate / 1e9, sap_mid);
258 }
259 if (rate >= 1e6 - 1e2) {
260 return android::base::StringPrintf("%.3f%cM/sec", rate / 1e6, sap_mid);
261 }
262 if (rate >= 1e3) {
263 return android::base::StringPrintf("%.3f%cK/sec", rate / 1e3, sap_mid);
264 }
265 return android::base::StringPrintf("%.3f%c/sec", rate, sap_mid);
266 }
267
GetRateComment(const CounterSummary & s,char sep)268 std::string CounterSummaries::GetRateComment(const CounterSummary& s, char sep) {
269 std::string_view miss_event_name = s.type_name;
270 std::string event_name;
271 std::string rate_desc;
272 if (auto it = COMMON_EVENT_RATE_MAP.find(miss_event_name); it != COMMON_EVENT_RATE_MAP.end()) {
273 event_name = it->second.first;
274 rate_desc = it->second.second;
275 }
276 if (event_name.empty() && (GetBuildArch() == ARCH_ARM || GetBuildArch() == ARCH_ARM64)) {
277 if (auto it = ARM_EVENT_RATE_MAP.find(miss_event_name); it != ARM_EVENT_RATE_MAP.end()) {
278 event_name = it->second.first;
279 rate_desc = it->second.second;
280 }
281 }
282 if (event_name.empty() && android::base::ConsumeSuffix(&miss_event_name, "-misses")) {
283 event_name = std::string(miss_event_name) + "s";
284 rate_desc = "miss rate";
285 }
286 if (!event_name.empty()) {
287 const CounterSummary* other = FindSummary(event_name, s.modifier, s.thread, s.cpu);
288 if (other != nullptr && other->IsMonitoredAtTheSameTime(s) && other->count != 0) {
289 double miss_rate = static_cast<double>(s.count) / other->count;
290 return android::base::StringPrintf("%f%%%c%s", miss_rate * 100, sep, rate_desc.c_str());
291 }
292 }
293 return "";
294 }
295
296 } // namespace simpleperf
297
298 namespace {
299
300 // devfreq may use performance counters to calculate memory latency (as in
301 // drivers/devfreq/arm-memlat-mon.c). Hopefully we can get more available counters by asking devfreq
302 // to not use the memory latency governor temporarily.
303 class DevfreqCounters {
304 public:
Use()305 bool Use() {
306 if (!IsRoot()) {
307 LOG(ERROR) << "--use-devfreq-counters needs root permission to set devfreq governors";
308 return false;
309 }
310 std::string devfreq_dir = "/sys/class/devfreq/";
311 for (auto& name : GetSubDirs(devfreq_dir)) {
312 std::string governor_path = devfreq_dir + name + "/governor";
313 if (IsRegularFile(governor_path)) {
314 std::string governor;
315 if (!android::base::ReadFileToString(governor_path, &governor)) {
316 LOG(ERROR) << "failed to read " << governor_path;
317 return false;
318 }
319 governor = android::base::Trim(governor);
320 if (governor == "mem_latency") {
321 if (!android::base::WriteStringToFile("performance", governor_path)) {
322 PLOG(ERROR) << "failed to write " << governor_path;
323 return false;
324 }
325 mem_latency_governor_paths_.emplace_back(std::move(governor_path));
326 }
327 }
328 }
329 return true;
330 }
331
~DevfreqCounters()332 ~DevfreqCounters() {
333 for (auto& path : mem_latency_governor_paths_) {
334 android::base::WriteStringToFile("mem_latency", path);
335 }
336 }
337
338 private:
339 std::vector<std::string> mem_latency_governor_paths_;
340 };
341
342 class StatCommand : public Command {
343 public:
StatCommand()344 StatCommand()
345 : Command("stat", "gather performance counter information",
346 // clang-format off
347 "Usage: simpleperf stat [options] [command [command-args]]\n"
348 " Gather performance counter information of running [command].\n"
349 " And -a/-p/-t option can be used to change target of counter information.\n"
350 "-a Collect system-wide information.\n"
351 #if defined(__ANDROID__)
352 "--app package_name Profile the process of an Android application.\n"
353 " On non-rooted devices, the app must be debuggable,\n"
354 " because we use run-as to switch to the app's context.\n"
355 #endif
356 "--cpu cpu_item1,cpu_item2,...\n"
357 " Collect information only on the selected cpus. cpu_item can\n"
358 " be a cpu number like 1, or a cpu range like 0-3.\n"
359 "--csv Write report in comma separate form.\n"
360 "--duration time_in_sec Monitor for time_in_sec seconds instead of running\n"
361 " [command]. Here time_in_sec may be any positive\n"
362 " floating point number.\n"
363 "--interval time_in_ms Print stat for every time_in_ms milliseconds.\n"
364 " Here time_in_ms may be any positive floating point\n"
365 " number. Simpleperf prints total values from the\n"
366 " starting point. But this can be changed by\n"
367 " --interval-only-values.\n"
368 "--interval-only-values Print numbers of events happened in each interval.\n"
369 "-e event1[:modifier1],event2[:modifier2],...\n"
370 " Select a list of events to count. An event can be:\n"
371 " 1) an event name listed in `simpleperf list`;\n"
372 " 2) a raw PMU event in rN format. N is a hex number.\n"
373 " For example, r1b selects event number 0x1b.\n"
374 " Modifiers can be added to define how the event should be\n"
375 " monitored. Possible modifiers are:\n"
376 " u - monitor user space events only\n"
377 " k - monitor kernel space events only\n"
378 "--group event1[:modifier],event2[:modifier2],...\n"
379 " Similar to -e option. But events specified in the same --group\n"
380 " option are monitored as a group, and scheduled in and out at the\n"
381 " same time.\n"
382 "--no-inherit Don't stat created child threads/processes.\n"
383 "-o output_filename Write report to output_filename instead of standard output.\n"
384 "--per-core Print counters for each cpu core.\n"
385 "--per-thread Print counters for each thread.\n"
386 "-p pid1,pid2,... Stat events on existing processes. Mutually exclusive with -a.\n"
387 "-t tid1,tid2,... Stat events on existing threads. Mutually exclusive with -a.\n"
388 "--sort key1,key2,... Select keys used to sort the report, used when --per-thread\n"
389 " or --per-core appears. The appearance order of keys decides\n"
390 " the order of keys used to sort the report.\n"
391 " Possible keys include:\n"
392 " count -- event count for each entry\n"
393 " count_per_thread -- event count for a thread on all cpus\n"
394 " cpu -- cpu id\n"
395 " pid -- process id\n"
396 " tid -- thread id\n"
397 " comm -- thread name\n"
398 " The default sort keys are:\n"
399 " count_per_thread,tid,cpu,count\n"
400 #if defined(__ANDROID__)
401 "--use-devfreq-counters On devices with Qualcomm SOCs, some hardware counters may be used\n"
402 " to monitor memory latency (in drivers/devfreq/arm-memlat-mon.c),\n"
403 " making fewer counters available to users. This option asks devfreq\n"
404 " to temporarily release counters by replacing memory-latency governor\n"
405 " with performance governor. It affects memory latency during profiling,\n"
406 " and may cause wedged power if simpleperf is killed in between.\n"
407 #endif
408 "--verbose Show result in verbose mode.\n"
409 #if 0
410 // Below options are only used internally and shouldn't be visible to the public.
411 "--in-app We are already running in the app's context.\n"
412 "--tracepoint-events file_name Read tracepoint events from [file_name] instead of tracefs.\n"
413 "--out-fd <fd> Write output to a file descriptor.\n"
414 "--stop-signal-fd <fd> Stop stating when fd is readable.\n"
415 #endif
416 // clang-format on
417 ),
418 verbose_mode_(false),
419 system_wide_collection_(false),
420 child_inherit_(true),
421 duration_in_sec_(0),
422 interval_in_ms_(0),
423 interval_only_values_(false),
424 event_selection_set_(true),
425 csv_(false),
426 in_app_context_(false) {
427 // Die if parent exits.
428 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
429 // Set default sort keys. Full key list is in BuildSummaryComparator().
430 sort_keys_ = {"count_per_thread", "tid", "cpu", "count"};
431 }
432
433 bool Run(const std::vector<std::string>& args);
434
435 private:
436 bool ParseOptions(const std::vector<std::string>& args,
437 std::vector<std::string>* non_option_args);
438 bool AddDefaultMeasuredEventTypes();
439 void SetEventSelectionFlags();
440 void MonitorEachThread();
441 void AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters);
442 bool ShowCounters(const std::vector<CountersInfo>& counters,
443 double duration_in_sec, FILE* fp);
444
445 bool verbose_mode_;
446 bool system_wide_collection_;
447 bool child_inherit_;
448 double duration_in_sec_;
449 double interval_in_ms_;
450 bool interval_only_values_;
451 std::vector<std::vector<CounterSum>> last_sum_values_;
452 std::vector<int> cpus_;
453 EventSelectionSet event_selection_set_;
454 std::string output_filename_;
455 android::base::unique_fd out_fd_;
456 bool csv_;
457 std::string app_package_name_;
458 bool in_app_context_;
459 android::base::unique_fd stop_signal_fd_;
460 bool use_devfreq_counters_ = false;
461
462 bool report_per_core_ = false;
463 bool report_per_thread_ = false;
464 // used to report event count for each thread
465 std::unordered_map<pid_t, ThreadInfo> thread_info_;
466 // used to sort report
467 std::vector<std::string> sort_keys_;
468 std::optional<SummaryComparator> summary_comparator_;
469 };
470
// Runs `simpleperf stat`: parses the options, selects the monitored targets, opens the perf
// event files and the output, registers the stop/print events on the IO event loop, runs the
// loop and finally prints the counters. Returns false as soon as one of these steps fails.
bool StatCommand::Run(const std::vector<std::string>& args) {
  if (!CheckPerfEventLimit()) {
    return false;
  }
  AllowMoreOpenedFiles();

  // 1. Parse options, and use default measured event types if not given.
  std::vector<std::string> workload_args;
  if (!ParseOptions(args, &workload_args)) {
    return false;
  }
  // With --app given and --in-app not set, a non-root caller hands the whole command over to
  // RunInAppContext() and returns its result.
  if (!app_package_name_.empty() && !in_app_context_) {
    if (!IsRoot()) {
      return RunInAppContext(app_package_name_, "stat", args, workload_args.size(),
                             output_filename_, !event_selection_set_.GetTracepointEvents().empty());
    }
  }
  // Lives until Run() returns, so its destructor runs on every return path below.
  DevfreqCounters devfreq_counters;
  if (use_devfreq_counters_) {
    if (!devfreq_counters.Use()) {
      return false;
    }
  }
  if (event_selection_set_.empty()) {
    if (!AddDefaultMeasuredEventTypes()) {
      return false;
    }
  }
  SetEventSelectionFlags();

  // 2. Create workload.
  std::unique_ptr<Workload> workload;
  if (!workload_args.empty()) {
    workload = Workload::CreateWorkload(workload_args);
    if (workload == nullptr) {
      return false;
    }
  }
  // Target selection: system wide, else targets already added by -p/-t, else the workload
  // process, else the processes of the app. Only in the -p/-t case is need_to_check_targets set.
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    if (report_per_thread_) {
      event_selection_set_.AddMonitoredProcesses(GetAllProcesses());
    } else {
      event_selection_set_.AddMonitoredThreads({-1});
    }
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      event_selection_set_.SetEnableOnExec(true);
    } else if (!app_package_name_.empty()) {
      std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
      event_selection_set_.AddMonitoredProcesses(pids);
    } else {
      LOG(ERROR)
          << "No threads to monitor. Try `simpleperf help stat` for help\n";
      return false;
    }
  } else {
    need_to_check_targets = true;
  }

  if (report_per_thread_) {
    MonitorEachThread();
  }

  // 3. Open perf_event_files and output file if defined.
  if (cpus_.empty() && !report_per_core_ && (report_per_thread_ || !system_wide_collection_)) {
    cpus_.push_back(-1);  // Get event count for each thread on all cpus.
  }
  if (!event_selection_set_.OpenEventFiles(cpus_)) {
    return false;
  }
  // Output goes to the -o file, else to the --out-fd descriptor, else to stdout. fp_holder
  // closes the FILE it owns when Run() returns; stdout is never put into it.
  std::unique_ptr<FILE, decltype(&fclose)> fp_holder(nullptr, fclose);
  if (!output_filename_.empty()) {
    fp_holder.reset(fopen(output_filename_.c_str(), "we"));
    if (fp_holder == nullptr) {
      PLOG(ERROR) << "failed to open " << output_filename_;
      return false;
    }
  } else if (out_fd_ != -1) {
    fp_holder.reset(fdopen(out_fd_.release(), "we"));
    if (fp_holder == nullptr) {
      PLOG(ERROR) << "failed to write output.";
      return false;
    }
  }
  FILE* fp = fp_holder ? fp_holder.get() : stdout;

  // 4. Add signal/periodic Events.
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  if (interval_in_ms_ != 0) {
    if (!loop->UsePreciseTimer()) {
      return false;
    }
  }
  // start_time is assigned in step 5, right before the loop runs; print_counters captures it
  // by reference and therefore sees the assigned value.
  std::chrono::time_point<std::chrono::steady_clock> start_time;
  std::vector<CountersInfo> counters;
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
  // Shared by every stop condition: the signals below, --stop-signal-fd and --duration.
  auto exit_loop_callback = [loop]() {
    return loop->ExitLoop();
  };
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP}, exit_loop_callback)) {
    return false;
  }
  if (stop_signal_fd_ != -1) {
    if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback)) {
      return false;
    }
  }
  if (duration_in_sec_ != 0) {
    if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_), exit_loop_callback)) {
      return false;
    }
  }
  // Reads the counters and prints a report whose duration is measured from start_time.
  auto print_counters = [&]() {
    auto end_time = std::chrono::steady_clock::now();
    if (!event_selection_set_.ReadCounters(&counters)) {
      return false;
    }
    double duration_in_sec =
        std::chrono::duration_cast<std::chrono::duration<double>>(end_time -
                                                                  start_time)
            .count();
    if (interval_only_values_) {
      AdjustToIntervalOnlyValues(counters);
    }
    if (!ShowCounters(counters, duration_in_sec, fp)) {
      return false;
    }
    return true;
  };

  if (interval_in_ms_ != 0) {
    if (!loop->AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0),
                                print_counters)) {
      return false;
    }
  }

  // 5. Count events while workload running.
  start_time = std::chrono::steady_clock::now();
  if (workload != nullptr && !workload->Start()) {
    return false;
  }
  if (!loop->RunLoop()) {
    return false;
  }

  // 6. Read and print counters.
  // With --interval the periodic event above does the printing, so nothing is printed here.
  if (interval_in_ms_ == 0) {
    return print_counters();
  }
  return true;
}
627
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args)628 bool StatCommand::ParseOptions(const std::vector<std::string>& args,
629 std::vector<std::string>* non_option_args) {
630 OptionValueMap options;
631 std::vector<std::pair<OptionName, OptionValue>> ordered_options;
632
633 if (!PreprocessOptions(args, GetStatCmdOptionFormats(), &options, &ordered_options,
634 non_option_args)) {
635 return false;
636 }
637
638 // Process options.
639 system_wide_collection_ = options.PullBoolValue("-a");
640
641 if (auto value = options.PullValue("--app"); value) {
642 app_package_name_ = *value->str_value;
643 }
644 if (auto value = options.PullValue("--cpu"); value) {
645 cpus_ = GetCpusFromString(*value->str_value);
646 }
647
648 csv_ = options.PullBoolValue("--csv");
649
650 if (!options.PullDoubleValue("--duration", &duration_in_sec_, 1e-9)) {
651 return false;
652 }
653 if (!options.PullDoubleValue("--interval", &interval_in_ms_, 1e-9)) {
654 return false;
655 }
656 interval_only_values_ = options.PullBoolValue("--interval-only-values");
657
658 if (auto values = options.PullValues("-e"); values) {
659 for (const auto& value : values.value()) {
660 for (const auto& event_type : Split(*value.str_value, ",")) {
661 if (!event_selection_set_.AddEventType(event_type)) {
662 return false;
663 }
664 }
665 }
666 }
667
668 if (auto values = options.PullValues("--group"); values) {
669 for (const auto& value : values.value()) {
670 if (!event_selection_set_.AddEventGroup(Split(*value.str_value, ","))) {
671 return false;
672 }
673 }
674 }
675
676 in_app_context_ = options.PullBoolValue("--in-app");
677 child_inherit_ = !options.PullBoolValue("--no-inherit");
678
679 if (auto value = options.PullValue("-o"); value) {
680 output_filename_ = *value->str_value;
681 }
682 if (auto value = options.PullValue("--out-fd"); value) {
683 out_fd_.reset(static_cast<int>(value->uint_value));
684 }
685
686 report_per_core_ = options.PullBoolValue("--per-core");
687 report_per_thread_ = options.PullBoolValue("--per-thread");
688
689 if (auto values = options.PullValues("-p"); values) {
690 for (const auto& value : values.value()) {
691 std::set<pid_t> pids;
692 if (!GetValidThreadsFromThreadString(*value.str_value, &pids)) {
693 return false;
694 }
695 event_selection_set_.AddMonitoredProcesses(pids);
696 }
697 }
698
699 if (auto value = options.PullValue("--sort"); value) {
700 sort_keys_ = Split(*value->str_value, ",");
701 }
702
703 if (auto value = options.PullValue("--stop-signal-fd"); value) {
704 stop_signal_fd_.reset(static_cast<int>(value->uint_value));
705 }
706
707 if (auto values = options.PullValues("-t"); values) {
708 for (const auto& value : values.value()) {
709 std::set<pid_t> tids;
710 if (!GetValidThreadsFromThreadString(*value.str_value, &tids)) {
711 return false;
712 }
713 event_selection_set_.AddMonitoredThreads(tids);
714 }
715 }
716
717 if (auto value = options.PullValue("--tracepoint-events"); value) {
718 if (!SetTracepointEventsFilePath(*value->str_value)) {
719 return false;
720 }
721 }
722
723 use_devfreq_counters_ = options.PullBoolValue("--use-devfreq-counters");
724 verbose_mode_ = options.PullBoolValue("--verbose");
725
726 CHECK(options.values.empty());
727 CHECK(ordered_options.empty());
728
729 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
730 LOG(ERROR) << "Stat system wide and existing processes/threads can't be "
731 "used at the same time.";
732 return false;
733 }
734 if (system_wide_collection_ && !IsRoot()) {
735 LOG(ERROR) << "System wide profiling needs root privilege.";
736 return false;
737 }
738
739 if (report_per_core_ || report_per_thread_) {
740 summary_comparator_ = BuildSummaryComparator(sort_keys_, report_per_thread_, report_per_core_);
741 if (!summary_comparator_) {
742 return false;
743 }
744 }
745 return true;
746 }
747
AddDefaultMeasuredEventTypes()748 bool StatCommand::AddDefaultMeasuredEventTypes() {
749 for (auto& name : default_measured_event_types) {
750 // It is not an error when some event types in the default list are not
751 // supported by the kernel.
752 const EventType* type = FindEventTypeByName(name);
753 if (type != nullptr &&
754 IsEventAttrSupported(CreateDefaultPerfEventAttr(*type), name)) {
755 if (!event_selection_set_.AddEventType(name)) {
756 return false;
757 }
758 }
759 }
760 if (event_selection_set_.empty()) {
761 LOG(ERROR) << "Failed to add any supported default measured types";
762 return false;
763 }
764 return true;
765 }
766
// Forwards child_inherit_ (false when --no-inherit is given) to the event selection set.
void StatCommand::SetEventSelectionFlags() {
  event_selection_set_.SetInherit(child_inherit_);
}
770
MonitorEachThread()771 void StatCommand::MonitorEachThread() {
772 std::vector<pid_t> threads;
773 for (auto pid : event_selection_set_.GetMonitoredProcesses()) {
774 for (auto tid : GetThreadsInProcess(pid)) {
775 ThreadInfo info;
776 if (GetThreadName(tid, &info.name)) {
777 info.tid = tid;
778 info.pid = pid;
779 thread_info_[tid] = std::move(info);
780 threads.push_back(tid);
781 }
782 }
783 }
784 for (auto tid : event_selection_set_.GetMonitoredThreads()) {
785 ThreadInfo info;
786 if (ReadThreadNameAndPid(tid, &info.name, &info.pid)) {
787 info.tid = tid;
788 thread_info_[tid] = std::move(info);
789 threads.push_back(tid);
790 }
791 }
792 event_selection_set_.ClearMonitoredTargets();
793 event_selection_set_.AddMonitoredThreads(threads);
794 }
795
AdjustToIntervalOnlyValues(std::vector<CountersInfo> & counters)796 void StatCommand::AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters) {
797 if (last_sum_values_.size() < counters.size()) {
798 last_sum_values_.resize(counters.size());
799 }
800 for (size_t i = 0; i < counters.size(); i++) {
801 std::vector<CounterInfo>& counters_per_event = counters[i].counters;
802 std::vector<CounterSum>& last_sum = last_sum_values_[i];
803
804 if (last_sum.size() < counters_per_event.size()) {
805 last_sum.resize(counters_per_event.size());
806 }
807 for (size_t j = 0; j < counters_per_event.size(); j++) {
808 PerfCounter& counter = counters_per_event[j].counter;
809 CounterSum new_sum;
810 new_sum.FromCounter(counter);
811 CounterSum delta = new_sum - last_sum[j];
812 delta.ToCounter(counter);
813 last_sum[j] = new_sum;
814 }
815 }
816 }
817
ShowCounters(const std::vector<CountersInfo> & counters,double duration_in_sec,FILE * fp)818 bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
819 double duration_in_sec, FILE* fp) {
820 if (csv_) {
821 fprintf(fp, "Performance counter statistics,\n");
822 } else {
823 fprintf(fp, "Performance counter statistics:\n\n");
824 }
825
826 if (verbose_mode_) {
827 for (auto& counters_info : counters) {
828 for (auto& counter_info : counters_info.counters) {
829 if (csv_) {
830 fprintf(fp, "%s,tid,%d,cpu,%d,count,%" PRIu64 ",time_enabled,%" PRIu64
831 ",time running,%" PRIu64 ",id,%" PRIu64 ",\n",
832 counters_info.event_name.c_str(), counter_info.tid,
833 counter_info.cpu, counter_info.counter.value,
834 counter_info.counter.time_enabled,
835 counter_info.counter.time_running, counter_info.counter.id);
836 } else {
837 fprintf(fp,
838 "%s(tid %d, cpu %d): count %" PRIu64 ", time_enabled %" PRIu64
839 ", time running %" PRIu64 ", id %" PRIu64 "\n",
840 counters_info.event_name.c_str(), counter_info.tid,
841 counter_info.cpu, counter_info.counter.value,
842 counter_info.counter.time_enabled,
843 counter_info.counter.time_running, counter_info.counter.id);
844 }
845 }
846 }
847 }
848
849 CounterSummaryBuilder builder(report_per_thread_, report_per_core_, csv_, thread_info_,
850 summary_comparator_);
851 for (const auto& info : counters) {
852 builder.AddCountersForOneEventType(info);
853 }
854 CounterSummaries summaries(builder.Build(), csv_);
855 summaries.AutoGenerateSummaries();
856 summaries.GenerateComments(duration_in_sec);
857 summaries.Show(fp);
858
859 if (csv_)
860 fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec);
861 else
862 fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
863
864 const char* COUNTER_MULTIPLEX_INFO =
865 "probably caused by hardware counter multiplexing (less counters than events).\n"
866 "Try --use-devfreq-counters if on a rooted device.";
867
868 if (cpus_ == std::vector<int>(1, -1) ||
869 event_selection_set_.GetMonitoredThreads() == std::set<pid_t>({-1})) {
870 // We either monitor a thread on all cpus, or monitor all threads on a cpu. In both cases,
871 // if percentages < 100%, probably it is caused by hardware counter multiplexing.
872 bool counters_always_available = true;
873 for (const auto& summary : summaries.Summaries()) {
874 if (!summary.IsMonitoredAllTheTime()) {
875 counters_always_available = false;
876 break;
877 }
878 }
879 if (!counters_always_available) {
880 LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time,\n"
881 << COUNTER_MULTIPLEX_INFO;
882 }
883 } else if (report_per_thread_) {
884 // We monitor each thread on each cpu.
885 LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for each thread.\n"
886 << "If percentage sum of a thread < 99%, or report for a running thread is missing,\n"
887 << COUNTER_MULTIPLEX_INFO;
888 } else {
889 // We monitor some threads on each cpu.
890 LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for monitored\n"
891 << "threads. If percentage sum < 99%, or report for an event is missing,\n"
892 << COUNTER_MULTIPLEX_INFO;
893 }
894 return true;
895 }
896
897 } // namespace
898
899 namespace simpleperf {
900
RegisterStatCommand()901 void RegisterStatCommand() {
902 RegisterCommand("stat",
903 [] { return std::unique_ptr<Command>(new StatCommand); });
904 }
905
906 } // namespace simpleperf
907