1 // Copyright (C) 2016 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
#include "linker/module_merger.h"
#include "repr/ir_dumper.h"
#include "repr/ir_reader.h"
#include "repr/ir_representation.h"
#include "repr/symbol/so_file_parser.h"
#include "repr/symbol/version_script_parser.h"
#include "utils/command_line_utils.h"
#include "utils/header_abi_util.h"

#include <llvm/ADT/Optional.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/raw_ostream.h>

#include <algorithm>
#include <atomic>
#include <cassert>
#include <fstream>
#include <functional>
#include <iostream>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <thread>
#include <vector>

#include <stdlib.h>
38 
39 
40 using namespace header_checker;
41 using header_checker::repr::TextFormatIR;
42 using header_checker::utils::CollectAllExportedHeaders;
43 using header_checker::utils::GetCwd;
44 using header_checker::utils::HideIrrelevantCommandLineOptions;
45 
46 
47 static llvm::cl::OptionCategory header_linker_category(
48     "header-abi-linker options");
49 
50 static llvm::cl::list<std::string> dump_files(
51     llvm::cl::Positional, llvm::cl::desc("<dump-files>"), llvm::cl::ZeroOrMore,
52     llvm::cl::cat(header_linker_category));
53 
54 static llvm::cl::opt<std::string> linked_dump(
55     "o", llvm::cl::desc("<linked dump>"), llvm::cl::Required,
56     llvm::cl::cat(header_linker_category));
57 
58 static llvm::cl::list<std::string> exported_header_dirs(
59     "I", llvm::cl::desc("<export_include_dirs>"), llvm::cl::Prefix,
60     llvm::cl::ZeroOrMore, llvm::cl::cat(header_linker_category));
61 
62 static llvm::cl::opt<std::string> root_dir(
63     "root-dir",
64     llvm::cl::desc("Specify the directory that the paths in the dump files are "
65                    "relative to. Default to current working directory"),
66     llvm::cl::Optional, llvm::cl::cat(header_linker_category));
67 
68 static llvm::cl::opt<std::string> version_script(
69     "v", llvm::cl::desc("<version_script>"), llvm::cl::Optional,
70     llvm::cl::cat(header_linker_category));
71 
72 static llvm::cl::list<std::string> excluded_symbol_versions(
73     "exclude-symbol-version", llvm::cl::Optional,
74     llvm::cl::cat(header_linker_category));
75 
76 static llvm::cl::list<std::string> excluded_symbol_tags(
77     "exclude-symbol-tag", llvm::cl::Optional,
78     llvm::cl::cat(header_linker_category));
79 
80 static llvm::cl::opt<std::string> api(
81     "api", llvm::cl::desc("<api>"), llvm::cl::Optional,
82     llvm::cl::init("current"),
83     llvm::cl::cat(header_linker_category));
84 
85 static llvm::cl::opt<std::string> arch(
86     "arch", llvm::cl::desc("<arch>"), llvm::cl::Optional,
87     llvm::cl::cat(header_linker_category));
88 
89 static llvm::cl::opt<bool> no_filter(
90     "no-filter", llvm::cl::desc("Do not filter any abi"), llvm::cl::Optional,
91     llvm::cl::cat(header_linker_category));
92 
93 static llvm::cl::opt<std::string> so_file(
94     "so", llvm::cl::desc("<path to so file>"), llvm::cl::Optional,
95     llvm::cl::cat(header_linker_category));
96 
97 static llvm::cl::opt<TextFormatIR> input_format(
98     "input-format", llvm::cl::desc("Specify format of input dump files"),
99     llvm::cl::values(clEnumValN(TextFormatIR::ProtobufTextFormat,
100                                 "ProtobufTextFormat", "ProtobufTextFormat"),
101                      clEnumValN(TextFormatIR::Json, "Json", "JSON")),
102     llvm::cl::init(TextFormatIR::Json),
103     llvm::cl::cat(header_linker_category));
104 
105 static llvm::cl::opt<TextFormatIR> output_format(
106     "output-format", llvm::cl::desc("Specify format of output dump file"),
107     llvm::cl::values(clEnumValN(TextFormatIR::ProtobufTextFormat,
108                                 "ProtobufTextFormat", "ProtobufTextFormat"),
109                      clEnumValN(TextFormatIR::Json, "Json", "JSON")),
110     llvm::cl::init(TextFormatIR::Json),
111     llvm::cl::cat(header_linker_category));
112 
113 static llvm::cl::opt<std::size_t> sources_per_thread(
114     "sources-per-thread",
115     llvm::cl::desc("Specify number of input dump files each thread parses, for "
116                    "debugging merging types"),
117     llvm::cl::init(7), llvm::cl::Hidden);
118 
119 class HeaderAbiLinker {
120  public:
HeaderAbiLinker(const std::vector<std::string> & dump_files,const std::vector<std::string> & exported_header_dirs,const std::string & version_script,const std::string & so_file,const std::string & linked_dump,const std::string & arch,const std::string & api,const std::vector<std::string> & excluded_symbol_versions,const std::vector<std::string> & excluded_symbol_tags)121   HeaderAbiLinker(
122       const std::vector<std::string> &dump_files,
123       const std::vector<std::string> &exported_header_dirs,
124       const std::string &version_script,
125       const std::string &so_file,
126       const std::string &linked_dump,
127       const std::string &arch,
128       const std::string &api,
129       const std::vector<std::string> &excluded_symbol_versions,
130       const std::vector<std::string> &excluded_symbol_tags)
131       : dump_files_(dump_files), exported_header_dirs_(exported_header_dirs),
132         version_script_(version_script), so_file_(so_file),
133         out_dump_name_(linked_dump), arch_(arch), api_(api),
134         excluded_symbol_versions_(excluded_symbol_versions),
135         excluded_symbol_tags_(excluded_symbol_tags) {}
136 
137   bool LinkAndDump();
138 
139  private:
140   template <typename T>
141   bool LinkDecl(repr::ModuleIR *dst,
142                 const repr::AbiElementMap<T> &src,
143                 const std::function<bool(const std::string &)> &symbol_filter);
144 
145   std::unique_ptr<linker::ModuleMerger> ReadInputDumpFiles();
146 
147   bool ReadExportedSymbols();
148 
149   bool ReadExportedSymbolsFromVersionScript();
150 
151   bool ReadExportedSymbolsFromSharedObjectFile();
152 
153   bool LinkTypes(const repr::ModuleIR &module, repr::ModuleIR *linked_module);
154 
155   bool LinkFunctions(const repr::ModuleIR &module,
156                      repr::ModuleIR *linked_module);
157 
158   bool LinkGlobalVars(const repr::ModuleIR &module,
159                       repr::ModuleIR *linked_module);
160 
161   bool LinkExportedSymbols(repr::ModuleIR *linked_module);
162 
163   bool LinkExportedSymbols(repr::ModuleIR *linked_module,
164                            const repr::ExportedSymbolSet &exported_symbols);
165 
166   template <typename SymbolMap>
167   bool LinkExportedSymbols(repr::ModuleIR *linked_module,
168                            const SymbolMap &symbols);
169 
170   // Check whether a symbol name is considered as exported.  If both
171   // `shared_object_symbols_` and `version_script_symbols_` exists, the symbol
172   // name must pass the `HasSymbol()` test in both cases.
173   bool IsSymbolExported(const std::string &name) const;
174 
175  private:
176   const std::vector<std::string> &dump_files_;
177   const std::vector<std::string> &exported_header_dirs_;
178   const std::string &version_script_;
179   const std::string &so_file_;
180   const std::string &out_dump_name_;
181   const std::string &arch_;
182   const std::string &api_;
183   const std::vector<std::string> &excluded_symbol_versions_;
184   const std::vector<std::string> &excluded_symbol_tags_;
185 
186   std::set<std::string> exported_headers_;
187 
188   // Exported symbols
189   std::unique_ptr<repr::ExportedSymbolSet> shared_object_symbols_;
190 
191   std::unique_ptr<repr::ExportedSymbolSet> version_script_symbols_;
192 };
193 
194 static void
DeDuplicateAbiElementsThread(const std::vector<std::string> & dump_files,const std::set<std::string> * exported_headers,linker::ModuleMerger * global_merger,std::mutex * global_merger_lock,std::atomic<std::size_t> * cnt)195 DeDuplicateAbiElementsThread(const std::vector<std::string> &dump_files,
196                              const std::set<std::string> *exported_headers,
197                              linker::ModuleMerger *global_merger,
198                              std::mutex *global_merger_lock,
199                              std::atomic<std::size_t> *cnt) {
200   linker::ModuleMerger local_merger(exported_headers);
201 
202   auto begin_it = dump_files.begin();
203   std::size_t num_sources = dump_files.size();
204   while (1) {
205     std::size_t i = cnt->fetch_add(sources_per_thread);
206     if (i >= num_sources) {
207       break;
208     }
209     std::size_t end = std::min(i + sources_per_thread, num_sources);
210     for (auto it = begin_it + i; it != begin_it + end; it++) {
211       std::unique_ptr<repr::IRReader> reader =
212           repr::IRReader::CreateIRReader(input_format, exported_headers);
213       assert(reader != nullptr);
214       if (!reader->ReadDump(*it)) {
215         llvm::errs() << "ReadDump failed\n";
216         ::exit(1);
217       }
218       local_merger.MergeGraphs(reader->GetModule());
219     }
220   }
221 
222   std::lock_guard<std::mutex> lock(*global_merger_lock);
223   global_merger->MergeGraphs(local_merger.GetModule());
224 }
225 
ReadInputDumpFiles()226 std::unique_ptr<linker::ModuleMerger> HeaderAbiLinker::ReadInputDumpFiles() {
227   std::unique_ptr<linker::ModuleMerger> merger(
228       new linker::ModuleMerger(&exported_headers_));
229 
230   std::size_t max_threads = std::thread::hardware_concurrency();
231   std::size_t num_threads =
232       sources_per_thread < dump_files_.size()
233           ? std::min(dump_files_.size() / sources_per_thread, max_threads)
234           : 1;
235   std::vector<std::thread> threads;
236   std::atomic<std::size_t> cnt(0);
237   std::mutex merger_lock;
238   for (std::size_t i = 1; i < num_threads; i++) {
239     threads.emplace_back(DeDuplicateAbiElementsThread, dump_files_,
240                          &exported_headers_, merger.get(), &merger_lock, &cnt);
241   }
242   DeDuplicateAbiElementsThread(dump_files_, &exported_headers_, merger.get(),
243                                &merger_lock, &cnt);
244   for (auto &thread : threads) {
245     thread.join();
246   }
247 
248   return merger;
249 }
250 
LinkAndDump()251 bool HeaderAbiLinker::LinkAndDump() {
252   // Extract exported functions and variables from a shared lib or a version
253   // script.
254   if (!ReadExportedSymbols()) {
255     return false;
256   }
257 
258   // Construct the list of exported headers for source location filtering.
259   exported_headers_ = CollectAllExportedHeaders(
260       exported_header_dirs_, root_dir.empty() ? GetCwd() : root_dir);
261 
262   // Read all input ABI dumps.
263   auto merger = ReadInputDumpFiles();
264 
265   const repr::ModuleIR &module = merger->GetModule();
266 
267   // Link input ABI dumps.
268   std::unique_ptr<repr::ModuleIR> linked_module(
269       new repr::ModuleIR(&exported_headers_));
270 
271   if (!LinkExportedSymbols(linked_module.get())) {
272     return false;
273   }
274 
275   if (!LinkTypes(module, linked_module.get()) ||
276       !LinkFunctions(module, linked_module.get()) ||
277       !LinkGlobalVars(module, linked_module.get())) {
278     llvm::errs() << "Failed to link elements\n";
279     return false;
280   }
281 
282   // Dump the linked module.
283   std::unique_ptr<repr::IRDumper> ir_dumper =
284       repr::IRDumper::CreateIRDumper(output_format, out_dump_name_);
285   assert(ir_dumper != nullptr);
286   if (!ir_dumper->Dump(*linked_module)) {
287     llvm::errs() << "Failed to serialize the linked output to ostream\n";
288     return false;
289   }
290 
291   return true;
292 }
293 
294 template <typename T>
LinkDecl(repr::ModuleIR * dst,const repr::AbiElementMap<T> & src,const std::function<bool (const std::string &)> & symbol_filter)295 bool HeaderAbiLinker::LinkDecl(
296     repr::ModuleIR *dst, const repr::AbiElementMap<T> &src,
297     const std::function<bool(const std::string &)> &symbol_filter) {
298   assert(dst != nullptr);
299   for (auto &&element : src) {
300     // If we are not using a version script and exported headers are available,
301     // filter out unexported abi.
302     std::string source_file = element.second.GetSourceFile();
303     // Builtin types will not have source file information.
304     if (!exported_headers_.empty() && !source_file.empty() &&
305         exported_headers_.find(source_file) == exported_headers_.end()) {
306       continue;
307     }
308     // Check for the existence of the element in version script / symbol file.
309     if (!symbol_filter(element.first)) {
310       continue;
311     }
312     if (!dst->AddLinkableMessage(element.second)) {
313       llvm::errs() << "Failed to add element to linked dump\n";
314       return false;
315     }
316   }
317   return true;
318 }
319 
LinkTypes(const repr::ModuleIR & module,repr::ModuleIR * linked_module)320 bool HeaderAbiLinker::LinkTypes(const repr::ModuleIR &module,
321                                 repr::ModuleIR *linked_module) {
322   auto no_filter = [](const std::string &symbol) { return true; };
323   return LinkDecl(linked_module, module.GetRecordTypes(), no_filter) &&
324          LinkDecl(linked_module, module.GetEnumTypes(), no_filter) &&
325          LinkDecl(linked_module, module.GetFunctionTypes(), no_filter) &&
326          LinkDecl(linked_module, module.GetBuiltinTypes(), no_filter) &&
327          LinkDecl(linked_module, module.GetPointerTypes(), no_filter) &&
328          LinkDecl(linked_module, module.GetRvalueReferenceTypes(), no_filter) &&
329          LinkDecl(linked_module, module.GetLvalueReferenceTypes(), no_filter) &&
330          LinkDecl(linked_module, module.GetArrayTypes(), no_filter) &&
331          LinkDecl(linked_module, module.GetQualifiedTypes(), no_filter);
332 }
333 
IsSymbolExported(const std::string & name) const334 bool HeaderAbiLinker::IsSymbolExported(const std::string &name) const {
335   if (shared_object_symbols_ && !shared_object_symbols_->HasSymbol(name)) {
336     return false;
337   }
338   if (version_script_symbols_ && !version_script_symbols_->HasSymbol(name)) {
339     return false;
340   }
341   return true;
342 }
343 
LinkFunctions(const repr::ModuleIR & module,repr::ModuleIR * linked_module)344 bool HeaderAbiLinker::LinkFunctions(const repr::ModuleIR &module,
345                                     repr::ModuleIR *linked_module) {
346   auto symbol_filter = [this](const std::string &linker_set_key) {
347     return IsSymbolExported(linker_set_key);
348   };
349   return LinkDecl(linked_module, module.GetFunctions(), symbol_filter);
350 }
351 
LinkGlobalVars(const repr::ModuleIR & module,repr::ModuleIR * linked_module)352 bool HeaderAbiLinker::LinkGlobalVars(const repr::ModuleIR &module,
353                                      repr::ModuleIR *linked_module) {
354   auto symbol_filter = [this](const std::string &linker_set_key) {
355     return IsSymbolExported(linker_set_key);
356   };
357   return LinkDecl(linked_module, module.GetGlobalVariables(), symbol_filter);
358 }
359 
360 template <typename SymbolMap>
LinkExportedSymbols(repr::ModuleIR * dst,const SymbolMap & symbols)361 bool HeaderAbiLinker::LinkExportedSymbols(repr::ModuleIR *dst,
362                                           const SymbolMap &symbols) {
363   for (auto &&symbol : symbols) {
364     if (!IsSymbolExported(symbol.first)) {
365       continue;
366     }
367     if (!dst->AddElfSymbol(symbol.second)) {
368       return false;
369     }
370   }
371   return true;
372 }
373 
LinkExportedSymbols(repr::ModuleIR * linked_module,const repr::ExportedSymbolSet & exported_symbols)374 bool HeaderAbiLinker::LinkExportedSymbols(
375     repr::ModuleIR *linked_module,
376     const repr::ExportedSymbolSet &exported_symbols) {
377   return (LinkExportedSymbols(linked_module, exported_symbols.GetFunctions()) &&
378           LinkExportedSymbols(linked_module, exported_symbols.GetVars()));
379 }
380 
LinkExportedSymbols(repr::ModuleIR * linked_module)381 bool HeaderAbiLinker::LinkExportedSymbols(repr::ModuleIR *linked_module) {
382   if (shared_object_symbols_) {
383     return LinkExportedSymbols(linked_module, *shared_object_symbols_);
384   }
385 
386   if (version_script_symbols_) {
387     return LinkExportedSymbols(linked_module, *version_script_symbols_);
388   }
389 
390   return false;
391 }
392 
ReadExportedSymbols()393 bool HeaderAbiLinker::ReadExportedSymbols() {
394   if (so_file_.empty() && version_script_.empty()) {
395     llvm::errs() << "Either shared lib or version script must be specified.\n";
396     return false;
397   }
398 
399   if (!so_file_.empty()) {
400     if (!ReadExportedSymbolsFromSharedObjectFile()) {
401       llvm::errs() << "Failed to parse the shared library (.so file): "
402                    << so_file_ << "\n";
403       return false;
404     }
405   }
406 
407   if (!version_script_.empty()) {
408     if (!ReadExportedSymbolsFromVersionScript()) {
409       llvm::errs() << "Failed to parse the version script: " << version_script_
410                    << "\n";
411       return false;
412     }
413   }
414 
415   return true;
416 }
417 
ReadExportedSymbolsFromVersionScript()418 bool HeaderAbiLinker::ReadExportedSymbolsFromVersionScript() {
419   llvm::Optional<utils::ApiLevel> api_level = utils::ParseApiLevel(api_);
420   if (!api_level) {
421     llvm::errs() << "-api must be either \"current\" or an integer (e.g. 21)\n";
422     return false;
423   }
424 
425   std::ifstream stream(version_script_, std::ios_base::in);
426   if (!stream) {
427     llvm::errs() << "Failed to open version script file\n";
428     return false;
429   }
430 
431   repr::VersionScriptParser parser;
432   parser.SetArch(arch_);
433   parser.SetApiLevel(api_level.getValue());
434   for (auto &&version : excluded_symbol_versions_) {
435     parser.AddExcludedSymbolVersion(version);
436   }
437   for (auto &&tag : excluded_symbol_tags_) {
438     parser.AddExcludedSymbolTag(tag);
439   }
440 
441   version_script_symbols_ = parser.Parse(stream);
442   if (!version_script_symbols_) {
443     llvm::errs() << "Failed to parse version script file\n";
444     return false;
445   }
446 
447   return true;
448 }
449 
ReadExportedSymbolsFromSharedObjectFile()450 bool HeaderAbiLinker::ReadExportedSymbolsFromSharedObjectFile() {
451   std::unique_ptr<repr::SoFileParser> so_parser =
452       repr::SoFileParser::Create(so_file_);
453   if (!so_parser) {
454     return false;
455   }
456 
457   shared_object_symbols_ = so_parser->Parse();
458   if (!shared_object_symbols_) {
459     llvm::errs() << "Failed to parse shared object file\n";
460     return false;
461   }
462 
463   return true;
464 }
465 
main(int argc,const char ** argv)466 int main(int argc, const char **argv) {
467   HideIrrelevantCommandLineOptions(header_linker_category);
468   llvm::cl::ParseCommandLineOptions(argc, argv, "header-linker");
469 
470   if (so_file.empty() && version_script.empty()) {
471     llvm::errs() << "One of -so or -v needs to be specified\n";
472     return -1;
473   }
474 
475   if (no_filter) {
476     static_cast<std::vector<std::string> &>(exported_header_dirs).clear();
477   }
478 
479   HeaderAbiLinker Linker(dump_files, exported_header_dirs, version_script,
480                          so_file, linked_dump, arch, api,
481                          excluded_symbol_versions,
482                          excluded_symbol_tags);
483 
484   if (!Linker.LinkAndDump()) {
485     llvm::errs() << "Failed to link and dump elements\n";
486     return -1;
487   }
488 
489   return 0;
490 }
491