1 //===- GNUArchiveReader.cpp -----------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "mcld/LD/GNUArchiveReader.h"
10 
11 #include "mcld/InputTree.h"
12 #include "mcld/LinkerConfig.h"
13 #include "mcld/Module.h"
14 #include "mcld/ADT/SizeTraits.h"
15 #include "mcld/MC/Attribute.h"
16 #include "mcld/MC/Input.h"
17 #include "mcld/LD/ELFObjectReader.h"
18 #include "mcld/LD/ResolveInfo.h"
19 #include "mcld/Support/FileHandle.h"
20 #include "mcld/Support/FileSystem.h"
21 #include "mcld/Support/MemoryArea.h"
22 #include "mcld/Support/MsgHandling.h"
23 #include "mcld/Support/Path.h"
24 
25 #include <llvm/ADT/StringRef.h>
26 #include <llvm/Support/Host.h>
27 
28 #include <cstdlib>
29 #include <cstring>
30 
31 namespace mcld {
32 
GNUArchiveReader(Module & pModule,ELFObjectReader & pELFObjectReader)33 GNUArchiveReader::GNUArchiveReader(Module& pModule,
34                                    ELFObjectReader& pELFObjectReader)
35     : m_Module(pModule), m_ELFObjectReader(pELFObjectReader) {
36 }
37 
~GNUArchiveReader()38 GNUArchiveReader::~GNUArchiveReader() {
39 }
40 
41 /// isMyFormat
isMyFormat(Input & pInput,bool & pContinue) const42 bool GNUArchiveReader::isMyFormat(Input& pInput, bool& pContinue) const {
43   assert(pInput.hasMemArea());
44   if (pInput.memArea()->size() < Archive::MAGIC_LEN)
45     return false;
46 
47   llvm::StringRef region =
48       pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
49   const char* str = region.begin();
50 
51   bool result = false;
52   assert(str != NULL);
53   pContinue = true;
54   if (isArchive(str) || isThinArchive(str))
55     result = true;
56 
57   return result;
58 }
59 
60 /// isArchive
isArchive(const char * pStr) const61 bool GNUArchiveReader::isArchive(const char* pStr) const {
62   return (memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN) == 0);
63 }
64 
65 /// isThinArchive
isThinArchive(const char * pStr) const66 bool GNUArchiveReader::isThinArchive(const char* pStr) const {
67   return (memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN) == 0);
68 }
69 
70 /// isThinArchive
isThinArchive(Input & pInput) const71 bool GNUArchiveReader::isThinArchive(Input& pInput) const {
72   assert(pInput.hasMemArea());
73   llvm::StringRef region =
74       pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
75   const char* str = region.begin();
76 
77   bool result = false;
78   assert(str != NULL);
79   if (isThinArchive(str))
80     result = true;
81 
82   return result;
83 }
84 
readArchive(const LinkerConfig & pConfig,Archive & pArchive)85 bool GNUArchiveReader::readArchive(const LinkerConfig& pConfig,
86                                    Archive& pArchive) {
87   // bypass the empty archive
88   if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->size())
89     return true;
90 
91   if (pArchive.getARFile().attribute()->isWholeArchive())
92     return includeAllMembers(pConfig, pArchive);
93 
94   // if this is the first time read this archive, setup symtab and strtab
95   if (pArchive.getSymbolTable().empty()) {
96     // read the symtab of the archive
97     readSymbolTable(pArchive);
98 
99     // read the strtab of the archive
100     readStringTable(pArchive);
101 
102     // add root archive to ArchiveMemberMap
103     pArchive.addArchiveMember(pArchive.getARFile().name(),
104                               pArchive.inputs().root(),
105                               &InputTree::Downward);
106   }
107 
108   // include the needed members in the archive and build up the input tree
109   bool willSymResolved;
110   do {
111     willSymResolved = false;
112     for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
113       // bypass if we already decided to include this symbol or not
114       if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
115         continue;
116 
117       // bypass if another symbol with the same object file offset is included
118       if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
119         pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
120         continue;
121       }
122 
123       // check if we should include this defined symbol
124       Archive::Symbol::Status status =
125           shouldIncludeSymbol(pArchive.getSymbolName(idx));
126       if (Archive::Symbol::Unknown != status)
127         pArchive.setSymbolStatus(idx, status);
128 
129       if (Archive::Symbol::Include == status) {
130         // include the object member from the given offset
131         includeMember(pConfig, pArchive, pArchive.getObjFileOffset(idx));
132         willSymResolved = true;
133       }  // end of if
134     }    // end of for
135   } while (willSymResolved);
136 
137   return true;
138 }
139 
140 /// readMemberHeader - read the header of a member in a archive file and then
141 /// return the corresponding archive member (it may be an input object or
142 /// another archive)
143 /// @param pArchiveRoot  - the archive root that holds the strtab (extended
144 ///                        name table)
145 /// @param pArchiveFile  - the archive that contains the needed object
146 /// @param pFileOffset   - file offset of the member header in the archive
147 /// @param pNestedOffset - used when we find a nested archive
148 /// @param pMemberSize   - the file size of this member
readMemberHeader(Archive & pArchiveRoot,Input & pArchiveFile,uint32_t pFileOffset,uint32_t & pNestedOffset,size_t & pMemberSize)149 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
150                                           Input& pArchiveFile,
151                                           uint32_t pFileOffset,
152                                           uint32_t& pNestedOffset,
153                                           size_t& pMemberSize) {
154   assert(pArchiveFile.hasMemArea());
155 
156   llvm::StringRef header_region = pArchiveFile.memArea()->request(
157       (pArchiveFile.fileOffset() + pFileOffset), sizeof(Archive::MemberHeader));
158   const Archive::MemberHeader* header =
159       reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
160 
161   assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
162          0);
163 
164   pMemberSize = atoi(header->size);
165 
166   // parse the member name and nested offset if any
167   std::string member_name;
168   llvm::StringRef name_field(header->name, sizeof(header->name));
169   if (header->name[0] != '/') {
170     // this is an object file in an archive
171     size_t pos = name_field.find_first_of('/');
172     member_name.assign(name_field.substr(0, pos).str());
173   } else {
174     // this is an object/archive file in a thin archive
175     size_t begin = 1;
176     size_t end = name_field.find_first_of(" :");
177     uint32_t name_offset = 0;
178     // parse the name offset
179     name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
180 
181     if (name_field[end] == ':') {
182       // there is a nested offset
183       begin = end + 1;
184       end = name_field.find_first_of(' ', begin);
185       name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
186     }
187 
188     // get the member name from the extended name table
189     assert(pArchiveRoot.hasStrTable());
190     begin = name_offset;
191     end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
192     member_name.assign(
193         pArchiveRoot.getStrTable().substr(begin, end - begin - 1));
194   }
195 
196   Input* member = NULL;
197   bool isThinAR = isThinArchive(pArchiveFile);
198   if (!isThinAR) {
199     // this is an object file in an archive
200     member = pArchiveRoot.getMemberFile(
201         pArchiveFile,
202         isThinAR,
203         member_name,
204         pArchiveFile.path(),
205         (pFileOffset + sizeof(Archive::MemberHeader)));
206   } else {
207     // this is a member in a thin archive
208     // try to find if this is a archive already in the map first
209     Archive::ArchiveMember* ar_member =
210         pArchiveRoot.getArchiveMember(member_name);
211     if (ar_member != NULL) {
212       return ar_member->file;
213     }
214 
215     // get nested file path, the nested file's member name is the relative
216     // path to the archive containing it.
217     sys::fs::Path input_path(pArchiveFile.path().parent_path());
218     if (!input_path.empty())
219       input_path.append(sys::fs::Path(member_name));
220     else
221       input_path.assign(member_name);
222 
223     member = pArchiveRoot.getMemberFile(
224         pArchiveFile, isThinAR, member_name, input_path);
225   }
226 
227   return member;
228 }
229 
230 template <size_t SIZE>
readSymbolTableEntries(Archive & pArchive,llvm::StringRef pMemRegion)231 static void readSymbolTableEntries(Archive& pArchive,
232                                    llvm::StringRef pMemRegion) {
233   typedef typename SizeTraits<SIZE>::Offset Offset;
234 
235   const Offset* data = reinterpret_cast<const Offset*>(pMemRegion.begin());
236 
237   // read the number of symbols
238   Offset number = 0;
239   if (llvm::sys::IsLittleEndianHost)
240     number = mcld::bswap<SIZE>(*data);
241   else
242     number = *data;
243 
244   // set up the pointers for file offset and name offset
245   ++data;
246   const char* name = reinterpret_cast<const char*>(data + number);
247 
248   // add the archive symbols
249   for (Offset i = 0; i < number; ++i) {
250     if (llvm::sys::IsLittleEndianHost)
251       pArchive.addSymbol(name, mcld::bswap<SIZE>(*data));
252     else
253       pArchive.addSymbol(name, *data);
254     name += strlen(name) + 1;
255     ++data;
256   }
257 }
258 
259 /// readSymbolTable - read the archive symbol map (armap)
readSymbolTable(Archive & pArchive)260 bool GNUArchiveReader::readSymbolTable(Archive& pArchive) {
261   assert(pArchive.getARFile().hasMemArea());
262   MemoryArea* memory_area = pArchive.getARFile().memArea();
263 
264   llvm::StringRef header_region = memory_area->request(
265       (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN),
266       sizeof(Archive::MemberHeader));
267   const Archive::MemberHeader* header =
268       reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
269   assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
270          0);
271 
272   int symtab_size = atoi(header->size);
273   pArchive.setSymTabSize(symtab_size);
274 
275   if (!pArchive.getARFile().attribute()->isWholeArchive()) {
276     llvm::StringRef symtab_region = memory_area->request(
277         (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN +
278          sizeof(Archive::MemberHeader)),
279         symtab_size);
280 
281     if (strncmp(header->name,
282                 Archive::SVR4_SYMTAB_NAME,
283                 strlen(Archive::SVR4_SYMTAB_NAME)) == 0)
284       readSymbolTableEntries<32>(pArchive, symtab_region);
285     else if (strncmp(header->name,
286                      Archive::IRIX6_SYMTAB_NAME,
287                      strlen(Archive::IRIX6_SYMTAB_NAME)) == 0)
288       readSymbolTableEntries<64>(pArchive, symtab_region);
289     else
290       unreachable(diag::err_unsupported_archive);
291   }
292   return true;
293 }
294 
295 /// readStringTable - read the strtab for long file name of the archive
readStringTable(Archive & pArchive)296 bool GNUArchiveReader::readStringTable(Archive& pArchive) {
297   size_t offset = Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) +
298                   pArchive.getSymTabSize();
299 
300   if ((offset & 1) != 0x0)
301     ++offset;
302 
303   assert(pArchive.getARFile().hasMemArea());
304   MemoryArea* memory_area = pArchive.getARFile().memArea();
305 
306   llvm::StringRef header_region =
307       memory_area->request((pArchive.getARFile().fileOffset() + offset),
308                            sizeof(Archive::MemberHeader));
309   const Archive::MemberHeader* header =
310       reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
311 
312   assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
313          0);
314 
315   if (memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name)) == 0) {
316     // read the extended name table
317     int strtab_size = atoi(header->size);
318     llvm::StringRef strtab_region =
319         memory_area->request((pArchive.getARFile().fileOffset() + offset +
320                               sizeof(Archive::MemberHeader)),
321                              strtab_size);
322     const char* strtab = strtab_region.begin();
323     pArchive.getStrTable().assign(strtab, strtab_size);
324   }
325   return true;
326 }
327 
328 /// shouldIncludeStatus - given a sym name from armap and check if including
329 /// the corresponding archive member, and then return the decision
shouldIncludeSymbol(const llvm::StringRef & pSymName) const330 enum Archive::Symbol::Status GNUArchiveReader::shouldIncludeSymbol(
331     const llvm::StringRef& pSymName) const {
332   // TODO: handle symbol version issue and user defined symbols
333   const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
334   if (info != NULL) {
335     if (!info->isUndef())
336       return Archive::Symbol::Exclude;
337     if (info->isWeak())
338       return Archive::Symbol::Unknown;
339     return Archive::Symbol::Include;
340   }
341   return Archive::Symbol::Unknown;
342 }
343 
344 /// includeMember - include the object member in the given file offset, and
345 /// return the size of the object
346 /// @param pConfig - LinkerConfig
347 /// @param pArchiveRoot - the archive root
348 /// @param pFileOffset  - file offset of the member header in the archive
includeMember(const LinkerConfig & pConfig,Archive & pArchive,uint32_t pFileOffset)349 size_t GNUArchiveReader::includeMember(const LinkerConfig& pConfig,
350                                        Archive& pArchive,
351                                        uint32_t pFileOffset) {
352   Input* cur_archive = &(pArchive.getARFile());
353   Input* member = NULL;
354   uint32_t file_offset = pFileOffset;
355   size_t size = 0;
356   do {
357     uint32_t nested_offset = 0;
358     // use the file offset in current archive to find out the member we
359     // want to include
360     member = readMemberHeader(
361         pArchive, *cur_archive, file_offset, nested_offset, size);
362     assert(member != NULL);
363     // bypass if we get an archive that is already in the map
364     if (Input::Archive == member->type()) {
365       cur_archive = member;
366       file_offset = nested_offset;
367       continue;
368     }
369 
370     // insert a node into the subtree of current archive.
371     Archive::ArchiveMember* parent =
372         pArchive.getArchiveMember(cur_archive->name());
373 
374     assert(parent != NULL);
375     pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
376 
377     // move the iterator to new created node, and also adjust the
378     // direction to Afterward for next insertion in this subtree
379     parent->move->move(parent->lastPos);
380     parent->move = &InputTree::Afterward;
381     bool doContinue = false;
382 
383     if (m_ELFObjectReader.isMyFormat(*member, doContinue)) {
384       member->setType(Input::Object);
385       // Set this object as no export if the archive is in the exclude libs.
386       if (pArchive.getARFile().noExport()) {
387         member->setNoExport();
388       }
389       pArchive.addObjectMember(pFileOffset, parent->lastPos);
390       m_ELFObjectReader.readHeader(*member);
391       m_ELFObjectReader.readSections(*member);
392       m_ELFObjectReader.readSymbols(*member);
393       m_Module.getObjectList().push_back(member);
394     } else if (doContinue && isMyFormat(*member, doContinue)) {
395       member->setType(Input::Archive);
396       // when adding a new archive node, set the iterator to archive
397       // itself, and set the direction to Downward
398       pArchive.addArchiveMember(
399           member->name(), parent->lastPos, &InputTree::Downward);
400       cur_archive = member;
401       file_offset = nested_offset;
402     } else {
403       warning(diag::warn_unrecognized_input_file)
404           << member->path() << pConfig.targets().triple().str();
405     }
406   } while (Input::Object != member->type());
407   return size;
408 }
409 
410 /// includeAllMembers - include all object members. This is called if
411 /// --whole-archive is the attribute for this archive file.
includeAllMembers(const LinkerConfig & pConfig,Archive & pArchive)412 bool GNUArchiveReader::includeAllMembers(const LinkerConfig& pConfig,
413                                          Archive& pArchive) {
414   // read the symtab of the archive
415   readSymbolTable(pArchive);
416 
417   // read the strtab of the archive
418   readStringTable(pArchive);
419 
420   // add root archive to ArchiveMemberMap
421   pArchive.addArchiveMember(pArchive.getARFile().name(),
422                             pArchive.inputs().root(),
423                             &InputTree::Downward);
424 
425   bool isThinAR = isThinArchive(pArchive.getARFile());
426   uint32_t begin_offset = pArchive.getARFile().fileOffset() +
427                           Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) +
428                           pArchive.getSymTabSize();
429   if (pArchive.hasStrTable()) {
430     if ((begin_offset & 1) != 0x0)
431       ++begin_offset;
432     begin_offset +=
433         sizeof(Archive::MemberHeader) + pArchive.getStrTable().size();
434   }
435   uint32_t end_offset = pArchive.getARFile().memArea()->size();
436   for (uint32_t offset = begin_offset; offset < end_offset;
437        offset += sizeof(Archive::MemberHeader)) {
438     size_t size = includeMember(pConfig, pArchive, offset);
439 
440     if (!isThinAR) {
441       offset += size;
442     }
443 
444     if ((offset & 1) != 0x0)
445       ++offset;
446   }
447   return true;
448 }
449 
450 }  // namespace mcld
451