1 //===- GNUArchiveReader.cpp -----------------------------------------------===//
2 //
3 // The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "mcld/LD/GNUArchiveReader.h"
10
11 #include "mcld/InputTree.h"
12 #include "mcld/LinkerConfig.h"
13 #include "mcld/Module.h"
14 #include "mcld/ADT/SizeTraits.h"
15 #include "mcld/MC/Attribute.h"
16 #include "mcld/MC/Input.h"
17 #include "mcld/LD/ELFObjectReader.h"
18 #include "mcld/LD/ResolveInfo.h"
19 #include "mcld/Support/FileHandle.h"
20 #include "mcld/Support/FileSystem.h"
21 #include "mcld/Support/MemoryArea.h"
22 #include "mcld/Support/MsgHandling.h"
23 #include "mcld/Support/Path.h"
24
25 #include <llvm/ADT/StringRef.h>
26 #include <llvm/Support/Host.h>
27
28 #include <cstdlib>
29 #include <cstring>
30
31 namespace mcld {
32
GNUArchiveReader(Module & pModule,ELFObjectReader & pELFObjectReader)33 GNUArchiveReader::GNUArchiveReader(Module& pModule,
34 ELFObjectReader& pELFObjectReader)
35 : m_Module(pModule), m_ELFObjectReader(pELFObjectReader) {
36 }
37
~GNUArchiveReader()38 GNUArchiveReader::~GNUArchiveReader() {
39 }
40
41 /// isMyFormat
isMyFormat(Input & pInput,bool & pContinue) const42 bool GNUArchiveReader::isMyFormat(Input& pInput, bool& pContinue) const {
43 assert(pInput.hasMemArea());
44 if (pInput.memArea()->size() < Archive::MAGIC_LEN)
45 return false;
46
47 llvm::StringRef region =
48 pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
49 const char* str = region.begin();
50
51 bool result = false;
52 assert(str != NULL);
53 pContinue = true;
54 if (isArchive(str) || isThinArchive(str))
55 result = true;
56
57 return result;
58 }
59
60 /// isArchive
isArchive(const char * pStr) const61 bool GNUArchiveReader::isArchive(const char* pStr) const {
62 return (memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN) == 0);
63 }
64
65 /// isThinArchive
isThinArchive(const char * pStr) const66 bool GNUArchiveReader::isThinArchive(const char* pStr) const {
67 return (memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN) == 0);
68 }
69
70 /// isThinArchive
isThinArchive(Input & pInput) const71 bool GNUArchiveReader::isThinArchive(Input& pInput) const {
72 assert(pInput.hasMemArea());
73 llvm::StringRef region =
74 pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
75 const char* str = region.begin();
76
77 bool result = false;
78 assert(str != NULL);
79 if (isThinArchive(str))
80 result = true;
81
82 return result;
83 }
84
readArchive(const LinkerConfig & pConfig,Archive & pArchive)85 bool GNUArchiveReader::readArchive(const LinkerConfig& pConfig,
86 Archive& pArchive) {
87 // bypass the empty archive
88 if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->size())
89 return true;
90
91 if (pArchive.getARFile().attribute()->isWholeArchive())
92 return includeAllMembers(pConfig, pArchive);
93
94 // if this is the first time read this archive, setup symtab and strtab
95 if (pArchive.getSymbolTable().empty()) {
96 // read the symtab of the archive
97 readSymbolTable(pArchive);
98
99 // read the strtab of the archive
100 readStringTable(pArchive);
101
102 // add root archive to ArchiveMemberMap
103 pArchive.addArchiveMember(pArchive.getARFile().name(),
104 pArchive.inputs().root(),
105 &InputTree::Downward);
106 }
107
108 // include the needed members in the archive and build up the input tree
109 bool willSymResolved;
110 do {
111 willSymResolved = false;
112 for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
113 // bypass if we already decided to include this symbol or not
114 if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
115 continue;
116
117 // bypass if another symbol with the same object file offset is included
118 if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
119 pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
120 continue;
121 }
122
123 // check if we should include this defined symbol
124 Archive::Symbol::Status status =
125 shouldIncludeSymbol(pArchive.getSymbolName(idx));
126 if (Archive::Symbol::Unknown != status)
127 pArchive.setSymbolStatus(idx, status);
128
129 if (Archive::Symbol::Include == status) {
130 // include the object member from the given offset
131 includeMember(pConfig, pArchive, pArchive.getObjFileOffset(idx));
132 willSymResolved = true;
133 } // end of if
134 } // end of for
135 } while (willSymResolved);
136
137 return true;
138 }
139
140 /// readMemberHeader - read the header of a member in a archive file and then
141 /// return the corresponding archive member (it may be an input object or
142 /// another archive)
143 /// @param pArchiveRoot - the archive root that holds the strtab (extended
144 /// name table)
145 /// @param pArchiveFile - the archive that contains the needed object
146 /// @param pFileOffset - file offset of the member header in the archive
147 /// @param pNestedOffset - used when we find a nested archive
148 /// @param pMemberSize - the file size of this member
readMemberHeader(Archive & pArchiveRoot,Input & pArchiveFile,uint32_t pFileOffset,uint32_t & pNestedOffset,size_t & pMemberSize)149 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
150 Input& pArchiveFile,
151 uint32_t pFileOffset,
152 uint32_t& pNestedOffset,
153 size_t& pMemberSize) {
154 assert(pArchiveFile.hasMemArea());
155
156 llvm::StringRef header_region = pArchiveFile.memArea()->request(
157 (pArchiveFile.fileOffset() + pFileOffset), sizeof(Archive::MemberHeader));
158 const Archive::MemberHeader* header =
159 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
160
161 assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
162 0);
163
164 pMemberSize = atoi(header->size);
165
166 // parse the member name and nested offset if any
167 std::string member_name;
168 llvm::StringRef name_field(header->name, sizeof(header->name));
169 if (header->name[0] != '/') {
170 // this is an object file in an archive
171 size_t pos = name_field.find_first_of('/');
172 member_name.assign(name_field.substr(0, pos).str());
173 } else {
174 // this is an object/archive file in a thin archive
175 size_t begin = 1;
176 size_t end = name_field.find_first_of(" :");
177 uint32_t name_offset = 0;
178 // parse the name offset
179 name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
180
181 if (name_field[end] == ':') {
182 // there is a nested offset
183 begin = end + 1;
184 end = name_field.find_first_of(' ', begin);
185 name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
186 }
187
188 // get the member name from the extended name table
189 assert(pArchiveRoot.hasStrTable());
190 begin = name_offset;
191 end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
192 member_name.assign(
193 pArchiveRoot.getStrTable().substr(begin, end - begin - 1));
194 }
195
196 Input* member = NULL;
197 bool isThinAR = isThinArchive(pArchiveFile);
198 if (!isThinAR) {
199 // this is an object file in an archive
200 member = pArchiveRoot.getMemberFile(
201 pArchiveFile,
202 isThinAR,
203 member_name,
204 pArchiveFile.path(),
205 (pFileOffset + sizeof(Archive::MemberHeader)));
206 } else {
207 // this is a member in a thin archive
208 // try to find if this is a archive already in the map first
209 Archive::ArchiveMember* ar_member =
210 pArchiveRoot.getArchiveMember(member_name);
211 if (ar_member != NULL) {
212 return ar_member->file;
213 }
214
215 // get nested file path, the nested file's member name is the relative
216 // path to the archive containing it.
217 sys::fs::Path input_path(pArchiveFile.path().parent_path());
218 if (!input_path.empty())
219 input_path.append(sys::fs::Path(member_name));
220 else
221 input_path.assign(member_name);
222
223 member = pArchiveRoot.getMemberFile(
224 pArchiveFile, isThinAR, member_name, input_path);
225 }
226
227 return member;
228 }
229
230 template <size_t SIZE>
readSymbolTableEntries(Archive & pArchive,llvm::StringRef pMemRegion)231 static void readSymbolTableEntries(Archive& pArchive,
232 llvm::StringRef pMemRegion) {
233 typedef typename SizeTraits<SIZE>::Offset Offset;
234
235 const Offset* data = reinterpret_cast<const Offset*>(pMemRegion.begin());
236
237 // read the number of symbols
238 Offset number = 0;
239 if (llvm::sys::IsLittleEndianHost)
240 number = mcld::bswap<SIZE>(*data);
241 else
242 number = *data;
243
244 // set up the pointers for file offset and name offset
245 ++data;
246 const char* name = reinterpret_cast<const char*>(data + number);
247
248 // add the archive symbols
249 for (Offset i = 0; i < number; ++i) {
250 if (llvm::sys::IsLittleEndianHost)
251 pArchive.addSymbol(name, mcld::bswap<SIZE>(*data));
252 else
253 pArchive.addSymbol(name, *data);
254 name += strlen(name) + 1;
255 ++data;
256 }
257 }
258
259 /// readSymbolTable - read the archive symbol map (armap)
readSymbolTable(Archive & pArchive)260 bool GNUArchiveReader::readSymbolTable(Archive& pArchive) {
261 assert(pArchive.getARFile().hasMemArea());
262 MemoryArea* memory_area = pArchive.getARFile().memArea();
263
264 llvm::StringRef header_region = memory_area->request(
265 (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN),
266 sizeof(Archive::MemberHeader));
267 const Archive::MemberHeader* header =
268 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
269 assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
270 0);
271
272 int symtab_size = atoi(header->size);
273 pArchive.setSymTabSize(symtab_size);
274
275 if (!pArchive.getARFile().attribute()->isWholeArchive()) {
276 llvm::StringRef symtab_region = memory_area->request(
277 (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN +
278 sizeof(Archive::MemberHeader)),
279 symtab_size);
280
281 if (strncmp(header->name,
282 Archive::SVR4_SYMTAB_NAME,
283 strlen(Archive::SVR4_SYMTAB_NAME)) == 0)
284 readSymbolTableEntries<32>(pArchive, symtab_region);
285 else if (strncmp(header->name,
286 Archive::IRIX6_SYMTAB_NAME,
287 strlen(Archive::IRIX6_SYMTAB_NAME)) == 0)
288 readSymbolTableEntries<64>(pArchive, symtab_region);
289 else
290 unreachable(diag::err_unsupported_archive);
291 }
292 return true;
293 }
294
295 /// readStringTable - read the strtab for long file name of the archive
readStringTable(Archive & pArchive)296 bool GNUArchiveReader::readStringTable(Archive& pArchive) {
297 size_t offset = Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) +
298 pArchive.getSymTabSize();
299
300 if ((offset & 1) != 0x0)
301 ++offset;
302
303 assert(pArchive.getARFile().hasMemArea());
304 MemoryArea* memory_area = pArchive.getARFile().memArea();
305
306 llvm::StringRef header_region =
307 memory_area->request((pArchive.getARFile().fileOffset() + offset),
308 sizeof(Archive::MemberHeader));
309 const Archive::MemberHeader* header =
310 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
311
312 assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
313 0);
314
315 if (memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name)) == 0) {
316 // read the extended name table
317 int strtab_size = atoi(header->size);
318 llvm::StringRef strtab_region =
319 memory_area->request((pArchive.getARFile().fileOffset() + offset +
320 sizeof(Archive::MemberHeader)),
321 strtab_size);
322 const char* strtab = strtab_region.begin();
323 pArchive.getStrTable().assign(strtab, strtab_size);
324 }
325 return true;
326 }
327
328 /// shouldIncludeStatus - given a sym name from armap and check if including
329 /// the corresponding archive member, and then return the decision
shouldIncludeSymbol(const llvm::StringRef & pSymName) const330 enum Archive::Symbol::Status GNUArchiveReader::shouldIncludeSymbol(
331 const llvm::StringRef& pSymName) const {
332 // TODO: handle symbol version issue and user defined symbols
333 const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
334 if (info != NULL) {
335 if (!info->isUndef())
336 return Archive::Symbol::Exclude;
337 if (info->isWeak())
338 return Archive::Symbol::Unknown;
339 return Archive::Symbol::Include;
340 }
341 return Archive::Symbol::Unknown;
342 }
343
344 /// includeMember - include the object member in the given file offset, and
345 /// return the size of the object
346 /// @param pConfig - LinkerConfig
347 /// @param pArchiveRoot - the archive root
348 /// @param pFileOffset - file offset of the member header in the archive
includeMember(const LinkerConfig & pConfig,Archive & pArchive,uint32_t pFileOffset)349 size_t GNUArchiveReader::includeMember(const LinkerConfig& pConfig,
350 Archive& pArchive,
351 uint32_t pFileOffset) {
352 Input* cur_archive = &(pArchive.getARFile());
353 Input* member = NULL;
354 uint32_t file_offset = pFileOffset;
355 size_t size = 0;
356 do {
357 uint32_t nested_offset = 0;
358 // use the file offset in current archive to find out the member we
359 // want to include
360 member = readMemberHeader(
361 pArchive, *cur_archive, file_offset, nested_offset, size);
362 assert(member != NULL);
363 // bypass if we get an archive that is already in the map
364 if (Input::Archive == member->type()) {
365 cur_archive = member;
366 file_offset = nested_offset;
367 continue;
368 }
369
370 // insert a node into the subtree of current archive.
371 Archive::ArchiveMember* parent =
372 pArchive.getArchiveMember(cur_archive->name());
373
374 assert(parent != NULL);
375 pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
376
377 // move the iterator to new created node, and also adjust the
378 // direction to Afterward for next insertion in this subtree
379 parent->move->move(parent->lastPos);
380 parent->move = &InputTree::Afterward;
381 bool doContinue = false;
382
383 if (m_ELFObjectReader.isMyFormat(*member, doContinue)) {
384 member->setType(Input::Object);
385 // Set this object as no export if the archive is in the exclude libs.
386 if (pArchive.getARFile().noExport()) {
387 member->setNoExport();
388 }
389 pArchive.addObjectMember(pFileOffset, parent->lastPos);
390 m_ELFObjectReader.readHeader(*member);
391 m_ELFObjectReader.readSections(*member);
392 m_ELFObjectReader.readSymbols(*member);
393 m_Module.getObjectList().push_back(member);
394 } else if (doContinue && isMyFormat(*member, doContinue)) {
395 member->setType(Input::Archive);
396 // when adding a new archive node, set the iterator to archive
397 // itself, and set the direction to Downward
398 pArchive.addArchiveMember(
399 member->name(), parent->lastPos, &InputTree::Downward);
400 cur_archive = member;
401 file_offset = nested_offset;
402 } else {
403 warning(diag::warn_unrecognized_input_file)
404 << member->path() << pConfig.targets().triple().str();
405 }
406 } while (Input::Object != member->type());
407 return size;
408 }
409
410 /// includeAllMembers - include all object members. This is called if
411 /// --whole-archive is the attribute for this archive file.
includeAllMembers(const LinkerConfig & pConfig,Archive & pArchive)412 bool GNUArchiveReader::includeAllMembers(const LinkerConfig& pConfig,
413 Archive& pArchive) {
414 // read the symtab of the archive
415 readSymbolTable(pArchive);
416
417 // read the strtab of the archive
418 readStringTable(pArchive);
419
420 // add root archive to ArchiveMemberMap
421 pArchive.addArchiveMember(pArchive.getARFile().name(),
422 pArchive.inputs().root(),
423 &InputTree::Downward);
424
425 bool isThinAR = isThinArchive(pArchive.getARFile());
426 uint32_t begin_offset = pArchive.getARFile().fileOffset() +
427 Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) +
428 pArchive.getSymTabSize();
429 if (pArchive.hasStrTable()) {
430 if ((begin_offset & 1) != 0x0)
431 ++begin_offset;
432 begin_offset +=
433 sizeof(Archive::MemberHeader) + pArchive.getStrTable().size();
434 }
435 uint32_t end_offset = pArchive.getARFile().memArea()->size();
436 for (uint32_t offset = begin_offset; offset < end_offset;
437 offset += sizeof(Archive::MemberHeader)) {
438 size_t size = includeMember(pConfig, pArchive, offset);
439
440 if (!isThinAR) {
441 offset += size;
442 }
443
444 if ((offset & 1) != 0x0)
445 ++offset;
446 }
447 return true;
448 }
449
450 } // namespace mcld
451