]> git.saurik.com Git - apple/ld64.git/blob - src/ld/parsers/archive_file.cpp
708f1fb4f8fe56d2cd43a87e214eb535b28d37c7
[apple/ld64.git] / src / ld / parsers / archive_file.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2005-2011 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 #include <stdint.h>
26 #include <math.h>
27 #include <unistd.h>
28 #include <sys/param.h>
29 #include <mach-o/ranlib.h>
30 #include <ar.h>
31
32 #include <vector>
33 #include <set>
34 #include <map>
35 #include <algorithm>
36 #include <unordered_map>
37
38 #include "MachOFileAbstraction.hpp"
39 #include "Architectures.hpp"
40
41 #include "macho_relocatable_file.h"
42 #include "lto_file.h"
43 #include "archive_file.h"
44
45
46 namespace archive {
47
// Read-only pointer to one record of an archive's table of contents.
typedef struct ranlib const* ConstRanLibPtr;

// File<A> is defined further down; Parser<A> needs the name first.
template <typename A>
class File;
52
53
54 template <typename A>
55 class Parser
56 {
57 public:
58 typedef typename A::P P;
59
60 static bool validFile(const uint8_t* fileContent, uint64_t fileLength,
61 const mach_o::relocatable::ParserOptions& opts) {
62 return File<A>::validFile(fileContent, fileLength, opts); }
63 static File<A>* parse(const uint8_t* fileContent, uint64_t fileLength,
64 const char* path, time_t mTime,
65 ld::File::Ordinal ordinal, const ParserOptions& opts) {
66 return new File<A>(fileContent, fileLength, path, mTime,
67 ordinal, opts);
68 }
69
70 };
71
72 template <typename A>
73 class File : public ld::archive::File
74 {
75 public:
76 static bool validFile(const uint8_t* fileContent, uint64_t fileLength,
77 const mach_o::relocatable::ParserOptions& opts);
78 File(const uint8_t* fileContent, uint64_t fileLength,
79 const char* pth, time_t modTime,
80 ld::File::Ordinal ord, const ParserOptions& opts);
81 virtual ~File() {}
82
83 // overrides of ld::File
84 virtual bool forEachAtom(ld::File::AtomHandler&) const;
85 virtual bool justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const;
86 virtual uint32_t subFileCount() const { return _archiveFilelength/sizeof(ar_hdr); }
87
88 // overrides of ld::archive::File
89 virtual bool justInTimeDataOnlyforEachAtom(const char* name, ld::File::AtomHandler& handler) const;
90
91 private:
92 static bool validMachOFile(const uint8_t* fileContent, uint64_t fileLength,
93 const mach_o::relocatable::ParserOptions& opts);
94 static bool validLTOFile(const uint8_t* fileContent, uint64_t fileLength,
95 const mach_o::relocatable::ParserOptions& opts);
96 static cpu_type_t architecture();
97
98 class Entry : ar_hdr
99 {
100 public:
101 void getName(char *, int) const;
102 time_t modificationTime() const;
103 const uint8_t* content() const;
104 uint32_t contentSize() const;
105 const Entry* next() const;
106 private:
107 bool hasLongName() const;
108 unsigned int getLongNameSpace() const;
109
110 };
111
112 struct MemberState { ld::relocatable::File* file; const Entry *entry; bool logged; bool loaded; uint16_t index;};
113 bool loadMember(MemberState& state, ld::File::AtomHandler& handler, const char *format, ...) const;
114
115 typedef std::unordered_map<const char*, const struct ranlib*, ld::CStringHash, ld::CStringEquals> NameToEntryMap;
116
117 typedef typename A::P P;
118 typedef typename A::P::E E;
119
120 typedef std::map<const class Entry*, MemberState> MemberToStateMap;
121
122 const struct ranlib* ranlibHashSearch(const char* name) const;
123 MemberState& makeObjectFileForMember(const Entry* member) const;
124 bool memberHasObjCCategories(const Entry* member) const;
125 void dumpTableOfContents();
126 void buildHashTable();
127
128 const uint8_t* _archiveFileContent;
129 uint64_t _archiveFilelength;
130 const struct ranlib* _tableOfContents;
131 uint32_t _tableOfContentCount;
132 const char* _tableOfContentStrings;
133 mutable MemberToStateMap _instantiatedEntries;
134 NameToEntryMap _hashTable;
135 const bool _forceLoadAll;
136 const bool _forceLoadObjC;
137 const bool _forceLoadThis;
138 const bool _objc2ABI;
139 const bool _verboseLoad;
140 const bool _logAllFiles;
141 const mach_o::relocatable::ParserOptions _objOpts;
142 };
143
144
145 template <typename A>
146 bool File<A>::Entry::hasLongName() const
147 {
148 return ( strncmp(this->ar_name, AR_EFMT1, strlen(AR_EFMT1)) == 0 );
149 }
150
151 template <typename A>
152 unsigned int File<A>::Entry::getLongNameSpace() const
153 {
154 char* endptr;
155 long result = strtol(&this->ar_name[strlen(AR_EFMT1)], &endptr, 10);
156 return result;
157 }
158
159 template <typename A>
160 void File<A>::Entry::getName(char *buf, int bufsz) const
161 {
162 if ( this->hasLongName() ) {
163 int len = this->getLongNameSpace();
164 assert(bufsz >= len+1);
165 strncpy(buf, ((char*)this)+sizeof(ar_hdr), len);
166 buf[len] = '\0';
167 }
168 else {
169 assert(bufsz >= 16+1);
170 strncpy(buf, this->ar_name, 16);
171 buf[16] = '\0';
172 char* space = strchr(buf, ' ');
173 if ( space != NULL )
174 *space = '\0';
175 }
176 }
177
178 template <typename A>
179 time_t File<A>::Entry::modificationTime() const
180 {
181 char temp[14];
182 strncpy(temp, this->ar_date, 12);
183 temp[12] = '\0';
184 char* endptr;
185 return (time_t)strtol(temp, &endptr, 10);
186 }
187
188
189 template <typename A>
190 const uint8_t* File<A>::Entry::content() const
191 {
192 if ( this->hasLongName() )
193 return ((uint8_t*)this) + sizeof(ar_hdr) + this->getLongNameSpace();
194 else
195 return ((uint8_t*)this) + sizeof(ar_hdr);
196 }
197
198
199 template <typename A>
200 uint32_t File<A>::Entry::contentSize() const
201 {
202 char temp[12];
203 strncpy(temp, this->ar_size, 10);
204 temp[10] = '\0';
205 char* endptr;
206 long size = strtol(temp, &endptr, 10);
207 // long name is included in ar_size
208 if ( this->hasLongName() )
209 size -= this->getLongNameSpace();
210 return size;
211 }
212
213
214 template <typename A>
215 const class File<A>::Entry* File<A>::Entry::next() const
216 {
217 const uint8_t* p = this->content() + contentSize();
218 p = (const uint8_t*)(((uintptr_t)p+3) & (-4)); // 4-byte align
219 return (class File<A>::Entry*)p;
220 }
221
222
223 template <> cpu_type_t File<x86>::architecture() { return CPU_TYPE_I386; }
224 template <> cpu_type_t File<x86_64>::architecture() { return CPU_TYPE_X86_64; }
225 template <> cpu_type_t File<arm>::architecture() { return CPU_TYPE_ARM; }
226
227
228 template <typename A>
229 bool File<A>::validMachOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
230 {
231 return mach_o::relocatable::isObjectFile(fileContent, fileLength, opts);
232 }
233
234 template <typename A>
235 bool File<A>::validLTOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
236 {
237 return lto::isObjectFile(fileContent, fileLength, opts.architecture, opts.subType);
238 }
239
240
241
242 template <typename A>
243 bool File<A>::validFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
244 {
245 // must have valid archive header
246 if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
247 return false;
248
249 // peak at first .o file and verify it is correct architecture
250 const Entry* const start = (Entry*)&fileContent[8];
251 const Entry* const end = (Entry*)&fileContent[fileLength];
252 for (const Entry* p=start; p < end; p = p->next()) {
253 char memberName[256];
254 p->getName(memberName, sizeof(memberName));
255 // skip option table-of-content member
256 if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
257 continue;
258 // archive is valid if first .o file is valid
259 return (validMachOFile(p->content(), p->contentSize(), opts) || validLTOFile(p->content(), p->contentSize(), opts));
260 }
261 // empty archive
262 return true;
263 }
264
265
266 template <typename A>
267 File<A>::File(const uint8_t fileContent[], uint64_t fileLength, const char* pth, time_t modTime,
268 ld::File::Ordinal ord, const ParserOptions& opts)
269 : ld::archive::File(strdup(pth), modTime, ord),
270 _archiveFileContent(fileContent), _archiveFilelength(fileLength),
271 _tableOfContents(NULL), _tableOfContentCount(0), _tableOfContentStrings(NULL),
272 _forceLoadAll(opts.forceLoadAll), _forceLoadObjC(opts.forceLoadObjC),
273 _forceLoadThis(opts.forceLoadThisArchive), _objc2ABI(opts.objcABI2), _verboseLoad(opts.verboseLoad),
274 _logAllFiles(opts.logAllFiles), _objOpts(opts.objOpts)
275 {
276 if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
277 throw "not an archive";
278
279 if ( !_forceLoadAll ) {
280 const Entry* const firstMember = (Entry*)&_archiveFileContent[8];
281 char memberName[256];
282 firstMember->getName(memberName, sizeof(memberName));
283 if ( (strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0) ) {
284 const uint8_t* contents = firstMember->content();
285 uint32_t ranlibArrayLen = E::get32(*((uint32_t*)contents));
286 _tableOfContents = (const struct ranlib*)&contents[4];
287 _tableOfContentCount = ranlibArrayLen / sizeof(struct ranlib);
288 _tableOfContentStrings = (const char*)&contents[ranlibArrayLen+8];
289 if ( ((uint8_t*)(&_tableOfContents[_tableOfContentCount]) > &fileContent[fileLength])
290 || ((uint8_t*)_tableOfContentStrings > &fileContent[fileLength]) )
291 throw "malformed archive, perhaps wrong architecture";
292 this->buildHashTable();
293 }
294 else
295 throw "archive has no table of contents";
296 }
297 }
298
299 template <>
300 bool File<x86>::memberHasObjCCategories(const Entry* member) const
301 {
302 if ( _objc2ABI ) {
303 // i386 for iOS simulator uses ObjC2 which has no global symbol for categories
304 return mach_o::relocatable::hasObjC2Categories(member->content());
305 }
306 else {
307 // i386 uses ObjC1 ABI which has .objc_category* global symbols
308 // <rdar://problem/11342022> strip -S on i386 pulls out .objc_category_name symbols from static frameworks
309 return mach_o::relocatable::hasObjC1Categories(member->content());
310 }
311 }
312
313
314
315 template <typename A>
316 bool File<A>::memberHasObjCCategories(const Entry* member) const
317 {
318 // x86_64 and ARM use ObjC2 which has no global symbol for categories
319 return mach_o::relocatable::hasObjC2Categories(member->content());
320 }
321
322
323 template <typename A>
324 typename File<A>::MemberState& File<A>::makeObjectFileForMember(const Entry* member) const
325 {
326 uint16_t memberIndex = 0;
327 // in case member was instantiated earlier but not needed yet
328 typename MemberToStateMap::iterator pos = _instantiatedEntries.find(member);
329 if ( pos == _instantiatedEntries.end() ) {
330 // Have to find the index of this member
331 const Entry* start;
332 uint16_t index;
333 if (_instantiatedEntries.size() == 0) {
334 start = (Entry*)&_archiveFileContent[8];
335 index = 1;
336 } else {
337 MemberState &lastKnown = _instantiatedEntries.rbegin()->second;
338 start = lastKnown.entry->next();
339 index = lastKnown.index+1;
340 }
341 for (const Entry* p=start; p <= member; p = p->next(), index++) {
342 MemberState state = {NULL, p, false, false, index};
343 _instantiatedEntries[p] = state;
344 if (member == p) {
345 memberIndex = index;
346 }
347 }
348 } else {
349 MemberState& state = pos->second;
350 if (state.file)
351 return state;
352 memberIndex = state.index;
353 }
354 assert(memberIndex != 0);
355 char memberName[256];
356 member->getName(memberName, sizeof(memberName));
357 char memberPath[strlen(this->path()) + strlen(memberName)+4];
358 strcpy(memberPath, this->path());
359 strcat(memberPath, "(");
360 strcat(memberPath, memberName);
361 strcat(memberPath, ")");
362 //fprintf(stderr, "using %s from %s\n", memberName, this->path());
363 try {
364 // range check
365 if ( member > (Entry*)(_archiveFileContent+_archiveFilelength) )
366 throwf("corrupt archive, member starts past end of file");
367 if ( (member->content() + member->contentSize()) > (_archiveFileContent+_archiveFilelength) )
368 throwf("corrupt archive, member contents extends past end of file");
369 const char* mPath = strdup(memberPath);
370 // see if member is mach-o file
371 ld::File::Ordinal ordinal = this->ordinal().archiveOrdinalWithMemberIndex(memberIndex);
372 ld::relocatable::File* result = mach_o::relocatable::parse(member->content(), member->contentSize(),
373 mPath, member->modificationTime(),
374 ordinal, _objOpts);
375 if ( result != NULL ) {
376 MemberState state = {result, member, false, false, memberIndex};
377 _instantiatedEntries[member] = state;
378 return _instantiatedEntries[member];
379 }
380 // see if member is llvm bitcode file
381 result = lto::parse(member->content(), member->contentSize(),
382 mPath, member->modificationTime(), ordinal,
383 _objOpts.architecture, _objOpts.subType, _logAllFiles);
384 if ( result != NULL ) {
385 MemberState state = {result, member, false, false, memberIndex};
386 _instantiatedEntries[member] = state;
387 return _instantiatedEntries[member];
388 }
389
390 throwf("archive member '%s' with length %d is not mach-o or llvm bitcode", memberName, member->contentSize());
391 }
392 catch (const char* msg) {
393 throwf("in %s, %s", memberPath, msg);
394 }
395 }
396
397
398 template <typename A>
399 bool File<A>::loadMember(MemberState& state, ld::File::AtomHandler& handler, const char *format, ...) const
400 {
401 bool didSomething = false;
402 if (!state.loaded) {
403 if ( _verboseLoad && !state.logged ) {
404 va_list list;
405 va_start(list, format);
406 vprintf(format, list);
407 va_end(list);
408 state.logged = true;
409 }
410 state.loaded = true;
411 didSomething = state.file->forEachAtom(handler);
412 }
413 return didSomething;
414 }
415
416
417 template <typename A>
418 bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
419 {
420 bool didSome = false;
421 if ( _forceLoadAll || _forceLoadThis ) {
422 // call handler on all .o files in this archive
423 const Entry* const start = (Entry*)&_archiveFileContent[8];
424 const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
425 for (const Entry* p=start; p < end; p = p->next()) {
426 char memberName[256];
427 p->getName(memberName, sizeof(memberName));
428 if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
429 continue;
430 MemberState& state = this->makeObjectFileForMember(p);
431 didSome |= loadMember(state, handler, "%s forced load of %s(%s)\n", _forceLoadThis ? "-force_load" : "-all_load", this->path(), memberName);
432 }
433 }
434 else if ( _forceLoadObjC ) {
435 // call handler on all .o files in this archive containing objc classes
436 for(typename NameToEntryMap::const_iterator it = _hashTable.begin(); it != _hashTable.end(); ++it) {
437 if ( (strncmp(it->first, ".objc_c", 7) == 0) || (strncmp(it->first, "_OBJC_CLASS_$_", 14) == 0) ) {
438 const Entry* member = (Entry*)&_archiveFileContent[E::get32(it->second->ran_off)];
439 MemberState& state = this->makeObjectFileForMember(member);
440 char memberName[256];
441 member->getName(memberName, sizeof(memberName));
442 didSome |= loadMember(state, handler, "-ObjC forced load of %s(%s)\n", this->path(), memberName);
443 }
444 }
445 // ObjC2 has no symbols in .o files with categories but not classes, look deeper for those
446 const Entry* const start = (Entry*)&_archiveFileContent[8];
447 const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
448 for (const Entry* member=start; member < end; member = member->next()) {
449 char mname[256];
450 member->getName(mname, sizeof(mname));
451 // skip table-of-content member
452 if ( (member==start) && ((strcmp(mname, SYMDEF_SORTED) == 0) || (strcmp(mname, SYMDEF) == 0)) )
453 continue;
454 MemberState& state = this->makeObjectFileForMember(member);
455 // only look at files not already loaded
456 if ( ! state.loaded ) {
457 if ( this->memberHasObjCCategories(member) ) {
458 MemberState& state = this->makeObjectFileForMember(member);
459 char memberName[256];
460 member->getName(memberName, sizeof(memberName));
461 didSome |= loadMember(state, handler, "-ObjC forced load of %s(%s)\n", this->path(), memberName);
462 }
463 }
464 }
465 }
466 return didSome;
467 }
468
469 template <typename A>
470 bool File<A>::justInTimeforEachAtom(const char* name, ld::File::AtomHandler& handler) const
471 {
472 // in force load case, all members already loaded
473 if ( _forceLoadAll || _forceLoadThis )
474 return false;
475
476 // do a hash search of table of contents looking for requested symbol
477 const struct ranlib* result = ranlibHashSearch(name);
478 if ( result != NULL ) {
479 const Entry* member = (Entry*)&_archiveFileContent[E::get32(result->ran_off)];
480 MemberState& state = this->makeObjectFileForMember(member);
481 char memberName[256];
482 member->getName(memberName, sizeof(memberName));
483 return loadMember(state, handler, "%s forced load of %s(%s)\n", name, this->path(), memberName);
484 }
485 //fprintf(stderr, "%s NOT found in archive %s\n", name, this->path());
486 return false;
487 }
488
489 class CheckIsDataSymbolHandler : public ld::File::AtomHandler
490 {
491 public:
492 CheckIsDataSymbolHandler(const char* n) : _name(n), _isData(false) {}
493 virtual void doAtom(const class ld::Atom& atom) {
494 if ( strcmp(atom.name(), _name) == 0 ) {
495 if ( atom.section().type() != ld::Section::typeCode )
496 _isData = true;
497 }
498 }
499 virtual void doFile(const class ld::File&) {}
500 bool symbolIsDataDefinition() { return _isData; }
501
502 private:
503 const char* _name;
504 bool _isData;
505
506 };
507
508 template <typename A>
509 bool File<A>::justInTimeDataOnlyforEachAtom(const char* name, ld::File::AtomHandler& handler) const
510 {
511 // in force load case, all members already loaded
512 if ( _forceLoadAll || _forceLoadThis )
513 return false;
514
515 // do a hash search of table of contents looking for requested symbol
516 const struct ranlib* result = ranlibHashSearch(name);
517 if ( result != NULL ) {
518 const Entry* member = (Entry*)&_archiveFileContent[E::get32(result->ran_off)];
519 MemberState& state = this->makeObjectFileForMember(member);
520 // only call handler for each member once
521 if ( ! state.loaded ) {
522 CheckIsDataSymbolHandler checker(name);
523 state.file->forEachAtom(checker);
524 if ( checker.symbolIsDataDefinition() ) {
525 char memberName[256];
526 member->getName(memberName, sizeof(memberName));
527 return loadMember(state, handler, "%s forced load of %s(%s)\n", name, this->path(), memberName);
528 }
529 }
530 }
531 //fprintf(stderr, "%s NOT found in archive %s\n", name, this->path());
532 return false;
533 }
534
535
536 typedef const struct ranlib* ConstRanLibPtr;
537
538 template <typename A>
539 ConstRanLibPtr File<A>::ranlibHashSearch(const char* name) const
540 {
541 typename NameToEntryMap::const_iterator pos = _hashTable.find(name);
542 if ( pos != _hashTable.end() )
543 return pos->second;
544 else
545 return NULL;
546 }
547
548 template <typename A>
549 void File<A>::buildHashTable()
550 {
551 // walk through list backwards, adding/overwriting entries
552 // this assures that with duplicates those earliest in the list will be found
553 for (int i = _tableOfContentCount-1; i >= 0; --i) {
554 const struct ranlib* entry = &_tableOfContents[i];
555 const char* entryName = &_tableOfContentStrings[E::get32(entry->ran_un.ran_strx)];
556 if ( E::get32(entry->ran_off) > _archiveFilelength ) {
557 throwf("malformed archive TOC entry for %s, offset %d is beyond end of file %lld\n",
558 entryName, entry->ran_off, _archiveFilelength);
559 }
560
561 //const Entry* member = (Entry*)&_archiveFileContent[E::get32(entry->ran_off)];
562 //fprintf(stderr, "adding hash %d, %s -> %p\n", i, entryName, entry);
563 _hashTable[entryName] = entry;
564 }
565 }
566
567 template <typename A>
568 void File<A>::dumpTableOfContents()
569 {
570 for (unsigned int i=0; i < _tableOfContentCount; ++i) {
571 const struct ranlib* e = &_tableOfContents[i];
572 printf("%s in %s\n", &_tableOfContentStrings[E::get32(e->ran_un.ran_strx)], ((Entry*)&_archiveFileContent[E::get32(e->ran_off)])->name());
573 }
574 }
575
576
577 //
578 // main function used by linker to instantiate archive files
579 //
580 ld::archive::File* parse(const uint8_t* fileContent, uint64_t fileLength,
581 const char* path, time_t modTime, ld::File::Ordinal ordinal, const ParserOptions& opts)
582 {
583 switch ( opts.objOpts.architecture ) {
584 #if SUPPORT_ARCH_x86_64
585 case CPU_TYPE_X86_64:
586 if ( archive::Parser<x86_64>::validFile(fileContent, fileLength, opts.objOpts) )
587 return archive::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
588 break;
589 #endif
590 #if SUPPORT_ARCH_i386
591 case CPU_TYPE_I386:
592 if ( archive::Parser<x86>::validFile(fileContent, fileLength, opts.objOpts) )
593 return archive::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
594 break;
595 #endif
596 #if SUPPORT_ARCH_arm_any
597 case CPU_TYPE_ARM:
598 if ( archive::Parser<arm>::validFile(fileContent, fileLength, opts.objOpts) )
599 return archive::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
600 break;
601 #endif
602 }
603 return NULL;
604 }
605
606
607
608 }; // namespace archive
609
610