]> git.saurik.com Git - apple/ld64.git/blob - src/ld/parsers/archive_file.cpp
66f9432af5f203c01d4df8bd2575b02ee0384e08
[apple/ld64.git] / src / ld / parsers / archive_file.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2005-2011 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 #include <stdint.h>
26 #include <math.h>
27 #include <unistd.h>
28 #include <sys/param.h>
29 #include <mach-o/ranlib.h>
30 #include <ar.h>
31
32 #include <vector>
33 #include <set>
34 #include <map>
35 #include <algorithm>
36 #include <ext/hash_map>
37
38 #include "MachOFileAbstraction.hpp"
39 #include "Architectures.hpp"
40
41 #include "macho_relocatable_file.h"
42 #include "lto_file.h"
43 #include "archive_file.h"
44
45
46 namespace archive {
47
// read-only pointer to one record of the archive's ranlib table of contents
typedef const struct ranlib* ConstRanLibPtr;

// File<A> is defined below; Parser<A> refers to it first
template <typename A> class File;
52
53
54 template <typename A>
55 class Parser
56 {
57 public:
58 typedef typename A::P P;
59
60 static bool validFile(const uint8_t* fileContent, uint64_t fileLength,
61 const mach_o::relocatable::ParserOptions& opts) {
62 return File<A>::validFile(fileContent, fileLength, opts); }
63 static File<A>* parse(const uint8_t* fileContent, uint64_t fileLength,
64 const char* path, time_t mTime,
65 uint32_t ordinal, const ParserOptions& opts) {
66 return new File<A>(fileContent, fileLength, path, mTime,
67 ordinal, opts);
68 }
69
70 };
71
72 template <typename A>
73 class File : public ld::archive::File
74 {
75 public:
76 static bool validFile(const uint8_t* fileContent, uint64_t fileLength,
77 const mach_o::relocatable::ParserOptions& opts);
78 File(const uint8_t* fileContent, uint64_t fileLength,
79 const char* pth, time_t modTime,
80 uint32_t ord, const ParserOptions& opts);
81 virtual ~File() {}
82
83 // overrides of ld::File
84 virtual bool forEachAtom(ld::File::AtomHandler&) const;
85 virtual bool justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const;
86 virtual uint32_t subFileCount() const { return _archiveFilelength/sizeof(ar_hdr); }
87
88 // overrides of ld::archive::File
89 virtual bool justInTimeDataOnlyforEachAtom(const char* name, ld::File::AtomHandler& handler) const;
90
91 private:
92 static bool validMachOFile(const uint8_t* fileContent, uint64_t fileLength,
93 const mach_o::relocatable::ParserOptions& opts);
94 static bool validLTOFile(const uint8_t* fileContent, uint64_t fileLength,
95 const mach_o::relocatable::ParserOptions& opts);
96 static cpu_type_t architecture();
97
98 class Entry : ar_hdr
99 {
100 public:
101 const char* name() const;
102 time_t modificationTime() const;
103 const uint8_t* content() const;
104 uint32_t contentSize() const;
105 const Entry* next() const;
106 private:
107 bool hasLongName() const;
108 unsigned int getLongNameSpace() const;
109
110 };
111
112 class CStringEquals
113 {
114 public:
115 bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); }
116 };
117 typedef __gnu_cxx::hash_map<const char*, const struct ranlib*, __gnu_cxx::hash<const char*>, CStringEquals> NameToEntryMap;
118
119 typedef typename A::P P;
120 typedef typename A::P::E E;
121
122 struct MemberState { ld::relocatable::File* file; bool logged; bool loaded; };
123
124 typedef std::map<const class Entry*, MemberState> MemberToStateMap;
125
126 const struct ranlib* ranlibHashSearch(const char* name) const;
127 MemberState& makeObjectFileForMember(const Entry* member) const;
128 bool memberHasObjCCategories(const Entry* member) const;
129 void dumpTableOfContents();
130 void buildHashTable();
131
132 const uint8_t* _archiveFileContent;
133 uint64_t _archiveFilelength;
134 const struct ranlib* _tableOfContents;
135 uint32_t _tableOfContentCount;
136 const char* _tableOfContentStrings;
137 mutable MemberToStateMap _instantiatedEntries;
138 NameToEntryMap _hashTable;
139 const bool _forceLoadAll;
140 const bool _forceLoadObjC;
141 const bool _forceLoadThis;
142 const bool _objc2ABI;
143 const bool _verboseLoad;
144 const bool _logAllFiles;
145 const mach_o::relocatable::ParserOptions _objOpts;
146 };
147
148
149 template <typename A>
150 bool File<A>::Entry::hasLongName() const
151 {
152 return ( strncmp(this->ar_name, AR_EFMT1, strlen(AR_EFMT1)) == 0 );
153 }
154
155 template <typename A>
156 unsigned int File<A>::Entry::getLongNameSpace() const
157 {
158 char* endptr;
159 long result = strtol(&this->ar_name[strlen(AR_EFMT1)], &endptr, 10);
160 return result;
161 }
162
163 template <typename A>
164 const char* File<A>::Entry::name() const
165 {
166 if ( this->hasLongName() ) {
167 int len = this->getLongNameSpace();
168 static char longName[256];
169 strncpy(longName, ((char*)this)+sizeof(ar_hdr), len);
170 longName[len] = '\0';
171 return longName;
172 }
173 else {
174 static char shortName[20];
175 strncpy(shortName, this->ar_name, 16);
176 shortName[16] = '\0';
177 char* space = strchr(shortName, ' ');
178 if ( space != NULL )
179 *space = '\0';
180 return shortName;
181 }
182 }
183
184 template <typename A>
185 time_t File<A>::Entry::modificationTime() const
186 {
187 char temp[14];
188 strncpy(temp, this->ar_date, 12);
189 temp[12] = '\0';
190 char* endptr;
191 return (time_t)strtol(temp, &endptr, 10);
192 }
193
194
195 template <typename A>
196 const uint8_t* File<A>::Entry::content() const
197 {
198 if ( this->hasLongName() )
199 return ((uint8_t*)this) + sizeof(ar_hdr) + this->getLongNameSpace();
200 else
201 return ((uint8_t*)this) + sizeof(ar_hdr);
202 }
203
204
205 template <typename A>
206 uint32_t File<A>::Entry::contentSize() const
207 {
208 char temp[12];
209 strncpy(temp, this->ar_size, 10);
210 temp[10] = '\0';
211 char* endptr;
212 long size = strtol(temp, &endptr, 10);
213 // long name is included in ar_size
214 if ( this->hasLongName() )
215 size -= this->getLongNameSpace();
216 return size;
217 }
218
219
220 template <typename A>
221 const class File<A>::Entry* File<A>::Entry::next() const
222 {
223 const uint8_t* p = this->content() + contentSize();
224 p = (const uint8_t*)(((uintptr_t)p+3) & (-4)); // 4-byte align
225 return (class File<A>::Entry*)p;
226 }
227
228
229 template <> cpu_type_t File<ppc>::architecture() { return CPU_TYPE_POWERPC; }
230 template <> cpu_type_t File<ppc64>::architecture() { return CPU_TYPE_POWERPC64; }
231 template <> cpu_type_t File<x86>::architecture() { return CPU_TYPE_I386; }
232 template <> cpu_type_t File<x86_64>::architecture() { return CPU_TYPE_X86_64; }
233 template <> cpu_type_t File<arm>::architecture() { return CPU_TYPE_ARM; }
234
235
236 template <typename A>
237 bool File<A>::validMachOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
238 {
239 return mach_o::relocatable::isObjectFile(fileContent, fileLength, opts);
240 }
241
242 template <typename A>
243 bool File<A>::validLTOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
244 {
245 return lto::isObjectFile(fileContent, fileLength, opts.architecture, opts.subType);
246 }
247
248
249
250 template <typename A>
251 bool File<A>::validFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
252 {
253 // must have valid archive header
254 if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
255 return false;
256
257 // peak at first .o file and verify it is correct architecture
258 const Entry* const start = (Entry*)&fileContent[8];
259 const Entry* const end = (Entry*)&fileContent[fileLength];
260 for (const Entry* p=start; p < end; p = p->next()) {
261 const char* memberName = p->name();
262 // skip option table-of-content member
263 if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
264 continue;
265 // archive is valid if first .o file is valid
266 return (validMachOFile(p->content(), p->contentSize(), opts) || validLTOFile(p->content(), p->contentSize(), opts));
267 }
268 // empty archive
269 return true;
270 }
271
272
273 template <typename A>
274 File<A>::File(const uint8_t fileContent[], uint64_t fileLength, const char* pth, time_t modTime,
275 uint32_t ord, const ParserOptions& opts)
276 : ld::archive::File(strdup(pth), modTime, ord),
277 _archiveFileContent(fileContent), _archiveFilelength(fileLength),
278 _tableOfContents(NULL), _tableOfContentCount(0), _tableOfContentStrings(NULL),
279 _forceLoadAll(opts.forceLoadAll), _forceLoadObjC(opts.forceLoadObjC),
280 _forceLoadThis(opts.forceLoadThisArchive), _objc2ABI(opts.objcABI2), _verboseLoad(opts.verboseLoad),
281 _logAllFiles(opts.logAllFiles), _objOpts(opts.objOpts)
282 {
283 if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
284 throw "not an archive";
285
286 if ( !_forceLoadAll ) {
287 const Entry* const firstMember = (Entry*)&_archiveFileContent[8];
288 if ( (strcmp(firstMember->name(), SYMDEF_SORTED) == 0) || (strcmp(firstMember->name(), SYMDEF) == 0) ) {
289 const uint8_t* contents = firstMember->content();
290 uint32_t ranlibArrayLen = E::get32(*((uint32_t*)contents));
291 _tableOfContents = (const struct ranlib*)&contents[4];
292 _tableOfContentCount = ranlibArrayLen / sizeof(struct ranlib);
293 _tableOfContentStrings = (const char*)&contents[ranlibArrayLen+8];
294 if ( ((uint8_t*)(&_tableOfContents[_tableOfContentCount]) > &fileContent[fileLength])
295 || ((uint8_t*)_tableOfContentStrings > &fileContent[fileLength]) )
296 throw "malformed archive, perhaps wrong architecture";
297 this->buildHashTable();
298 }
299 else
300 throw "archive has no table of contents";
301 }
302 }
303
304 template <>
305 bool File<x86>::memberHasObjCCategories(const Entry* member) const
306 {
307 if ( _objc2ABI ) {
308 // i386 for iOS simulator uses ObjC2 which has no global symbol for categories
309 return mach_o::relocatable::hasObjC2Categories(member->content());
310 }
311 else {
312 // i386 uses ObjC1 ABI which has .objc_category* global symbols
313 return false;
314 }
315 }
316
317 template <>
318 bool File<ppc>::memberHasObjCCategories(const Entry* member) const
319 {
320 // ppc uses ObjC1 ABI which has .objc_category* global symbols
321 return false;
322 }
323
324
325 template <typename A>
326 bool File<A>::memberHasObjCCategories(const Entry* member) const
327 {
328 // x86_64 and ARM use ObjC2 which has no global symbol for categories
329 return mach_o::relocatable::hasObjC2Categories(member->content());
330 }
331
332
333 template <typename A>
334 typename File<A>::MemberState& File<A>::makeObjectFileForMember(const Entry* member) const
335 {
336 // in case member was instantiated earlier but not needed yet
337 typename MemberToStateMap::iterator pos = _instantiatedEntries.find(member);
338 if ( pos != _instantiatedEntries.end() )
339 return pos->second;
340
341 const char* memberName = member->name();
342 char memberPath[strlen(this->path()) + strlen(memberName)+4];
343 strcpy(memberPath, this->path());
344 strcat(memberPath, "(");
345 strcat(memberPath, memberName);
346 strcat(memberPath, ")");
347 //fprintf(stderr, "using %s from %s\n", memberName, this->path());
348 try {
349 // range check
350 if ( member > (Entry*)(_archiveFileContent+_archiveFilelength) )
351 throwf("corrupt archive, member starts past end of file");
352 if ( (member->content() + member->contentSize()) > (_archiveFileContent+_archiveFilelength) )
353 throwf("corrupt archive, member contents extends past end of file");
354 const char* mPath = strdup(memberPath);
355 // offset the ordinals in this mach-o .o file, so that atoms layout in same order as in archive
356 uint32_t memberIndex = ((uint8_t*)member - _archiveFileContent)/sizeof(ar_hdr);
357 // see if member is mach-o file
358 ld::relocatable::File* result = mach_o::relocatable::parse(member->content(), member->contentSize(),
359 mPath, member->modificationTime(),
360 this->ordinal() + memberIndex, _objOpts);
361 if ( result != NULL ) {
362 MemberState state = {result, false, false};
363 _instantiatedEntries[member] = state;
364 return _instantiatedEntries[member];
365 }
366 // see if member is llvm bitcode file
367 result = lto::parse(member->content(), member->contentSize(),
368 mPath, member->modificationTime(), this->ordinal() + memberIndex,
369 _objOpts.architecture, _objOpts.subType, _logAllFiles);
370 if ( result != NULL ) {
371 MemberState state = {result, false, false};
372 _instantiatedEntries[member] = state;
373 return _instantiatedEntries[member];
374 }
375
376 throwf("archive member '%s' with length %d is not mach-o or llvm bitcode", memberName, member->contentSize());
377 }
378 catch (const char* msg) {
379 throwf("in %s, %s", memberPath, msg);
380 }
381 }
382
383
384 template <typename A>
385 bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
386 {
387 bool didSome = false;
388 if ( _forceLoadAll || _forceLoadThis ) {
389 // call handler on all .o files in this archive
390 const Entry* const start = (Entry*)&_archiveFileContent[8];
391 const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
392 for (const Entry* p=start; p < end; p = p->next()) {
393 const char* memberName = p->name();
394 if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
395 continue;
396 MemberState& state = this->makeObjectFileForMember(p);
397 if ( _verboseLoad ) {
398 if ( _forceLoadThis )
399 printf("-force_load forced load of %s(%s)\n", this->path(), memberName);
400 else
401 printf("-all_load forced load of %s(%s)\n", this->path(), memberName);
402 state.logged = true;
403 }
404 didSome |= state.file->forEachAtom(handler);
405 state.loaded = true;
406 }
407 }
408 else if ( _forceLoadObjC ) {
409 // call handler on all .o files in this archive containing objc classes
410 for(typename NameToEntryMap::const_iterator it = _hashTable.begin(); it != _hashTable.end(); ++it) {
411 if ( (strncmp(it->first, ".objc_c", 7) == 0) || (strncmp(it->first, "_OBJC_CLASS_$_", 14) == 0) ) {
412 const Entry* member = (Entry*)&_archiveFileContent[E::get32(it->second->ran_off)];
413 MemberState& state = this->makeObjectFileForMember(member);
414 if ( _verboseLoad && !state.logged ) {
415 printf("-ObjC forced load of %s(%s)\n", this->path(), member->name());
416 state.logged = true;
417 }
418 if ( ! state.loaded ) {
419 didSome |= state.file->forEachAtom(handler);
420 state.loaded = true;
421 }
422 }
423 }
424 // ObjC2 has no symbols in .o files with categories but not classes, look deeper for those
425 const Entry* const start = (Entry*)&_archiveFileContent[8];
426 const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
427 for (const Entry* member=start; member < end; member = member->next()) {
428 // only look at files not already instantiated
429 if ( _instantiatedEntries.count(member) == 0 ) {
430 //fprintf(stderr, "checking member %s\n", member->name());
431 if ( this->memberHasObjCCategories(member) ) {
432 MemberState& state = this->makeObjectFileForMember(member);
433 if ( _verboseLoad && !state.logged ) {
434 printf("-ObjC forced load of %s(%s)\n", this->path(), member->name());
435 state.logged = true;
436 }
437 if ( ! state.loaded ) {
438 didSome |= state.file->forEachAtom(handler);
439 state.loaded = true;
440 }
441 }
442 }
443 }
444 }
445 return didSome;
446 }
447
448 template <typename A>
449 bool File<A>::justInTimeforEachAtom(const char* name, ld::File::AtomHandler& handler) const
450 {
451 // in force load case, all members already loaded
452 if ( _forceLoadAll || _forceLoadThis )
453 return false;
454
455 // do a hash search of table of contents looking for requested symbol
456 const struct ranlib* result = ranlibHashSearch(name);
457 if ( result != NULL ) {
458 const Entry* member = (Entry*)&_archiveFileContent[E::get32(result->ran_off)];
459 MemberState& state = this->makeObjectFileForMember(member);
460 // only call handler for each member once
461 if ( ! state.loaded && !state.logged ) {
462 if ( _verboseLoad ) {
463 printf("%s forced load of %s(%s)\n", name, this->path(), member->name());
464 state.logged = true;
465 }
466 state.loaded = true;
467 return state.file->forEachAtom(handler);
468 }
469 }
470 //fprintf(stderr, "%s NOT found in archive %s\n", name, this->path());
471 return false;
472 }
473
474 class CheckIsDataSymbolHandler : public ld::File::AtomHandler
475 {
476 public:
477 CheckIsDataSymbolHandler(const char* n) : _name(n), _isData(false) {}
478 virtual void doAtom(const class ld::Atom& atom) {
479 if ( strcmp(atom.name(), _name) == 0 ) {
480 if ( atom.section().type() != ld::Section::typeCode )
481 _isData = true;
482 }
483 }
484 virtual void doFile(const class ld::File&) {}
485 bool symbolIsDataDefinition() { return _isData; }
486
487 private:
488 const char* _name;
489 bool _isData;
490
491 };
492
493 template <typename A>
494 bool File<A>::justInTimeDataOnlyforEachAtom(const char* name, ld::File::AtomHandler& handler) const
495 {
496 // in force load case, all members already loaded
497 if ( _forceLoadAll || _forceLoadThis )
498 return false;
499
500 // do a hash search of table of contents looking for requested symbol
501 const struct ranlib* result = ranlibHashSearch(name);
502 if ( result != NULL ) {
503 const Entry* member = (Entry*)&_archiveFileContent[E::get32(result->ran_off)];
504 MemberState& state = this->makeObjectFileForMember(member);
505 // only call handler for each member once
506 if ( ! state.loaded ) {
507 CheckIsDataSymbolHandler checker(name);
508 state.file->forEachAtom(checker);
509 if ( checker.symbolIsDataDefinition() ) {
510 if ( _verboseLoad && !state.logged ) {
511 printf("%s forced load of %s(%s)\n", name, this->path(), member->name());
512 state.logged = true;
513 }
514 state.loaded = true;
515 return state.file->forEachAtom(handler);
516 }
517 }
518 }
519 //fprintf(stderr, "%s NOT found in archive %s\n", name, this->path());
520 return false;
521 }
522
523
524 typedef const struct ranlib* ConstRanLibPtr;
525
526 template <typename A>
527 ConstRanLibPtr File<A>::ranlibHashSearch(const char* name) const
528 {
529 typename NameToEntryMap::const_iterator pos = _hashTable.find(name);
530 if ( pos != _hashTable.end() )
531 return pos->second;
532 else
533 return NULL;
534 }
535
536 template <typename A>
537 void File<A>::buildHashTable()
538 {
539 // walk through list backwards, adding/overwriting entries
540 // this assures that with duplicates those earliest in the list will be found
541 for (int i = _tableOfContentCount-1; i >= 0; --i) {
542 const struct ranlib* entry = &_tableOfContents[i];
543 const char* entryName = &_tableOfContentStrings[E::get32(entry->ran_un.ran_strx)];
544 if ( E::get32(entry->ran_off) > _archiveFilelength ) {
545 throwf("malformed archive TOC entry for %s, offset %d is beyond end of file %lld\n",
546 entryName, entry->ran_off, _archiveFilelength);
547 }
548
549 //const Entry* member = (Entry*)&_archiveFileContent[E::get32(entry->ran_off)];
550 //fprintf(stderr, "adding hash %d, %s -> %p\n", i, entryName, entry);
551 _hashTable[entryName] = entry;
552 }
553 }
554
555 template <typename A>
556 void File<A>::dumpTableOfContents()
557 {
558 for (unsigned int i=0; i < _tableOfContentCount; ++i) {
559 const struct ranlib* e = &_tableOfContents[i];
560 printf("%s in %s\n", &_tableOfContentStrings[E::get32(e->ran_un.ran_strx)], ((Entry*)&_archiveFileContent[E::get32(e->ran_off)])->name());
561 }
562 }
563
564
565 //
566 // main function used by linker to instantiate archive files
567 //
568 ld::archive::File* parse(const uint8_t* fileContent, uint64_t fileLength,
569 const char* path, time_t modTime, uint32_t ordinal, const ParserOptions& opts)
570 {
571 switch ( opts.objOpts.architecture ) {
572 case CPU_TYPE_X86_64:
573 if ( archive::Parser<x86_64>::validFile(fileContent, fileLength, opts.objOpts) )
574 return archive::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
575 break;
576 case CPU_TYPE_I386:
577 if ( archive::Parser<x86>::validFile(fileContent, fileLength, opts.objOpts) )
578 return archive::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
579 break;
580 case CPU_TYPE_ARM:
581 if ( archive::Parser<arm>::validFile(fileContent, fileLength, opts.objOpts) )
582 return archive::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
583 break;
584 case CPU_TYPE_POWERPC:
585 if ( archive::Parser<ppc>::validFile(fileContent, fileLength, opts.objOpts) )
586 return archive::Parser<ppc>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
587 break;
588 case CPU_TYPE_POWERPC64:
589 if ( archive::Parser<ppc64>::validFile(fileContent, fileLength, opts.objOpts) )
590 return archive::Parser<ppc64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
591 break;
592 }
593 return NULL;
594 }
595
596
597
598 }; // namespace archive
599
600