]> git.saurik.com Git - apple/ld64.git/blob - src/ld/parsers/archive_file.cpp
ld64-128.2.tar.gz
[apple/ld64.git] / src / ld / parsers / archive_file.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2005-2011 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 #include <stdint.h>
26 #include <math.h>
27 #include <unistd.h>
28 #include <sys/param.h>
29 #include <mach-o/ranlib.h>
30 #include <ar.h>
31
32 #include <vector>
33 #include <set>
34 #include <map>
35 #include <algorithm>
36 #include <ext/hash_map>
37
38 #include "MachOFileAbstraction.hpp"
39 #include "Architectures.hpp"
40
41 #include "macho_relocatable_file.h"
42 #include "lto_file.h"
43 #include "archive_file.h"
44
45
46 namespace archive {
47
48 typedef const struct ranlib* ConstRanLibPtr;
49
50 // forward reference
51 template <typename A> class File;
52
53
54 template <typename A>
55 class Parser
56 {
57 public:
58 typedef typename A::P P;
59
60 static bool validFile(const uint8_t* fileContent, uint64_t fileLength,
61 const mach_o::relocatable::ParserOptions& opts) {
62 return File<A>::validFile(fileContent, fileLength, opts); }
63 static File<A>* parse(const uint8_t* fileContent, uint64_t fileLength,
64 const char* path, time_t mTime,
65 uint32_t ordinal, const ParserOptions& opts) {
66 return new File<A>(fileContent, fileLength, path, mTime,
67 ordinal, opts);
68 }
69
70 };
71
72 template <typename A>
73 class File : public ld::archive::File
74 {
75 public:
76 static bool validFile(const uint8_t* fileContent, uint64_t fileLength,
77 const mach_o::relocatable::ParserOptions& opts);
78 File(const uint8_t* fileContent, uint64_t fileLength,
79 const char* pth, time_t modTime,
80 uint32_t ord, const ParserOptions& opts);
81 virtual ~File() {}
82
83 // overrides of ld::File
84 virtual bool forEachAtom(ld::File::AtomHandler&) const;
85 virtual bool justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const;
86 virtual uint32_t subFileCount() const { return _archiveFilelength/sizeof(ar_hdr); }
87
88 // overrides of ld::archive::File
89 virtual bool justInTimeDataOnlyforEachAtom(const char* name, ld::File::AtomHandler& handler) const;
90
91 private:
92 static bool validMachOFile(const uint8_t* fileContent, uint64_t fileLength,
93 const mach_o::relocatable::ParserOptions& opts);
94 static bool validLTOFile(const uint8_t* fileContent, uint64_t fileLength,
95 const mach_o::relocatable::ParserOptions& opts);
96 static cpu_type_t architecture();
97
98 class Entry : ar_hdr
99 {
100 public:
101 const char* name() const;
102 time_t modificationTime() const;
103 const uint8_t* content() const;
104 uint32_t contentSize() const;
105 const Entry* next() const;
106 private:
107 bool hasLongName() const;
108 unsigned int getLongNameSpace() const;
109
110 };
111
112 class CStringEquals
113 {
114 public:
115 bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); }
116 };
117 typedef __gnu_cxx::hash_map<const char*, const struct ranlib*, __gnu_cxx::hash<const char*>, CStringEquals> NameToEntryMap;
118
119 typedef typename A::P P;
120 typedef typename A::P::E E;
121
122 struct MemberState { ld::relocatable::File* file; bool logged; bool loaded; };
123
124 typedef std::map<const class Entry*, MemberState> MemberToStateMap;
125
126 const struct ranlib* ranlibHashSearch(const char* name) const;
127 MemberState& makeObjectFileForMember(const Entry* member) const;
128 bool memberHasObjCCategories(const Entry* member) const;
129 void dumpTableOfContents();
130 void buildHashTable();
131
132 const uint8_t* _archiveFileContent;
133 uint64_t _archiveFilelength;
134 const struct ranlib* _tableOfContents;
135 uint32_t _tableOfContentCount;
136 const char* _tableOfContentStrings;
137 mutable MemberToStateMap _instantiatedEntries;
138 NameToEntryMap _hashTable;
139 const bool _forceLoadAll;
140 const bool _forceLoadObjC;
141 const bool _forceLoadThis;
142 const bool _objc2ABI;
143 const bool _verboseLoad;
144 const bool _logAllFiles;
145 const mach_o::relocatable::ParserOptions _objOpts;
146 };
147
148
149 template <typename A>
150 bool File<A>::Entry::hasLongName() const
151 {
152 return ( strncmp(this->ar_name, AR_EFMT1, strlen(AR_EFMT1)) == 0 );
153 }
154
155 template <typename A>
156 unsigned int File<A>::Entry::getLongNameSpace() const
157 {
158 char* endptr;
159 long result = strtol(&this->ar_name[strlen(AR_EFMT1)], &endptr, 10);
160 return result;
161 }
162
163 template <typename A>
164 const char* File<A>::Entry::name() const
165 {
166 if ( this->hasLongName() ) {
167 int len = this->getLongNameSpace();
168 static char longName[256];
169 strncpy(longName, ((char*)this)+sizeof(ar_hdr), len);
170 longName[len] = '\0';
171 return longName;
172 }
173 else {
174 static char shortName[20];
175 strncpy(shortName, this->ar_name, 16);
176 shortName[16] = '\0';
177 char* space = strchr(shortName, ' ');
178 if ( space != NULL )
179 *space = '\0';
180 return shortName;
181 }
182 }
183
184 template <typename A>
185 time_t File<A>::Entry::modificationTime() const
186 {
187 char temp[14];
188 strncpy(temp, this->ar_date, 12);
189 temp[12] = '\0';
190 char* endptr;
191 return (time_t)strtol(temp, &endptr, 10);
192 }
193
194
195 template <typename A>
196 const uint8_t* File<A>::Entry::content() const
197 {
198 if ( this->hasLongName() )
199 return ((uint8_t*)this) + sizeof(ar_hdr) + this->getLongNameSpace();
200 else
201 return ((uint8_t*)this) + sizeof(ar_hdr);
202 }
203
204
205 template <typename A>
206 uint32_t File<A>::Entry::contentSize() const
207 {
208 char temp[12];
209 strncpy(temp, this->ar_size, 10);
210 temp[10] = '\0';
211 char* endptr;
212 long size = strtol(temp, &endptr, 10);
213 // long name is included in ar_size
214 if ( this->hasLongName() )
215 size -= this->getLongNameSpace();
216 return size;
217 }
218
219
220 template <typename A>
221 const class File<A>::Entry* File<A>::Entry::next() const
222 {
223 const uint8_t* p = this->content() + contentSize();
224 p = (const uint8_t*)(((uintptr_t)p+3) & (-4)); // 4-byte align
225 return (class File<A>::Entry*)p;
226 }
227
228
229 template <> cpu_type_t File<x86>::architecture() { return CPU_TYPE_I386; }
230 template <> cpu_type_t File<x86_64>::architecture() { return CPU_TYPE_X86_64; }
231 template <> cpu_type_t File<arm>::architecture() { return CPU_TYPE_ARM; }
232
233
234 template <typename A>
235 bool File<A>::validMachOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
236 {
237 return mach_o::relocatable::isObjectFile(fileContent, fileLength, opts);
238 }
239
240 template <typename A>
241 bool File<A>::validLTOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
242 {
243 return lto::isObjectFile(fileContent, fileLength, opts.architecture, opts.subType);
244 }
245
246
247
248 template <typename A>
249 bool File<A>::validFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
250 {
251 // must have valid archive header
252 if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
253 return false;
254
255 // peak at first .o file and verify it is correct architecture
256 const Entry* const start = (Entry*)&fileContent[8];
257 const Entry* const end = (Entry*)&fileContent[fileLength];
258 for (const Entry* p=start; p < end; p = p->next()) {
259 const char* memberName = p->name();
260 // skip option table-of-content member
261 if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
262 continue;
263 // archive is valid if first .o file is valid
264 return (validMachOFile(p->content(), p->contentSize(), opts) || validLTOFile(p->content(), p->contentSize(), opts));
265 }
266 // empty archive
267 return true;
268 }
269
270
271 template <typename A>
272 File<A>::File(const uint8_t fileContent[], uint64_t fileLength, const char* pth, time_t modTime,
273 uint32_t ord, const ParserOptions& opts)
274 : ld::archive::File(strdup(pth), modTime, ord),
275 _archiveFileContent(fileContent), _archiveFilelength(fileLength),
276 _tableOfContents(NULL), _tableOfContentCount(0), _tableOfContentStrings(NULL),
277 _forceLoadAll(opts.forceLoadAll), _forceLoadObjC(opts.forceLoadObjC),
278 _forceLoadThis(opts.forceLoadThisArchive), _objc2ABI(opts.objcABI2), _verboseLoad(opts.verboseLoad),
279 _logAllFiles(opts.logAllFiles), _objOpts(opts.objOpts)
280 {
281 if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
282 throw "not an archive";
283
284 if ( !_forceLoadAll ) {
285 const Entry* const firstMember = (Entry*)&_archiveFileContent[8];
286 if ( (strcmp(firstMember->name(), SYMDEF_SORTED) == 0) || (strcmp(firstMember->name(), SYMDEF) == 0) ) {
287 const uint8_t* contents = firstMember->content();
288 uint32_t ranlibArrayLen = E::get32(*((uint32_t*)contents));
289 _tableOfContents = (const struct ranlib*)&contents[4];
290 _tableOfContentCount = ranlibArrayLen / sizeof(struct ranlib);
291 _tableOfContentStrings = (const char*)&contents[ranlibArrayLen+8];
292 if ( ((uint8_t*)(&_tableOfContents[_tableOfContentCount]) > &fileContent[fileLength])
293 || ((uint8_t*)_tableOfContentStrings > &fileContent[fileLength]) )
294 throw "malformed archive, perhaps wrong architecture";
295 this->buildHashTable();
296 }
297 else
298 throw "archive has no table of contents";
299 }
300 }
301
302 template <>
303 bool File<x86>::memberHasObjCCategories(const Entry* member) const
304 {
305 if ( _objc2ABI ) {
306 // i386 for iOS simulator uses ObjC2 which has no global symbol for categories
307 return mach_o::relocatable::hasObjC2Categories(member->content());
308 }
309 else {
310 // i386 uses ObjC1 ABI which has .objc_category* global symbols
311 return false;
312 }
313 }
314
315
316
317 template <typename A>
318 bool File<A>::memberHasObjCCategories(const Entry* member) const
319 {
320 // x86_64 and ARM use ObjC2 which has no global symbol for categories
321 return mach_o::relocatable::hasObjC2Categories(member->content());
322 }
323
324
325 template <typename A>
326 typename File<A>::MemberState& File<A>::makeObjectFileForMember(const Entry* member) const
327 {
328 // in case member was instantiated earlier but not needed yet
329 typename MemberToStateMap::iterator pos = _instantiatedEntries.find(member);
330 if ( pos != _instantiatedEntries.end() )
331 return pos->second;
332
333 const char* memberName = member->name();
334 char memberPath[strlen(this->path()) + strlen(memberName)+4];
335 strcpy(memberPath, this->path());
336 strcat(memberPath, "(");
337 strcat(memberPath, memberName);
338 strcat(memberPath, ")");
339 //fprintf(stderr, "using %s from %s\n", memberName, this->path());
340 try {
341 // range check
342 if ( member > (Entry*)(_archiveFileContent+_archiveFilelength) )
343 throwf("corrupt archive, member starts past end of file");
344 if ( (member->content() + member->contentSize()) > (_archiveFileContent+_archiveFilelength) )
345 throwf("corrupt archive, member contents extends past end of file");
346 const char* mPath = strdup(memberPath);
347 // offset the ordinals in this mach-o .o file, so that atoms layout in same order as in archive
348 uint32_t memberIndex = ((uint8_t*)member - _archiveFileContent)/sizeof(ar_hdr);
349 // see if member is mach-o file
350 ld::relocatable::File* result = mach_o::relocatable::parse(member->content(), member->contentSize(),
351 mPath, member->modificationTime(),
352 this->ordinal() + memberIndex, _objOpts);
353 if ( result != NULL ) {
354 MemberState state = {result, false, false};
355 _instantiatedEntries[member] = state;
356 return _instantiatedEntries[member];
357 }
358 // see if member is llvm bitcode file
359 result = lto::parse(member->content(), member->contentSize(),
360 mPath, member->modificationTime(), this->ordinal() + memberIndex,
361 _objOpts.architecture, _objOpts.subType, _logAllFiles);
362 if ( result != NULL ) {
363 MemberState state = {result, false, false};
364 _instantiatedEntries[member] = state;
365 return _instantiatedEntries[member];
366 }
367
368 throwf("archive member '%s' with length %d is not mach-o or llvm bitcode", memberName, member->contentSize());
369 }
370 catch (const char* msg) {
371 throwf("in %s, %s", memberPath, msg);
372 }
373 }
374
375
376 template <typename A>
377 bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
378 {
379 bool didSome = false;
380 if ( _forceLoadAll || _forceLoadThis ) {
381 // call handler on all .o files in this archive
382 const Entry* const start = (Entry*)&_archiveFileContent[8];
383 const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
384 for (const Entry* p=start; p < end; p = p->next()) {
385 const char* memberName = p->name();
386 if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
387 continue;
388 MemberState& state = this->makeObjectFileForMember(p);
389 if ( _verboseLoad ) {
390 if ( _forceLoadThis )
391 printf("-force_load forced load of %s(%s)\n", this->path(), memberName);
392 else
393 printf("-all_load forced load of %s(%s)\n", this->path(), memberName);
394 state.logged = true;
395 }
396 didSome |= state.file->forEachAtom(handler);
397 state.loaded = true;
398 }
399 }
400 else if ( _forceLoadObjC ) {
401 // call handler on all .o files in this archive containing objc classes
402 for(typename NameToEntryMap::const_iterator it = _hashTable.begin(); it != _hashTable.end(); ++it) {
403 if ( (strncmp(it->first, ".objc_c", 7) == 0) || (strncmp(it->first, "_OBJC_CLASS_$_", 14) == 0) ) {
404 const Entry* member = (Entry*)&_archiveFileContent[E::get32(it->second->ran_off)];
405 MemberState& state = this->makeObjectFileForMember(member);
406 if ( _verboseLoad && !state.logged ) {
407 printf("-ObjC forced load of %s(%s)\n", this->path(), member->name());
408 state.logged = true;
409 }
410 if ( ! state.loaded ) {
411 didSome |= state.file->forEachAtom(handler);
412 state.loaded = true;
413 }
414 }
415 }
416 // ObjC2 has no symbols in .o files with categories but not classes, look deeper for those
417 const Entry* const start = (Entry*)&_archiveFileContent[8];
418 const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
419 for (const Entry* member=start; member < end; member = member->next()) {
420 // only look at files not already instantiated
421 if ( _instantiatedEntries.count(member) == 0 ) {
422 //fprintf(stderr, "checking member %s\n", member->name());
423 if ( this->memberHasObjCCategories(member) ) {
424 MemberState& state = this->makeObjectFileForMember(member);
425 if ( _verboseLoad && !state.logged ) {
426 printf("-ObjC forced load of %s(%s)\n", this->path(), member->name());
427 state.logged = true;
428 }
429 if ( ! state.loaded ) {
430 didSome |= state.file->forEachAtom(handler);
431 state.loaded = true;
432 }
433 }
434 }
435 }
436 }
437 return didSome;
438 }
439
440 template <typename A>
441 bool File<A>::justInTimeforEachAtom(const char* name, ld::File::AtomHandler& handler) const
442 {
443 // in force load case, all members already loaded
444 if ( _forceLoadAll || _forceLoadThis )
445 return false;
446
447 // do a hash search of table of contents looking for requested symbol
448 const struct ranlib* result = ranlibHashSearch(name);
449 if ( result != NULL ) {
450 const Entry* member = (Entry*)&_archiveFileContent[E::get32(result->ran_off)];
451 MemberState& state = this->makeObjectFileForMember(member);
452 // only call handler for each member once
453 if ( ! state.loaded && !state.logged ) {
454 if ( _verboseLoad ) {
455 printf("%s forced load of %s(%s)\n", name, this->path(), member->name());
456 state.logged = true;
457 }
458 state.loaded = true;
459 return state.file->forEachAtom(handler);
460 }
461 }
462 //fprintf(stderr, "%s NOT found in archive %s\n", name, this->path());
463 return false;
464 }
465
466 class CheckIsDataSymbolHandler : public ld::File::AtomHandler
467 {
468 public:
469 CheckIsDataSymbolHandler(const char* n) : _name(n), _isData(false) {}
470 virtual void doAtom(const class ld::Atom& atom) {
471 if ( strcmp(atom.name(), _name) == 0 ) {
472 if ( atom.section().type() != ld::Section::typeCode )
473 _isData = true;
474 }
475 }
476 virtual void doFile(const class ld::File&) {}
477 bool symbolIsDataDefinition() { return _isData; }
478
479 private:
480 const char* _name;
481 bool _isData;
482
483 };
484
485 template <typename A>
486 bool File<A>::justInTimeDataOnlyforEachAtom(const char* name, ld::File::AtomHandler& handler) const
487 {
488 // in force load case, all members already loaded
489 if ( _forceLoadAll || _forceLoadThis )
490 return false;
491
492 // do a hash search of table of contents looking for requested symbol
493 const struct ranlib* result = ranlibHashSearch(name);
494 if ( result != NULL ) {
495 const Entry* member = (Entry*)&_archiveFileContent[E::get32(result->ran_off)];
496 MemberState& state = this->makeObjectFileForMember(member);
497 // only call handler for each member once
498 if ( ! state.loaded ) {
499 CheckIsDataSymbolHandler checker(name);
500 state.file->forEachAtom(checker);
501 if ( checker.symbolIsDataDefinition() ) {
502 if ( _verboseLoad && !state.logged ) {
503 printf("%s forced load of %s(%s)\n", name, this->path(), member->name());
504 state.logged = true;
505 }
506 state.loaded = true;
507 return state.file->forEachAtom(handler);
508 }
509 }
510 }
511 //fprintf(stderr, "%s NOT found in archive %s\n", name, this->path());
512 return false;
513 }
514
515
516 typedef const struct ranlib* ConstRanLibPtr;
517
518 template <typename A>
519 ConstRanLibPtr File<A>::ranlibHashSearch(const char* name) const
520 {
521 typename NameToEntryMap::const_iterator pos = _hashTable.find(name);
522 if ( pos != _hashTable.end() )
523 return pos->second;
524 else
525 return NULL;
526 }
527
528 template <typename A>
529 void File<A>::buildHashTable()
530 {
531 // walk through list backwards, adding/overwriting entries
532 // this assures that with duplicates those earliest in the list will be found
533 for (int i = _tableOfContentCount-1; i >= 0; --i) {
534 const struct ranlib* entry = &_tableOfContents[i];
535 const char* entryName = &_tableOfContentStrings[E::get32(entry->ran_un.ran_strx)];
536 if ( E::get32(entry->ran_off) > _archiveFilelength ) {
537 throwf("malformed archive TOC entry for %s, offset %d is beyond end of file %lld\n",
538 entryName, entry->ran_off, _archiveFilelength);
539 }
540
541 //const Entry* member = (Entry*)&_archiveFileContent[E::get32(entry->ran_off)];
542 //fprintf(stderr, "adding hash %d, %s -> %p\n", i, entryName, entry);
543 _hashTable[entryName] = entry;
544 }
545 }
546
547 template <typename A>
548 void File<A>::dumpTableOfContents()
549 {
550 for (unsigned int i=0; i < _tableOfContentCount; ++i) {
551 const struct ranlib* e = &_tableOfContents[i];
552 printf("%s in %s\n", &_tableOfContentStrings[E::get32(e->ran_un.ran_strx)], ((Entry*)&_archiveFileContent[E::get32(e->ran_off)])->name());
553 }
554 }
555
556
557 //
558 // main function used by linker to instantiate archive files
559 //
560 ld::archive::File* parse(const uint8_t* fileContent, uint64_t fileLength,
561 const char* path, time_t modTime, uint32_t ordinal, const ParserOptions& opts)
562 {
563 switch ( opts.objOpts.architecture ) {
564 case CPU_TYPE_X86_64:
565 if ( archive::Parser<x86_64>::validFile(fileContent, fileLength, opts.objOpts) )
566 return archive::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
567 break;
568 case CPU_TYPE_I386:
569 if ( archive::Parser<x86>::validFile(fileContent, fileLength, opts.objOpts) )
570 return archive::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
571 break;
572 case CPU_TYPE_ARM:
573 if ( archive::Parser<arm>::validFile(fileContent, fileLength, opts.objOpts) )
574 return archive::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
575 break;
576 }
577 return NULL;
578 }
579
580
581
582 }; // namespace archive
583
584