]> git.saurik.com Git - apple/ld64.git/blob - src/ld/parsers/archive_file.cpp
ld64-123.2.tar.gz
[apple/ld64.git] / src / ld / parsers / archive_file.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2005-2009 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 #include <stdint.h>
26 #include <math.h>
27 #include <unistd.h>
28 #include <sys/param.h>
29 #include <mach-o/ranlib.h>
30 #include <ar.h>
31
32 #include <vector>
33 #include <set>
34 #include <algorithm>
35 #include <ext/hash_map>
36
37 #include "MachOFileAbstraction.hpp"
38 #include "Architectures.hpp"
39
40 #include "macho_relocatable_file.h"
41 #include "lto_file.h"
42 #include "archive_file.h"
43
44
45 namespace archive {
46
47 typedef const struct ranlib* ConstRanLibPtr;
48
49 // forward reference
50 template <typename A> class File;
51
52
53 template <typename A>
54 class Parser
55 {
56 public:
57 typedef typename A::P P;
58
59 static bool validFile(const uint8_t* fileContent, uint64_t fileLength,
60 const mach_o::relocatable::ParserOptions& opts) {
61 return File<A>::validFile(fileContent, fileLength, opts); }
62 static File<A>* parse(const uint8_t* fileContent, uint64_t fileLength,
63 const char* path, time_t mTime,
64 uint32_t ordinal, const ParserOptions& opts) {
65 return new File<A>(fileContent, fileLength, path, mTime,
66 ordinal, opts);
67 }
68
69 };
70
71 template <typename A>
72 class File : public ld::File
73 {
74 public:
75 static bool validFile(const uint8_t* fileContent, uint64_t fileLength,
76 const mach_o::relocatable::ParserOptions& opts);
77 File(const uint8_t* fileContent, uint64_t fileLength,
78 const char* pth, time_t modTime,
79 uint32_t ord, const ParserOptions& opts);
80 virtual ~File() {}
81
82 // overrides of ld::File
83 virtual bool forEachAtom(ld::File::AtomHandler&) const;
84 virtual bool justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const;
85 virtual uint32_t subFileCount() const { return _archiveFilelength/sizeof(ar_hdr); }
86
87 private:
88 static bool validMachOFile(const uint8_t* fileContent, uint64_t fileLength,
89 const mach_o::relocatable::ParserOptions& opts);
90 static bool validLTOFile(const uint8_t* fileContent, uint64_t fileLength,
91 const mach_o::relocatable::ParserOptions& opts);
92 static cpu_type_t architecture();
93
94
95 class Entry : ar_hdr
96 {
97 public:
98 const char* name() const;
99 time_t modificationTime() const;
100 const uint8_t* content() const;
101 uint32_t contentSize() const;
102 const Entry* next() const;
103 private:
104 bool hasLongName() const;
105 unsigned int getLongNameSpace() const;
106
107 };
108
109 class CStringEquals
110 {
111 public:
112 bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); }
113 };
114 typedef __gnu_cxx::hash_map<const char*, const struct ranlib*, __gnu_cxx::hash<const char*>, CStringEquals> NameToEntryMap;
115
116 typedef typename A::P P;
117 typedef typename A::P::E E;
118
119 const struct ranlib* ranlibHashSearch(const char* name) const;
120 ld::relocatable::File* makeObjectFileForMember(const Entry* member) const;
121 bool memberHasObjCCategories(const Entry* member) const;
122 void dumpTableOfContents();
123 void buildHashTable();
124
125 const uint8_t* _archiveFileContent;
126 uint64_t _archiveFilelength;
127 const struct ranlib* _tableOfContents;
128 uint32_t _tableOfContentCount;
129 const char* _tableOfContentStrings;
130 mutable std::vector<ld::relocatable::File*> _instantiatedFiles;
131 mutable std::set<const class Entry*> _instantiatedEntries;
132 NameToEntryMap _hashTable;
133 const bool _forceLoadAll;
134 const bool _forceLoadObjC;
135 const bool _forceLoadThis;
136 const bool _verboseLoad;
137 const bool _logAllFiles;
138 const mach_o::relocatable::ParserOptions _objOpts;
139 };
140
141
142 template <typename A>
143 bool File<A>::Entry::hasLongName() const
144 {
145 return ( strncmp(this->ar_name, AR_EFMT1, strlen(AR_EFMT1)) == 0 );
146 }
147
148 template <typename A>
149 unsigned int File<A>::Entry::getLongNameSpace() const
150 {
151 char* endptr;
152 long result = strtol(&this->ar_name[strlen(AR_EFMT1)], &endptr, 10);
153 return result;
154 }
155
156 template <typename A>
157 const char* File<A>::Entry::name() const
158 {
159 if ( this->hasLongName() ) {
160 int len = this->getLongNameSpace();
161 static char longName[256];
162 strncpy(longName, ((char*)this)+sizeof(ar_hdr), len);
163 longName[len] = '\0';
164 return longName;
165 }
166 else {
167 static char shortName[20];
168 strncpy(shortName, this->ar_name, 16);
169 shortName[16] = '\0';
170 char* space = strchr(shortName, ' ');
171 if ( space != NULL )
172 *space = '\0';
173 return shortName;
174 }
175 }
176
177 template <typename A>
178 time_t File<A>::Entry::modificationTime() const
179 {
180 char temp[14];
181 strncpy(temp, this->ar_date, 12);
182 temp[12] = '\0';
183 char* endptr;
184 return (time_t)strtol(temp, &endptr, 10);
185 }
186
187
188 template <typename A>
189 const uint8_t* File<A>::Entry::content() const
190 {
191 if ( this->hasLongName() )
192 return ((uint8_t*)this) + sizeof(ar_hdr) + this->getLongNameSpace();
193 else
194 return ((uint8_t*)this) + sizeof(ar_hdr);
195 }
196
197
198 template <typename A>
199 uint32_t File<A>::Entry::contentSize() const
200 {
201 char temp[12];
202 strncpy(temp, this->ar_size, 10);
203 temp[10] = '\0';
204 char* endptr;
205 long size = strtol(temp, &endptr, 10);
206 // long name is included in ar_size
207 if ( this->hasLongName() )
208 size -= this->getLongNameSpace();
209 return size;
210 }
211
212
213 template <typename A>
214 const class File<A>::Entry* File<A>::Entry::next() const
215 {
216 const uint8_t* p = this->content() + contentSize();
217 p = (const uint8_t*)(((uintptr_t)p+3) & (-4)); // 4-byte align
218 return (class File<A>::Entry*)p;
219 }
220
221
222 template <> cpu_type_t File<ppc>::architecture() { return CPU_TYPE_POWERPC; }
223 template <> cpu_type_t File<ppc64>::architecture() { return CPU_TYPE_POWERPC64; }
224 template <> cpu_type_t File<x86>::architecture() { return CPU_TYPE_I386; }
225 template <> cpu_type_t File<x86_64>::architecture() { return CPU_TYPE_X86_64; }
226 template <> cpu_type_t File<arm>::architecture() { return CPU_TYPE_ARM; }
227
228
229 template <typename A>
230 bool File<A>::validMachOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
231 {
232 return mach_o::relocatable::isObjectFile(fileContent, fileLength, opts);
233 }
234
235 template <typename A>
236 bool File<A>::validLTOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
237 {
238 return lto::isObjectFile(fileContent, fileLength, opts.architecture, opts.subType);
239 }
240
241
242
243 template <typename A>
244 bool File<A>::validFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
245 {
246 // must have valid archive header
247 if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
248 return false;
249
250 // peak at first .o file and verify it is correct architecture
251 const Entry* const start = (Entry*)&fileContent[8];
252 const Entry* const end = (Entry*)&fileContent[fileLength];
253 for (const Entry* p=start; p < end; p = p->next()) {
254 const char* memberName = p->name();
255 // skip option table-of-content member
256 if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
257 continue;
258 // archive is valid if first .o file is valid
259 return (validMachOFile(p->content(), p->contentSize(), opts) || validLTOFile(p->content(), p->contentSize(), opts));
260 }
261 // empty archive
262 return true;
263 }
264
265
266 template <typename A>
267 File<A>::File(const uint8_t fileContent[], uint64_t fileLength, const char* pth, time_t modTime,
268 uint32_t ord, const ParserOptions& opts)
269 : ld::File(strdup(pth), modTime, ord),
270 _archiveFileContent(fileContent), _archiveFilelength(fileLength),
271 _tableOfContents(NULL), _tableOfContentCount(0), _tableOfContentStrings(NULL),
272 _forceLoadAll(opts.forceLoadAll), _forceLoadObjC(opts.forceLoadObjC),
273 _forceLoadThis(opts.forceLoadThisArchive), _verboseLoad(opts.verboseLoad),
274 _logAllFiles(opts.logAllFiles), _objOpts(opts.objOpts)
275 {
276 if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
277 throw "not an archive";
278
279 if ( !_forceLoadAll ) {
280 const Entry* const firstMember = (Entry*)&_archiveFileContent[8];
281 if ( (strcmp(firstMember->name(), SYMDEF_SORTED) == 0) || (strcmp(firstMember->name(), SYMDEF) == 0) ) {
282 const uint8_t* contents = firstMember->content();
283 uint32_t ranlibArrayLen = E::get32(*((uint32_t*)contents));
284 _tableOfContents = (const struct ranlib*)&contents[4];
285 _tableOfContentCount = ranlibArrayLen / sizeof(struct ranlib);
286 _tableOfContentStrings = (const char*)&contents[ranlibArrayLen+8];
287 if ( ((uint8_t*)(&_tableOfContents[_tableOfContentCount]) > &fileContent[fileLength])
288 || ((uint8_t*)_tableOfContentStrings > &fileContent[fileLength]) )
289 throw "malformed archive, perhaps wrong architecture";
290 this->buildHashTable();
291 }
292 else
293 throw "archive has no table of contents";
294 }
295 }
296
297 template <>
298 bool File<x86>::memberHasObjCCategories(const Entry* member) const
299 {
300 // i386 uses ObjC1 ABI which has .objc_category* global symbols
301 return false;
302 }
303
304 template <>
305 bool File<ppc>::memberHasObjCCategories(const Entry* member) const
306 {
307 // ppc uses ObjC1 ABI which has .objc_category* global symbols
308 return false;
309 }
310
311
312 template <typename A>
313 bool File<A>::memberHasObjCCategories(const Entry* member) const
314 {
315 // x86_64 and ARM use ObjC2 which has no global symbol for categories
316 return mach_o::relocatable::hasObjC2Categories(member->content());
317 }
318
319
320 template <typename A>
321 ld::relocatable::File* File<A>::makeObjectFileForMember(const Entry* member) const
322 {
323 const char* memberName = member->name();
324 char memberPath[strlen(this->path()) + strlen(memberName)+4];
325 strcpy(memberPath, this->path());
326 strcat(memberPath, "(");
327 strcat(memberPath, memberName);
328 strcat(memberPath, ")");
329 //fprintf(stderr, "using %s from %s\n", memberName, this->path());
330 try {
331 // range check
332 if ( member > (Entry*)(_archiveFileContent+_archiveFilelength) )
333 throwf("corrupt archive, member starts past end of file");
334 if ( (member->content() + member->contentSize()) > (_archiveFileContent+_archiveFilelength) )
335 throwf("corrupt archive, member contents extends past end of file");
336 const char* mPath = strdup(memberPath);
337 // offset the ordinals in this mach-o .o file, so that atoms layout in same order as in archive
338 uint32_t memberIndex = ((uint8_t*)member - _archiveFileContent)/sizeof(ar_hdr);
339 // see if member is mach-o file
340 ld::relocatable::File* result = mach_o::relocatable::parse(member->content(), member->contentSize(),
341 mPath, member->modificationTime(),
342 this->ordinal() + memberIndex, _objOpts);
343 if ( result != NULL )
344 return result;
345 // see if member is llvm bitcode file
346 result = lto::parse(member->content(), member->contentSize(),
347 mPath, member->modificationTime(), this->ordinal() + memberIndex,
348 _objOpts.architecture, _objOpts.subType, _logAllFiles);
349 if ( result != NULL )
350 return result;
351
352 throwf("archive member '%s' with length %d is not mach-o or llvm bitcode", memberName, member->contentSize());
353 }
354 catch (const char* msg) {
355 throwf("in %s, %s", memberPath, msg);
356 }
357 }
358
359
360 template <typename A>
361 bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
362 {
363 bool didSome = false;
364 if ( _forceLoadAll || _forceLoadThis ) {
365 // call handler on all .o files in this archive
366 const Entry* const start = (Entry*)&_archiveFileContent[8];
367 const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
368 for (const Entry* p=start; p < end; p = p->next()) {
369 const char* memberName = p->name();
370 if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
371 continue;
372 if ( _verboseLoad ) {
373 if ( _forceLoadThis )
374 printf("-force_load forced load of %s(%s)\n", this->path(), memberName);
375 else
376 printf("-all_load forced load of %s(%s)\n", this->path(), memberName);
377 }
378 ld::relocatable::File* file = this->makeObjectFileForMember(p);
379 didSome |= file->forEachAtom(handler);
380 }
381 }
382 else if ( _forceLoadObjC ) {
383 // call handler on all .o files in this archive containing objc classes
384 for(typename NameToEntryMap::const_iterator it = _hashTable.begin(); it != _hashTable.end(); ++it) {
385 if ( (strncmp(it->first, ".objc_c", 7) == 0) || (strncmp(it->first, "_OBJC_CLASS_$_", 14) == 0) ) {
386 const Entry* member = (Entry*)&_archiveFileContent[E::get32(it->second->ran_off)];
387 if ( _instantiatedEntries.count(member) == 0 ) {
388 if ( _verboseLoad )
389 printf("-ObjC forced load of %s(%s)\n", this->path(), member->name());
390 // only return these atoms once
391 _instantiatedEntries.insert(member);
392 ld::relocatable::File* file = this->makeObjectFileForMember(member);
393 didSome |= file->forEachAtom(handler);
394 _instantiatedFiles.push_back(file);
395 }
396 }
397 }
398 // ObjC2 has no symbols in .o files with categories, but not classes, look deeper for those
399 const Entry* const start = (Entry*)&_archiveFileContent[8];
400 const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
401 for (const Entry* member=start; member < end; member = member->next()) {
402 // only look at files not already instantiated
403 if ( _instantiatedEntries.count(member) == 0 ) {
404 //fprintf(stderr, "checking member %s\n", member->name());
405 if ( this->memberHasObjCCategories(member) ) {
406 if ( _verboseLoad )
407 printf("-ObjC forced load of %s(%s)\n", this->path(), member->name());
408 // only return these atoms once
409 _instantiatedEntries.insert(member);
410 ld::relocatable::File* file = this->makeObjectFileForMember(member);
411 didSome |= file->forEachAtom(handler);
412 _instantiatedFiles.push_back(file);
413 }
414 }
415 }
416 }
417 return didSome;
418 }
419
420 template <typename A>
421 bool File<A>::justInTimeforEachAtom(const char* name, ld::File::AtomHandler& handler) const
422 {
423 // in force load case, all members already loaded
424 if ( _forceLoadAll || _forceLoadThis )
425 return false;
426
427 // do a hash search of table of contents looking for requested symbol
428 const struct ranlib* result = ranlibHashSearch(name);
429 if ( result != NULL ) {
430 const Entry* member = (Entry*)&_archiveFileContent[E::get32(result->ran_off)];
431 // only call handler for each member once
432 if ( _instantiatedEntries.count(member) == 0 ) {
433 _instantiatedEntries.insert(member);
434 if ( _verboseLoad )
435 printf("%s forced load of %s(%s)\n", name, this->path(), member->name());
436 ld::relocatable::File* file = this->makeObjectFileForMember(member);
437 _instantiatedFiles.push_back(file);
438 return file->forEachAtom(handler);
439 }
440 }
441 //fprintf(stderr, "%s NOT found in archive %s\n", name, this->path());
442 return false;
443 }
444
445
446 typedef const struct ranlib* ConstRanLibPtr;
447
448 template <typename A>
449 ConstRanLibPtr File<A>::ranlibHashSearch(const char* name) const
450 {
451 typename NameToEntryMap::const_iterator pos = _hashTable.find(name);
452 if ( pos != _hashTable.end() )
453 return pos->second;
454 else
455 return NULL;
456 }
457
458 template <typename A>
459 void File<A>::buildHashTable()
460 {
461 // walk through list backwards, adding/overwriting entries
462 // this assures that with duplicates those earliest in the list will be found
463 for (int i = _tableOfContentCount-1; i >= 0; --i) {
464 const struct ranlib* entry = &_tableOfContents[i];
465 const char* entryName = &_tableOfContentStrings[E::get32(entry->ran_un.ran_strx)];
466 if ( E::get32(entry->ran_off) > _archiveFilelength ) {
467 throwf("malformed archive TOC entry for %s, offset %d is beyond end of file %lld\n",
468 entryName, entry->ran_off, _archiveFilelength);
469 }
470
471 //const Entry* member = (Entry*)&_archiveFileContent[E::get32(entry->ran_off)];
472 //fprintf(stderr, "adding hash %d, %s -> %p\n", i, entryName, entry);
473 _hashTable[entryName] = entry;
474 }
475 }
476
477 template <typename A>
478 void File<A>::dumpTableOfContents()
479 {
480 for (unsigned int i=0; i < _tableOfContentCount; ++i) {
481 const struct ranlib* e = &_tableOfContents[i];
482 printf("%s in %s\n", &_tableOfContentStrings[E::get32(e->ran_un.ran_strx)], ((Entry*)&_archiveFileContent[E::get32(e->ran_off)])->name());
483 }
484 }
485
486
487 //
488 // main function used by linker to instantiate archive files
489 //
490 ld::File* parse(const uint8_t* fileContent, uint64_t fileLength,
491 const char* path, time_t modTime, uint32_t ordinal, const ParserOptions& opts)
492 {
493 switch ( opts.objOpts.architecture ) {
494 case CPU_TYPE_X86_64:
495 if ( archive::Parser<x86_64>::validFile(fileContent, fileLength, opts.objOpts) )
496 return archive::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
497 break;
498 case CPU_TYPE_I386:
499 if ( archive::Parser<x86>::validFile(fileContent, fileLength, opts.objOpts) )
500 return archive::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
501 break;
502 case CPU_TYPE_ARM:
503 if ( archive::Parser<arm>::validFile(fileContent, fileLength, opts.objOpts) )
504 return archive::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
505 break;
506 case CPU_TYPE_POWERPC:
507 if ( archive::Parser<ppc>::validFile(fileContent, fileLength, opts.objOpts) )
508 return archive::Parser<ppc>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
509 break;
510 case CPU_TYPE_POWERPC64:
511 if ( archive::Parser<ppc64>::validFile(fileContent, fileLength, opts.objOpts) )
512 return archive::Parser<ppc64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
513 break;
514 }
515 return NULL;
516 }
517
518
519
520 }; // namespace archive
521
522