X-Git-Url: https://git.saurik.com/apple/ld64.git/blobdiff_plain/07feaf2cb00322d025073eb8ec22189ada5e4180..a645023da60d22e86be13f7b4d97adeff8bc6665:/src/ld/parsers/archive_file.cpp?ds=inline diff --git a/src/ld/parsers/archive_file.cpp b/src/ld/parsers/archive_file.cpp new file mode 100644 index 0000000..8c866cd --- /dev/null +++ b/src/ld/parsers/archive_file.cpp @@ -0,0 +1,522 @@ +/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- + * + * Copyright (c) 2005-2009 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "MachOFileAbstraction.hpp" +#include "Architectures.hpp" + +#include "macho_relocatable_file.h" +#include "lto_file.h" +#include "archive_file.h" + + +namespace archive { + +typedef const struct ranlib* ConstRanLibPtr; + +// forward reference +template class File; + + +template +class Parser +{ +public: + typedef typename A::P P; + + static bool validFile(const uint8_t* fileContent, uint64_t fileLength, + const mach_o::relocatable::ParserOptions& opts) { + return File::validFile(fileContent, fileLength, opts); } + static File* parse(const uint8_t* fileContent, uint64_t fileLength, + const char* path, time_t mTime, + uint32_t ordinal, const ParserOptions& opts) { + return new File(fileContent, fileLength, path, mTime, + ordinal, opts); + } + +}; + +template +class File : public ld::File +{ +public: + static bool validFile(const uint8_t* fileContent, uint64_t fileLength, + const mach_o::relocatable::ParserOptions& opts); + File(const uint8_t* fileContent, uint64_t fileLength, + const char* pth, time_t modTime, + uint32_t ord, const ParserOptions& opts); + virtual ~File() {} + + // overrides of ld::File + virtual bool forEachAtom(ld::File::AtomHandler&) const; + virtual bool justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const; + virtual uint32_t subFileCount() const { return _archiveFilelength/sizeof(ar_hdr); } + +private: + static bool validMachOFile(const uint8_t* fileContent, uint64_t fileLength, + const mach_o::relocatable::ParserOptions& opts); + static bool validLTOFile(const uint8_t* fileContent, uint64_t fileLength, + const mach_o::relocatable::ParserOptions& opts); + static cpu_type_t architecture(); + + + class Entry : ar_hdr + { + public: + const char* name() const; + time_t modificationTime() const; + const uint8_t* content() const; + uint32_t contentSize() const; + const Entry* next() const; + private: + bool hasLongName() const; + unsigned int getLongNameSpace() const; + + }; + + class CStringEquals + { + public: + bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); } + }; + typedef __gnu_cxx::hash_map, CStringEquals> NameToEntryMap; + + typedef typename A::P P; + typedef typename A::P::E E; + + const struct ranlib* ranlibHashSearch(const char* name) const; + ld::relocatable::File* makeObjectFileForMember(const Entry* member) const; + bool memberHasObjCCategories(const Entry* member) const; + void dumpTableOfContents(); + void buildHashTable(); + + const uint8_t* _archiveFileContent; + uint64_t _archiveFilelength; + const struct ranlib* _tableOfContents; + uint32_t _tableOfContentCount; + const char* _tableOfContentStrings; + mutable std::vector _instantiatedFiles; + mutable std::set _instantiatedEntries; + NameToEntryMap _hashTable; + const bool _forceLoadAll; + const bool _forceLoadObjC; + const bool _forceLoadThis; + const bool _verboseLoad; + const bool _logAllFiles; + const mach_o::relocatable::ParserOptions _objOpts; +}; + + +template +bool File::Entry::hasLongName() const +{ + return ( strncmp(this->ar_name, AR_EFMT1, strlen(AR_EFMT1)) == 0 ); +} + +template +unsigned int File::Entry::getLongNameSpace() const +{ + char* endptr; + long result = strtol(&this->ar_name[strlen(AR_EFMT1)], &endptr, 10); + return result; +} + +template +const char* File::Entry::name() const +{ + if ( this->hasLongName() ) { + int len = this->getLongNameSpace(); + static char longName[256]; + strncpy(longName, ((char*)this)+sizeof(ar_hdr), len); + longName[len] = '\0'; + return longName; + } + else { + static char shortName[20]; + strncpy(shortName, this->ar_name, 16); + shortName[16] = '\0'; + char* space = strchr(shortName, ' '); + if ( space != NULL ) + *space = '\0'; + return shortName; + } +} + +template +time_t File::Entry::modificationTime() const +{ + char temp[14]; + strncpy(temp, this->ar_date, 12); + temp[12] = '\0'; + char* endptr; + return (time_t)strtol(temp, &endptr, 10); +} + + +template +const uint8_t* File::Entry::content() const +{ + if ( this->hasLongName() ) + return ((uint8_t*)this) + sizeof(ar_hdr) + this->getLongNameSpace(); + else + return ((uint8_t*)this) + sizeof(ar_hdr); +} + + +template +uint32_t File::Entry::contentSize() const +{ + char temp[12]; + strncpy(temp, this->ar_size, 10); + temp[10] = '\0'; + char* endptr; + long size = strtol(temp, &endptr, 10); + // long name is included in ar_size + if ( this->hasLongName() ) + size -= this->getLongNameSpace(); + return size; +} + + +template +const class File::Entry* File::Entry::next() const +{ + const uint8_t* p = this->content() + contentSize(); + p = (const uint8_t*)(((uintptr_t)p+3) & (-4)); // 4-byte align + return (class File::Entry*)p; +} + + +template <> cpu_type_t File::architecture() { return CPU_TYPE_POWERPC; } +template <> cpu_type_t File::architecture() { return CPU_TYPE_POWERPC64; } +template <> cpu_type_t File::architecture() { return CPU_TYPE_I386; } +template <> cpu_type_t File::architecture() { return CPU_TYPE_X86_64; } +template <> cpu_type_t File::architecture() { return CPU_TYPE_ARM; } + + +template +bool File::validMachOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts) +{ + return mach_o::relocatable::isObjectFile(fileContent, fileLength, opts); +} + +template +bool File::validLTOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts) +{ + return lto::isObjectFile(fileContent, fileLength, opts.architecture, opts.subType); +} + + + +template +bool File::validFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts) +{ + // must have valid archive header + if ( strncmp((const char*)fileContent, "!\n", 8) != 0 ) + return false; + + // peak at first .o file and verify it is correct architecture + const Entry* const start = (Entry*)&fileContent[8]; + const Entry* const end = (Entry*)&fileContent[fileLength]; + for (const Entry* p=start; p < end; p = p->next()) { + const char* memberName = p->name(); + // skip option table-of-content member + if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) ) + continue; + // archive is valid if first .o file is valid + return (validMachOFile(p->content(), p->contentSize(), opts) || validLTOFile(p->content(), p->contentSize(), opts)); + } + // empty archive + return true; +} + + +template +File::File(const uint8_t fileContent[], uint64_t fileLength, const char* pth, time_t modTime, + uint32_t ord, const ParserOptions& opts) + : ld::File(strdup(pth), modTime, ord), + _archiveFileContent(fileContent), _archiveFilelength(fileLength), + _tableOfContents(NULL), _tableOfContentCount(0), _tableOfContentStrings(NULL), + _forceLoadAll(opts.forceLoadAll), _forceLoadObjC(opts.forceLoadObjC), + _forceLoadThis(opts.forceLoadThisArchive), _verboseLoad(opts.verboseLoad), + _logAllFiles(opts.logAllFiles), _objOpts(opts.objOpts) +{ + if ( strncmp((const char*)fileContent, "!\n", 8) != 0 ) + throw "not an archive"; + + if ( !_forceLoadAll ) { + const Entry* const firstMember = (Entry*)&_archiveFileContent[8]; + if ( (strcmp(firstMember->name(), SYMDEF_SORTED) == 0) || (strcmp(firstMember->name(), SYMDEF) == 0) ) { + const uint8_t* contents = firstMember->content(); + uint32_t ranlibArrayLen = E::get32(*((uint32_t*)contents)); + _tableOfContents = (const struct ranlib*)&contents[4]; + _tableOfContentCount = ranlibArrayLen / sizeof(struct ranlib); + _tableOfContentStrings = (const char*)&contents[ranlibArrayLen+8]; + if ( ((uint8_t*)(&_tableOfContents[_tableOfContentCount]) > &fileContent[fileLength]) + || ((uint8_t*)_tableOfContentStrings > &fileContent[fileLength]) ) + throw "malformed archive, perhaps wrong architecture"; + this->buildHashTable(); + } + else + throw "archive has no table of contents"; + } +} + +template <> +bool File::memberHasObjCCategories(const Entry* member) const +{ + // i386 uses ObjC1 ABI which has .objc_category* global symbols + return false; +} + +template <> +bool File::memberHasObjCCategories(const Entry* member) const +{ + // ppc uses ObjC1 ABI which has .objc_category* global symbols + return false; +} + + +template +bool File::memberHasObjCCategories(const Entry* member) const +{ + // x86_64 and ARM use ObjC2 which has no global symbol for categories + return mach_o::relocatable::hasObjC2Categories(member->content()); +} + + +template +ld::relocatable::File* File::makeObjectFileForMember(const Entry* member) const +{ + const char* memberName = member->name(); + char memberPath[strlen(this->path()) + strlen(memberName)+4]; + strcpy(memberPath, this->path()); + strcat(memberPath, "("); + strcat(memberPath, memberName); + strcat(memberPath, ")"); + //fprintf(stderr, "using %s from %s\n", memberName, this->path()); + try { + // range check + if ( member > (Entry*)(_archiveFileContent+_archiveFilelength) ) + throwf("corrupt archive, member starts past end of file"); + if ( (member->content() + member->contentSize()) > (_archiveFileContent+_archiveFilelength) ) + throwf("corrupt archive, member contents extends past end of file"); + const char* mPath = strdup(memberPath); + // offset the ordinals in this mach-o .o file, so that atoms layout in same order as in archive + uint32_t memberIndex = ((uint8_t*)member - _archiveFileContent)/sizeof(ar_hdr); + // see if member is mach-o file + ld::relocatable::File* result = mach_o::relocatable::parse(member->content(), member->contentSize(), + mPath, member->modificationTime(), + this->ordinal() + memberIndex, _objOpts); + if ( result != NULL ) + return result; + // see if member is llvm bitcode file + result = lto::parse(member->content(), member->contentSize(), + mPath, member->modificationTime(), this->ordinal() + memberIndex, + _objOpts.architecture, _objOpts.subType, _logAllFiles); + if ( result != NULL ) + return result; + + throwf("archive member '%s' with length %d is not mach-o or llvm bitcode", memberName, member->contentSize()); + } + catch (const char* msg) { + throwf("in %s, %s", memberPath, msg); + } +} + + +template +bool File::forEachAtom(ld::File::AtomHandler& handler) const +{ + bool didSome = false; + if ( _forceLoadAll || _forceLoadThis ) { + // call handler on all .o files in this archive + const Entry* const start = (Entry*)&_archiveFileContent[8]; + const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength]; + for (const Entry* p=start; p < end; p = p->next()) { + const char* memberName = p->name(); + if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) ) + continue; + if ( _verboseLoad ) { + if ( _forceLoadThis ) + printf("-force_load forced load of %s(%s)\n", this->path(), memberName); + else + printf("-all_load forced load of %s(%s)\n", this->path(), memberName); + } + ld::relocatable::File* file = this->makeObjectFileForMember(p); + didSome |= file->forEachAtom(handler); + } + } + else if ( _forceLoadObjC ) { + // call handler on all .o files in this archive containing objc classes + for(typename NameToEntryMap::const_iterator it = _hashTable.begin(); it != _hashTable.end(); ++it) { + if ( (strncmp(it->first, ".objc_c", 7) == 0) || (strncmp(it->first, "_OBJC_CLASS_$_", 14) == 0) ) { + const Entry* member = (Entry*)&_archiveFileContent[E::get32(it->second->ran_off)]; + if ( _instantiatedEntries.count(member) == 0 ) { + if ( _verboseLoad ) + printf("-ObjC forced load of %s(%s)\n", this->path(), member->name()); + // only return these atoms once + _instantiatedEntries.insert(member); + ld::relocatable::File* file = this->makeObjectFileForMember(member); + didSome |= file->forEachAtom(handler); + _instantiatedFiles.push_back(file); + } + } + } + // ObjC2 has no symbols in .o files with categories, but not classes, look deeper for those + const Entry* const start = (Entry*)&_archiveFileContent[8]; + const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength]; + for (const Entry* member=start; member < end; member = member->next()) { + // only look at files not already instantiated + if ( _instantiatedEntries.count(member) == 0 ) { + //fprintf(stderr, "checking member %s\n", member->name()); + if ( this->memberHasObjCCategories(member) ) { + if ( _verboseLoad ) + printf("-ObjC forced load of %s(%s)\n", this->path(), member->name()); + // only return these atoms once + _instantiatedEntries.insert(member); + ld::relocatable::File* file = this->makeObjectFileForMember(member); + didSome |= file->forEachAtom(handler); + _instantiatedFiles.push_back(file); + } + } + } + } + return didSome; +} + +template +bool File::justInTimeforEachAtom(const char* name, ld::File::AtomHandler& handler) const +{ + // in force load case, all members already loaded + if ( _forceLoadAll || _forceLoadThis ) + return false; + + // do a hash search of table of contents looking for requested symbol + const struct ranlib* result = ranlibHashSearch(name); + if ( result != NULL ) { + const Entry* member = (Entry*)&_archiveFileContent[E::get32(result->ran_off)]; + // only call handler for each member once + if ( _instantiatedEntries.count(member) == 0 ) { + _instantiatedEntries.insert(member); + if ( _verboseLoad ) + printf("%s forced load of %s(%s)\n", name, this->path(), member->name()); + ld::relocatable::File* file = this->makeObjectFileForMember(member); + _instantiatedFiles.push_back(file); + return file->forEachAtom(handler); + } + } + //fprintf(stderr, "%s NOT found in archive %s\n", name, this->path()); + return false; +} + + +typedef const struct ranlib* ConstRanLibPtr; + +template +ConstRanLibPtr File::ranlibHashSearch(const char* name) const +{ + typename NameToEntryMap::const_iterator pos = _hashTable.find(name); + if ( pos != _hashTable.end() ) + return pos->second; + else + return NULL; +} + +template +void File::buildHashTable() +{ + // walk through list backwards, adding/overwriting entries + // this assures that with duplicates those earliest in the list will be found + for (int i = _tableOfContentCount-1; i >= 0; --i) { + const struct ranlib* entry = &_tableOfContents[i]; + const char* entryName = &_tableOfContentStrings[E::get32(entry->ran_un.ran_strx)]; + if ( E::get32(entry->ran_off) > _archiveFilelength ) { + throwf("malformed archive TOC entry for %s, offset %d is beyond end of file %lld\n", + entryName, entry->ran_off, _archiveFilelength); + } + + //const Entry* member = (Entry*)&_archiveFileContent[E::get32(entry->ran_off)]; + //fprintf(stderr, "adding hash %d, %s -> %p\n", i, entryName, entry); + _hashTable[entryName] = entry; + } +} + +template +void File::dumpTableOfContents() +{ + for (unsigned int i=0; i < _tableOfContentCount; ++i) { + const struct ranlib* e = &_tableOfContents[i]; + printf("%s in %s\n", &_tableOfContentStrings[E::get32(e->ran_un.ran_strx)], ((Entry*)&_archiveFileContent[E::get32(e->ran_off)])->name()); + } +} + + +// +// main function used by linker to instantiate archive files +// +ld::File* parse(const uint8_t* fileContent, uint64_t fileLength, + const char* path, time_t modTime, uint32_t ordinal, const ParserOptions& opts) +{ + switch ( opts.objOpts.architecture ) { + case CPU_TYPE_X86_64: + if ( archive::Parser::validFile(fileContent, fileLength, opts.objOpts) ) + return archive::Parser::parse(fileContent, fileLength, path, modTime, ordinal, opts); + break; + case CPU_TYPE_I386: + if ( archive::Parser::validFile(fileContent, fileLength, opts.objOpts) ) + return archive::Parser::parse(fileContent, fileLength, path, modTime, ordinal, opts); + break; + case CPU_TYPE_ARM: + if ( archive::Parser::validFile(fileContent, fileLength, opts.objOpts) ) + return archive::Parser::parse(fileContent, fileLength, path, modTime, ordinal, opts); + break; + case CPU_TYPE_POWERPC: + if ( archive::Parser::validFile(fileContent, fileLength, opts.objOpts) ) + return archive::Parser::parse(fileContent, fileLength, path, modTime, ordinal, opts); + break; + case CPU_TYPE_POWERPC64: + if ( archive::Parser::validFile(fileContent, fileLength, opts.objOpts) ) + return archive::Parser::parse(fileContent, fileLength, path, modTime, ordinal, opts); + break; + } + return NULL; +} + + + +}; // namespace archive + +