]> git.saurik.com Git - apple/ld64.git/blobdiff - src/ld/parsers/archive_file.cpp
ld64-123.2.tar.gz
[apple/ld64.git] / src / ld / parsers / archive_file.cpp
diff --git a/src/ld/parsers/archive_file.cpp b/src/ld/parsers/archive_file.cpp
new file mode 100644 (file)
index 0000000..8c866cd
--- /dev/null
@@ -0,0 +1,522 @@
+/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
+ *
+ * Copyright (c) 2005-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <stdint.h>
+#include <math.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <mach-o/ranlib.h>
+#include <ar.h>
+
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <ext/hash_map>
+
+#include "MachOFileAbstraction.hpp"
+#include "Architectures.hpp"
+
+#include "macho_relocatable_file.h"
+#include "lto_file.h"
+#include "archive_file.h"
+
+
+namespace archive {
+
+typedef const struct ranlib* ConstRanLibPtr;
+
+// forward reference
+template <typename A> class File;
+
+
+template <typename A>
+class Parser 
+{
+public:
+       typedef typename A::P                                   P;
+
+       static bool                                                                             validFile(const uint8_t* fileContent, uint64_t fileLength, 
+                                                                                                                               const mach_o::relocatable::ParserOptions& opts) {
+                                                                                                               return File<A>::validFile(fileContent, fileLength, opts); }
+       static File<A>*                                                                 parse(const uint8_t* fileContent, uint64_t fileLength, 
+                                                                                                                       const char* path, time_t mTime, 
+                                                                                                                       uint32_t ordinal, const ParserOptions& opts) {
+                                                                                                                        return new File<A>(fileContent, fileLength, path, mTime,
+                                                                                                                                                       ordinal, opts);
+                                                                                                               }
+
+};
+
+template <typename A>
+class File : public ld::File
+{
+public:
+       static bool                                                                             validFile(const uint8_t* fileContent, uint64_t fileLength,
+                                                                                                                               const mach_o::relocatable::ParserOptions& opts);
+                                                                                                       File(const uint8_t* fileContent, uint64_t fileLength,
+                                                                                                                       const char* pth, time_t modTime, 
+                                                                                                                       uint32_t ord, const ParserOptions& opts);
+       virtual                                                                                 ~File() {}
+
+       // overrides of ld::File
+       virtual bool                                                                            forEachAtom(ld::File::AtomHandler&) const;
+       virtual bool                                                                            justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const;
+       virtual uint32_t                                                                        subFileCount() const  { return _archiveFilelength/sizeof(ar_hdr); }
+       
+private:
+       static bool                                                                             validMachOFile(const uint8_t* fileContent, uint64_t fileLength, 
+                                                                                                                                       const mach_o::relocatable::ParserOptions& opts);
+       static bool                                                                             validLTOFile(const uint8_t* fileContent, uint64_t fileLength, 
+                                                                                                                                       const mach_o::relocatable::ParserOptions& opts);
+       static cpu_type_t                                                               architecture();
+
+
+       class Entry : ar_hdr
+       {
+       public:
+               const char*                     name() const;
+               time_t                          modificationTime() const;
+               const uint8_t*          content() const;
+               uint32_t                        contentSize() const;
+               const Entry*            next() const;
+       private:
+               bool                            hasLongName() const;
+               unsigned int            getLongNameSpace() const;
+
+       };
+
+       class CStringEquals
+       {
+       public:
+               bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); }
+       };
+       typedef __gnu_cxx::hash_map<const char*, const struct ranlib*, __gnu_cxx::hash<const char*>, CStringEquals> NameToEntryMap;
+
+       typedef typename A::P                                                   P;
+       typedef typename A::P::E                                                E;
+
+       const struct ranlib*                                                    ranlibHashSearch(const char* name) const;
+       ld::relocatable::File*                                                  makeObjectFileForMember(const Entry* member) const;
+       bool                                                                                    memberHasObjCCategories(const Entry* member) const;
+       void                                                                                    dumpTableOfContents();
+       void                                                                                    buildHashTable();
+
+       const uint8_t*                                                                  _archiveFileContent;
+       uint64_t                                                                                _archiveFilelength;
+       const struct ranlib*                                                    _tableOfContents;
+       uint32_t                                                                                _tableOfContentCount;
+       const char*                                                                             _tableOfContentStrings;
+       mutable std::vector<ld::relocatable::File*>             _instantiatedFiles;
+       mutable std::set<const class Entry*>                    _instantiatedEntries;
+       NameToEntryMap                                                                  _hashTable;
+       const bool                                                                              _forceLoadAll;
+       const bool                                                                              _forceLoadObjC;
+       const bool                                                                              _forceLoadThis;
+       const bool                                                                              _verboseLoad;
+       const bool                                                                              _logAllFiles;
+       const mach_o::relocatable::ParserOptions                _objOpts;
+};
+
+
+template <typename A>
+bool File<A>::Entry::hasLongName() const
+{
+       return ( strncmp(this->ar_name, AR_EFMT1, strlen(AR_EFMT1)) == 0 );
+}
+
+template <typename A>
+unsigned int File<A>::Entry::getLongNameSpace() const
+{
+       char* endptr;
+       long result = strtol(&this->ar_name[strlen(AR_EFMT1)], &endptr, 10);
+       return result;
+}
+
+template <typename A>
+const char* File<A>::Entry::name() const
+{
+       if ( this->hasLongName() ) {
+               int len = this->getLongNameSpace();
+               static char longName[256];
+               strncpy(longName, ((char*)this)+sizeof(ar_hdr), len);
+               longName[len] = '\0';
+               return longName;
+       }
+       else {
+               static char shortName[20];
+               strncpy(shortName, this->ar_name, 16);
+               shortName[16] = '\0';
+               char* space = strchr(shortName, ' ');
+               if ( space != NULL )
+                       *space = '\0';
+               return shortName;
+       }
+}
+
+template <typename A>
+time_t File<A>::Entry::modificationTime() const
+{
+       char temp[14];
+       strncpy(temp, this->ar_date, 12);
+       temp[12] = '\0';
+       char* endptr;
+       return (time_t)strtol(temp, &endptr, 10);
+}
+
+
+template <typename A>
+const uint8_t* File<A>::Entry::content() const
+{
+       if ( this->hasLongName() )
+               return ((uint8_t*)this) + sizeof(ar_hdr) + this->getLongNameSpace();
+       else
+               return ((uint8_t*)this) + sizeof(ar_hdr);
+}
+
+
+template <typename A>
+uint32_t File<A>::Entry::contentSize() const
+{
+       char temp[12];
+       strncpy(temp, this->ar_size, 10);
+       temp[10] = '\0';
+       char* endptr;
+       long size = strtol(temp, &endptr, 10);
+       // long name is included in ar_size
+       if ( this->hasLongName() )
+               size -= this->getLongNameSpace();
+       return size;
+}
+
+
+template <typename A>
+const class File<A>::Entry* File<A>::Entry::next() const
+{
+       const uint8_t* p = this->content() + contentSize();
+       p = (const uint8_t*)(((uintptr_t)p+3) & (-4));  // 4-byte align
+       return (class File<A>::Entry*)p;
+}
+
+
+template <> cpu_type_t File<ppc>::architecture()    { return CPU_TYPE_POWERPC; }
+template <> cpu_type_t File<ppc64>::architecture()  { return CPU_TYPE_POWERPC64; }
+template <> cpu_type_t File<x86>::architecture()    { return CPU_TYPE_I386; }
+template <> cpu_type_t File<x86_64>::architecture() { return CPU_TYPE_X86_64; }
+template <> cpu_type_t File<arm>::architecture()    { return CPU_TYPE_ARM; }
+
+
+template <typename A>
+bool File<A>::validMachOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
+{      
+       return mach_o::relocatable::isObjectFile(fileContent, fileLength, opts);
+}
+
+template <typename A>
+bool File<A>::validLTOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
+{
+       return lto::isObjectFile(fileContent, fileLength, opts.architecture, opts.subType);
+}
+
+
+
+template <typename A>
+bool File<A>::validFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
+{
+       // must have valid archive header
+       if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
+               return false;
+               
+       // peak at first .o file and verify it is correct architecture
+       const Entry* const start = (Entry*)&fileContent[8];
+       const Entry* const end = (Entry*)&fileContent[fileLength];
+       for (const Entry* p=start; p < end; p = p->next()) {
+               const char* memberName = p->name();
+               // skip option table-of-content member
+               if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
+                       continue;
+               // archive is valid if first .o file is valid
+               return (validMachOFile(p->content(), p->contentSize(), opts) || validLTOFile(p->content(), p->contentSize(), opts));
+       }       
+       // empty archive
+       return true;
+}
+
+
+template <typename A>
+File<A>::File(const uint8_t fileContent[], uint64_t fileLength, const char* pth, time_t modTime, 
+                                       uint32_t ord, const ParserOptions& opts)
+ : ld::File(strdup(pth), modTime, ord),
+       _archiveFileContent(fileContent), _archiveFilelength(fileLength), 
+       _tableOfContents(NULL), _tableOfContentCount(0), _tableOfContentStrings(NULL), 
+       _forceLoadAll(opts.forceLoadAll), _forceLoadObjC(opts.forceLoadObjC), 
+       _forceLoadThis(opts.forceLoadThisArchive), _verboseLoad(opts.verboseLoad), 
+       _logAllFiles(opts.logAllFiles), _objOpts(opts.objOpts)
+{
+       if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
+               throw "not an archive";
+
+       if ( !_forceLoadAll ) {
+               const Entry* const firstMember = (Entry*)&_archiveFileContent[8];
+               if ( (strcmp(firstMember->name(), SYMDEF_SORTED) == 0) || (strcmp(firstMember->name(), SYMDEF) == 0) ) {
+                       const uint8_t* contents = firstMember->content();
+                       uint32_t ranlibArrayLen = E::get32(*((uint32_t*)contents));
+                       _tableOfContents = (const struct ranlib*)&contents[4];
+                       _tableOfContentCount = ranlibArrayLen / sizeof(struct ranlib);
+                       _tableOfContentStrings = (const char*)&contents[ranlibArrayLen+8];
+                       if ( ((uint8_t*)(&_tableOfContents[_tableOfContentCount]) > &fileContent[fileLength])
+                               || ((uint8_t*)_tableOfContentStrings > &fileContent[fileLength]) )
+                               throw "malformed archive, perhaps wrong architecture";
+                       this->buildHashTable();
+               }
+               else
+                       throw "archive has no table of contents";
+       }
+}
+
+template <>
+bool File<x86>::memberHasObjCCategories(const Entry* member) const
+{
+       // i386 uses ObjC1 ABI which has .objc_category* global symbols
+       return false;
+}
+
+template <>
+bool File<ppc>::memberHasObjCCategories(const Entry* member) const
+{
+       // ppc uses ObjC1 ABI which has .objc_category* global symbols
+       return false;
+}
+
+
+template <typename A>
+bool File<A>::memberHasObjCCategories(const Entry* member) const
+{
+       // x86_64 and ARM use ObjC2 which has no global symbol for categories
+       return mach_o::relocatable::hasObjC2Categories(member->content());
+}
+
+
+template <typename A>
+ld::relocatable::File* File<A>::makeObjectFileForMember(const Entry* member) const
+{
+       const char* memberName = member->name();
+       char memberPath[strlen(this->path()) + strlen(memberName)+4];
+       strcpy(memberPath, this->path());
+       strcat(memberPath, "(");
+       strcat(memberPath, memberName);
+       strcat(memberPath, ")");
+       //fprintf(stderr, "using %s from %s\n", memberName, this->path());
+       try {
+               // range check
+               if ( member > (Entry*)(_archiveFileContent+_archiveFilelength) )
+                       throwf("corrupt archive, member starts past end of file");                                                                              
+               if ( (member->content() + member->contentSize()) > (_archiveFileContent+_archiveFilelength) )
+                       throwf("corrupt archive, member contents extends past end of file");                                                                            
+               const char* mPath = strdup(memberPath);
+               // offset the ordinals in this mach-o .o file, so that atoms layout in same order as in archive
+               uint32_t memberIndex = ((uint8_t*)member - _archiveFileContent)/sizeof(ar_hdr);
+               // see if member is mach-o file
+               ld::relocatable::File* result = mach_o::relocatable::parse(member->content(), member->contentSize(), 
+                                                                                                                                       mPath, member->modificationTime(), 
+                                                                                                                                       this->ordinal() + memberIndex, _objOpts);
+               if ( result != NULL )
+                       return result;
+               // see if member is llvm bitcode file
+               result = lto::parse(member->content(), member->contentSize(), 
+                                                               mPath, member->modificationTime(), this->ordinal() + memberIndex, 
+                                                               _objOpts.architecture, _objOpts.subType, _logAllFiles);
+               if ( result != NULL )
+                       return result;
+                       
+               throwf("archive member '%s' with length %d is not mach-o or llvm bitcode", memberName, member->contentSize());
+       }
+       catch (const char* msg) {
+               throwf("in %s, %s", memberPath, msg);
+       }
+}
+
+
+template <typename A>
+bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
+{
+       bool didSome = false;
+       if ( _forceLoadAll || _forceLoadThis ) {
+               // call handler on all .o files in this archive
+               const Entry* const start = (Entry*)&_archiveFileContent[8];
+               const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
+               for (const Entry* p=start; p < end; p = p->next()) {
+                       const char* memberName = p->name();
+                       if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
+                               continue;
+                       if ( _verboseLoad ) {
+                               if ( _forceLoadThis )
+                                       printf("-force_load forced load of %s(%s)\n", this->path(), memberName);
+                               else
+                                       printf("-all_load forced load of %s(%s)\n", this->path(), memberName);
+                       }
+                       ld::relocatable::File* file = this->makeObjectFileForMember(p);
+                       didSome |= file->forEachAtom(handler);
+               }
+       }
+       else if ( _forceLoadObjC ) {
+               // call handler on all .o files in this archive containing objc classes
+               for(typename NameToEntryMap::const_iterator it = _hashTable.begin(); it != _hashTable.end(); ++it) {
+                       if ( (strncmp(it->first, ".objc_c", 7) == 0) || (strncmp(it->first, "_OBJC_CLASS_$_", 14) == 0) ) {
+                               const Entry* member = (Entry*)&_archiveFileContent[E::get32(it->second->ran_off)];
+                               if ( _instantiatedEntries.count(member) == 0 ) {
+                                       if ( _verboseLoad )
+                                               printf("-ObjC forced load of %s(%s)\n", this->path(), member->name());
+                                       // only return these atoms once
+                                       _instantiatedEntries.insert(member);
+                                       ld::relocatable::File* file = this->makeObjectFileForMember(member);
+                                       didSome |= file->forEachAtom(handler);
+                                       _instantiatedFiles.push_back(file);
+                               }
+                       }
+               }
+               // ObjC2 has no symbols in .o files with categories, but not classes, look deeper for those
+               const Entry* const start = (Entry*)&_archiveFileContent[8];
+               const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
+               for (const Entry* member=start; member < end; member = member->next()) {
+                       // only look at files not already instantiated
+                       if ( _instantiatedEntries.count(member) == 0 ) {
+                               //fprintf(stderr, "checking member %s\n", member->name());
+                               if ( this->memberHasObjCCategories(member) ) {
+                                       if ( _verboseLoad )
+                                               printf("-ObjC forced load of %s(%s)\n", this->path(), member->name());
+                                       // only return these atoms once
+                                       _instantiatedEntries.insert(member);
+                                       ld::relocatable::File* file = this->makeObjectFileForMember(member);
+                                       didSome |= file->forEachAtom(handler);
+                                       _instantiatedFiles.push_back(file);
+                               }
+                       }
+               }
+       }
+       return didSome;
+}
+
+template <typename A>
+bool File<A>::justInTimeforEachAtom(const char* name, ld::File::AtomHandler& handler) const
+{
+       // in force load case, all members already loaded
+       if ( _forceLoadAll || _forceLoadThis ) 
+               return false;
+       
+       // do a hash search of table of contents looking for requested symbol
+       const struct ranlib* result = ranlibHashSearch(name);
+       if ( result != NULL ) {
+               const Entry* member = (Entry*)&_archiveFileContent[E::get32(result->ran_off)];
+               // only call handler for each member once
+               if ( _instantiatedEntries.count(member) == 0 ) {
+                       _instantiatedEntries.insert(member);
+                       if ( _verboseLoad ) 
+                               printf("%s forced load of %s(%s)\n", name, this->path(), member->name());
+                       ld::relocatable::File* file = this->makeObjectFileForMember(member);
+                       _instantiatedFiles.push_back(file);
+                       return file->forEachAtom(handler);
+               }
+       }
+       //fprintf(stderr, "%s NOT found in archive %s\n", name, this->path());
+       return false;
+}
+
+
+typedef const struct ranlib* ConstRanLibPtr;
+
+template <typename A>
+ConstRanLibPtr  File<A>::ranlibHashSearch(const char* name) const
+{
+       typename NameToEntryMap::const_iterator pos = _hashTable.find(name);
+       if ( pos != _hashTable.end() )
+               return pos->second;
+       else
+               return NULL;
+}
+
+template <typename A>
+void File<A>::buildHashTable()
+{
+       // walk through list backwards, adding/overwriting entries
+       // this assures that with duplicates those earliest in the list will be found
+       for (int i = _tableOfContentCount-1; i >= 0; --i) {
+               const struct ranlib* entry = &_tableOfContents[i];
+               const char* entryName = &_tableOfContentStrings[E::get32(entry->ran_un.ran_strx)];
+               if ( E::get32(entry->ran_off) > _archiveFilelength ) {
+                       throwf("malformed archive TOC entry for %s, offset %d is beyond end of file %lld\n",
+                               entryName, entry->ran_off, _archiveFilelength);
+               }
+               
+               //const Entry* member = (Entry*)&_archiveFileContent[E::get32(entry->ran_off)];
+               //fprintf(stderr, "adding hash %d, %s -> %p\n", i, entryName, entry);
+               _hashTable[entryName] = entry;
+       }
+}
+
+template <typename A>
+void File<A>::dumpTableOfContents()
+{
+       for (unsigned int i=0; i < _tableOfContentCount; ++i) {
+               const struct ranlib* e = &_tableOfContents[i];
+               printf("%s in %s\n", &_tableOfContentStrings[E::get32(e->ran_un.ran_strx)], ((Entry*)&_archiveFileContent[E::get32(e->ran_off)])->name());
+       }
+}
+
+
+//
+// main function used by linker to instantiate archive files
+//
+ld::File* parse(const uint8_t* fileContent, uint64_t fileLength, 
+                               const char* path, time_t modTime, uint32_t ordinal, const ParserOptions& opts)
+{
+       switch ( opts.objOpts.architecture ) {
+               case CPU_TYPE_X86_64:
+                       if ( archive::Parser<x86_64>::validFile(fileContent, fileLength, opts.objOpts) )
+                               return archive::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
+                       break;
+               case CPU_TYPE_I386:
+                       if ( archive::Parser<x86>::validFile(fileContent, fileLength, opts.objOpts) )
+                               return archive::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
+                       break;
+               case CPU_TYPE_ARM:
+                       if ( archive::Parser<arm>::validFile(fileContent, fileLength, opts.objOpts) )
+                               return archive::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
+                       break;
+               case CPU_TYPE_POWERPC:
+                       if ( archive::Parser<ppc>::validFile(fileContent, fileLength, opts.objOpts) )
+                               return archive::Parser<ppc>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
+                       break;
+               case CPU_TYPE_POWERPC64:
+                       if ( archive::Parser<ppc64>::validFile(fileContent, fileLength, opts.objOpts) )
+                               return archive::Parser<ppc64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
+                       break;
+       }
+       return NULL;
+}
+
+
+
+}; // namespace archive
+
+