]> git.saurik.com Git - apple/ld64.git/blobdiff - src/machochecker.cpp
ld64-47.2.tar.gz
[apple/ld64.git] / src / machochecker.cpp
diff --git a/src/machochecker.cpp b/src/machochecker.cpp
new file mode 100644 (file)
index 0000000..55f9fd9
--- /dev/null
@@ -0,0 +1,519 @@
+/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- 
+ *
+ * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <mach-o/loader.h>
+#include <mach-o/fat.h>
+#include <mach-o/stab.h>
+
+#include <vector>
+
+#include "MachOFileAbstraction.hpp"
+#include "Architectures.hpp"
+
+
+ __attribute__((noreturn))
+void throwf(const char* format, ...) 
+{
+       va_list list;
+       char*   p;
+       va_start(list, format);
+       vasprintf(&p, format, list);
+       va_end(list);
+       
+       const char*     t = p;
+       throw t;
+}
+
+
+template <typename A>
+class MachOChecker
+{
+public:
+       static bool                                                                     validFile(const uint8_t* fileContent);
+       static MachOChecker<A>*                                         make(const uint8_t* fileContent, uint32_t fileLength, const char* path) 
+                                                                                                               { return new MachOChecker<A>(fileContent, fileLength, path); }
+       virtual                                                                         ~MachOChecker() {}
+
+
+private:
+       typedef typename A::P                                   P;
+       typedef typename A::P::E                                E;
+       typedef typename A::P::uint_t                   pint_t;
+       
+                                                                                               MachOChecker(const uint8_t* fileContent, uint32_t fileLength, const char* path);
+       void                                                                            checkMachHeader();
+       void                                                                            checkLoadCommands();
+       void                                                                            checkSection(const macho_segment_command<P>* segCmd, const macho_section<P>* sect);
+       uint8_t                                                                         loadCommandSizeMask();
+       void                                                                            checkIndirectSymbolTable();
+
+       const char*                                                                     fPath;
+       const macho_header<P>*                                          fHeader;
+       uint32_t                                                                        fLength;
+       const char*                                                                     fStrings;
+       const char*                                                                     fStringsEnd;
+       const macho_nlist<P>*                                           fSymbols;
+       uint32_t                                                                        fSymbolCount;
+       const uint32_t*                                                         fIndirectTable;
+       uint32_t                                                                        fIndirectTableCount;
+
+};
+
+
+
+template <>
+bool MachOChecker<ppc>::validFile(const uint8_t* fileContent)
+{      
+       const macho_header<P>* header = (const macho_header<P>*)fileContent;
+       if ( header->magic() != MH_MAGIC )
+               return false;
+       if ( header->cputype() != CPU_TYPE_POWERPC )
+               return false;
+       switch (header->filetype()) {
+               case MH_EXECUTE:
+               case MH_DYLIB:
+               case MH_BUNDLE:
+               case MH_DYLINKER:
+                       return true;
+       }
+       return false;
+}
+
+template <>
+bool MachOChecker<ppc64>::validFile(const uint8_t* fileContent)
+{      
+       const macho_header<P>* header = (const macho_header<P>*)fileContent;
+       if ( header->magic() != MH_MAGIC_64 )
+               return false;
+       if ( header->cputype() != CPU_TYPE_POWERPC64 )
+               return false;
+       switch (header->filetype()) {
+               case MH_EXECUTE:
+               case MH_DYLIB:
+               case MH_BUNDLE:
+               case MH_DYLINKER:
+                       return true;
+       }
+       return false;
+}
+
+template <>
+bool MachOChecker<x86>::validFile(const uint8_t* fileContent)
+{      
+       const macho_header<P>* header = (const macho_header<P>*)fileContent;
+       if ( header->magic() != MH_MAGIC )
+               return false;
+       if ( header->cputype() != CPU_TYPE_I386 )
+               return false;
+       switch (header->filetype()) {
+               case MH_EXECUTE:
+               case MH_DYLIB:
+               case MH_BUNDLE:
+               case MH_DYLINKER:
+                       return true;
+       }
+       return false;
+}
+
+
+
+template <> uint8_t MachOChecker<ppc>::loadCommandSizeMask()   { return 0x03; }
+template <> uint8_t MachOChecker<ppc64>::loadCommandSizeMask() { return 0x07; }
+template <> uint8_t MachOChecker<x86>::loadCommandSizeMask()   { return 0x03; }
+
+
+template <typename A>
+MachOChecker<A>::MachOChecker(const uint8_t* fileContent, uint32_t fileLength, const char* path)
+ : fHeader(NULL), fLength(fileLength), fStrings(NULL), fSymbols(NULL), fSymbolCount(0), fIndirectTableCount(0)
+{
+       // sanity check
+       if ( ! validFile(fileContent) )
+               throw "not a mach-o file that can be checked";
+
+       fPath = strdup(path);
+       fHeader = (const macho_header<P>*)fileContent;
+       
+       // sanity check header
+       checkMachHeader();
+       
+       // check load commands
+       checkLoadCommands();
+       
+       checkIndirectSymbolTable();
+
+}
+
+
+template <typename A>
+void MachOChecker<A>::checkMachHeader()
+{
+       if ( (fHeader->sizeofcmds() + sizeof(macho_header<P>)) > fLength )
+               throw "sizeofcmds in mach_header is larger than file";
+       
+       uint32_t flags = fHeader->flags();
+       uint32_t invalidBits = MH_INCRLINK | MH_LAZY_INIT | 0xFFFC0000;
+       if ( flags & invalidBits )
+               throw "invalid bits in mach_header flags";
+               
+}
+
+template <typename A>
+void MachOChecker<A>::checkLoadCommands()
+{
+       // check that all load commands fit within the load command space file
+       const uint8_t* const endOfFile = (uint8_t*)fHeader + fLength;
+       const uint8_t* const endOfLoadCommands = (uint8_t*)fHeader + sizeof(macho_header<P>) + fHeader->sizeofcmds();
+       const uint32_t cmd_count = fHeader->ncmds();
+       const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
+       const macho_load_command<P>* cmd = cmds;
+       for (uint32_t i = 0; i < cmd_count; ++i) {
+               uint32_t size = cmd->cmdsize();
+               if ( (size & this->loadCommandSizeMask()) != 0 )
+                       throwf("load command #%d has a unaligned size", i);
+               const uint8_t* endOfCmd = ((uint8_t*)cmd)+cmd->cmdsize();
+               if ( endOfCmd > endOfLoadCommands )
+                       throwf("load command #%d extends beyond the end of the load commands", i);
+               if ( endOfCmd > endOfFile )
+                       throwf("load command #%d extends beyond the end of the file", i);
+               switch ( cmd->cmd()     ) {
+                       case macho_segment_command<P>::CMD:
+                       case LC_SYMTAB:
+                       case LC_UNIXTHREAD:
+                       case LC_DYSYMTAB:
+                       case LC_LOAD_DYLIB:
+                       case LC_ID_DYLIB:
+                       case LC_LOAD_DYLINKER:
+                       case LC_ID_DYLINKER:
+                       case macho_routines_command<P>::CMD:
+                       case LC_SUB_FRAMEWORK:
+                       case LC_SUB_UMBRELLA:
+                       case LC_SUB_CLIENT:
+                       case LC_TWOLEVEL_HINTS:
+                       case LC_PREBIND_CKSUM:
+                       case LC_LOAD_WEAK_DYLIB:
+                       case LC_UUID:
+                               break;
+                       default:
+                               throwf("load command #%d is an unknown kind 0x%X", i, cmd->cmd());
+               }
+               cmd = (const macho_load_command<P>*)endOfCmd;
+       }
+       
+       // check segments
+       cmd = cmds;
+       std::vector<std::pair<pint_t, pint_t> > segmentAddressRanges;
+       std::vector<std::pair<pint_t, pint_t> > segmentFileOffsetRanges;
+       const macho_segment_command<P>* linkEditSegment = NULL;
+       for (uint32_t i = 0; i < cmd_count; ++i) {
+               if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
+                       const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
+                       if ( segCmd->cmdsize() != (sizeof(macho_segment_command<P>) + segCmd->nsects() * sizeof(macho_section_content<P>)) )
+                               throw "invalid segment load command size";
+                               
+                       // see if this overlaps another segment address range
+                       uint64_t startAddr = segCmd->vmaddr();
+                       uint64_t endAddr = startAddr + segCmd->vmsize();
+                       for (typename std::vector<std::pair<pint_t, pint_t> >::iterator it = segmentAddressRanges.begin(); it != segmentAddressRanges.end(); ++it) {
+                               if ( it->first < startAddr ) {
+                                       if ( it->second > startAddr )
+                                               throw "overlapping segment vm addresses";
+                               }
+                               else if ( it->first > startAddr ) {
+                                       if ( it->first < endAddr )
+                                               throw "overlapping segment vm addresses";
+                               }
+                               else {
+                                       throw "overlapping segment vm addresses";
+                               }
+                               segmentAddressRanges.push_back(std::make_pair<pint_t, pint_t>(startAddr, endAddr));
+                       }
+                       // see if this overlaps another segment file offset range
+                       uint64_t startOffset = segCmd->fileoff();
+                       uint64_t endOffset = startOffset + segCmd->filesize();
+                       for (typename std::vector<std::pair<pint_t, pint_t> >::iterator it = segmentFileOffsetRanges.begin(); it != segmentFileOffsetRanges.end(); ++it) {
+                               if ( it->first < startOffset ) {
+                                       if ( it->second > startOffset )
+                                               throw "overlapping segment file data";
+                               }
+                               else if ( it->first > startOffset ) {
+                                       if ( it->first < endOffset )
+                                               throw "overlapping segment file data";
+                               }
+                               else {
+                                       throw "overlapping segment file data";
+                               }
+                               segmentFileOffsetRanges.push_back(std::make_pair<pint_t, pint_t>(startOffset, endOffset));
+                               // check is within file bounds
+                               if ( (startOffset > fLength) || (endOffset > fLength) )
+                                       throw "segment file data is past end of file";
+                       }
+                       // verify it fits in file
+                       if ( startOffset > fLength )
+                               throw "segment fileoff does not fit in file";
+                       if ( endOffset > fLength )
+                               throw "segment fileoff+filesize does not fit in file";
+                               
+                       // keep LINKEDIT segment 
+                       if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 )
+                               linkEditSegment = segCmd;
+                               
+                       // check section ranges
+                       const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
+                       const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
+                       for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
+                               // check all sections are within segment
+                               if ( sect->addr() < startAddr )
+                                       throwf("section %s vm address not within segment", sect->sectname());
+                               if ( (sect->addr()+sect->size()) > endAddr )
+                                       throwf("section %s vm address not within segment", sect->sectname());
+                               if ( (sect->flags() &SECTION_TYPE) != S_ZEROFILL ) {
+                                       if ( sect->offset() < startOffset )
+                                               throwf("section %s file offset not within segment", sect->sectname());
+                                       if ( (sect->offset()+sect->size()) > endOffset )
+                                               throwf("section %s file offset not within segment", sect->sectname());
+                               }       
+                               checkSection(segCmd, sect);
+                       }
+               }
+               cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
+       }
+       
+       // verify there was a LINKEDIT segment
+       if ( linkEditSegment == NULL )
+               throw "no __LINKEDIT segment";
+       
+       // checks for executables
+       bool isStaticExecutable = false;
+       if ( fHeader->filetype() == MH_EXECUTE ) {
+               isStaticExecutable = true;
+               cmd = cmds;
+               for (uint32_t i = 0; i < cmd_count; ++i) {
+                       switch ( cmd->cmd() ) {
+                               case LC_LOAD_DYLINKER:
+                                       // the existence of a dyld load command makes a executable dynamic
+                                       isStaticExecutable = false;
+                                       break;
+                       }
+                       cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
+               }
+               if ( isStaticExecutable ) {
+                       if ( fHeader->flags() != MH_NOUNDEFS )
+                               throw "invalid bits in mach_header flags for static executable";
+               }
+       }
+
+       // check LC_SYMTAB and LC_DYSYMTAB
+       cmd = cmds;
+       bool foundDynamicSymTab = false;
+       for (uint32_t i = 0; i < cmd_count; ++i) {
+               switch ( cmd->cmd() ) {
+                       case LC_SYMTAB:
+                               {
+                                       const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
+                                       fSymbolCount = symtab->nsyms();
+                                       fSymbols = (const macho_nlist<P>*)((char*)fHeader + symtab->symoff());
+                                       if ( symtab->symoff() < linkEditSegment->fileoff() )
+                                               throw "symbol table not in __LINKEDIT";
+                                       if ( (symtab->symoff() + fSymbolCount*sizeof(macho_nlist<P>*)) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
+                                               throw "symbol table end not in __LINKEDIT";
+                                       fStrings = (char*)fHeader + symtab->stroff();
+                                       fStringsEnd = fStrings + symtab->strsize();
+                                       if ( symtab->stroff() < linkEditSegment->fileoff() )
+                                               throw "string pool not in __LINKEDIT";
+                                       if ( (symtab->stroff()+symtab->strsize()) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
+                                               throw "string pool extends beyond __LINKEDIT";
+                               }
+                               break;
+                       case LC_DYSYMTAB:
+                               {
+                                       if ( isStaticExecutable )
+                                               throw "LC_DYSYMTAB should not be used in static executable";
+                                       foundDynamicSymTab = true;
+                                       const macho_dysymtab_command<P>* dsymtab = (struct macho_dysymtab_command<P>*)cmd;
+                                       fIndirectTable = (uint32_t*)((char*)fHeader + dsymtab->indirectsymoff());
+                                       fIndirectTableCount = dsymtab->nindirectsyms();
+                                       if ( dsymtab->indirectsymoff() < linkEditSegment->fileoff() )
+                                               throw "indirect symbol table not in __LINKEDIT";
+                                       if ( (dsymtab->indirectsymoff()+fIndirectTableCount*8) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
+                                               throw "indirect symbol table not in __LINKEDIT";
+                               }
+                               break;
+               }
+               cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
+       }
+       if ( !isStaticExecutable && !foundDynamicSymTab )
+               throw "missing dynamic symbol table";
+       if ( fStrings == NULL )
+               throw "missing symbol table";
+       
+       
+       
+}
+
+template <typename A>
+void MachOChecker<A>::checkSection(const macho_segment_command<P>* segCmd, const macho_section<P>* sect)
+{
+       uint8_t sectionType = (sect->flags() & SECTION_TYPE);
+       if ( sectionType == S_ZEROFILL ) {
+               if ( sect->offset() != 0 )
+                       throwf("section offset should be zero for zero-fill section %s", sect->sectname());
+       }
+       
+       // more section tests here
+}
+
+template <typename A>
+void MachOChecker<A>::checkIndirectSymbolTable()
+{
+       const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
+       const uint32_t cmd_count = fHeader->ncmds();
+       const macho_load_command<P>* cmd = cmds;
+       for (uint32_t i = 0; i < cmd_count; ++i) {
+               if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
+                       const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
+                       const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
+                       const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
+                       for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
+                               // make sure all magic sections that use indirect symbol table fit within it
+                               uint32_t start = 0;
+                               uint32_t elementSize = 0;
+                               switch ( sect->flags() & SECTION_TYPE ) {
+                                       case S_SYMBOL_STUBS:
+                                               elementSize = sect->reserved2();
+                                               start = sect->reserved1();
+                                               break;
+                                       case S_LAZY_SYMBOL_POINTERS:
+                                       case S_NON_LAZY_SYMBOL_POINTERS:
+                                               elementSize = sizeof(pint_t);
+                                               start = sect->reserved1();
+                                               break;
+                               }
+                               if ( elementSize != 0 ) {
+                                       uint32_t count = sect->size() / elementSize;
+                                       if ( (count*elementSize) != sect->size() )
+                                               throwf("%s section size is not an even multiple of element size", sect->sectname());
+                                       if ( (start+count) > fIndirectTableCount )
+                                               throwf("%s section references beyond end of indirect symbol table (%d > %d)", sect->sectname(), start+count, fIndirectTableCount );
+                               }
+                       }
+               }
+               cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
+       }
+}
+
+
+static void check(const char* path)
+{
+       struct stat stat_buf;
+       
+       try {
+               int fd = ::open(path, O_RDONLY, 0);
+               if ( fd == -1 )
+                       throw "cannot open file";
+               ::fstat(fd, &stat_buf);
+               uint32_t length = stat_buf.st_size;
+               uint8_t* p = (uint8_t*)::mmap(NULL, stat_buf.st_size, PROT_READ, MAP_FILE, fd, 0);
+               ::close(fd);
+               const mach_header* mh = (mach_header*)p;
+               if ( mh->magic == OSSwapBigToHostInt32(FAT_MAGIC) ) {
+                       const struct fat_header* fh = (struct fat_header*)p;
+                       const struct fat_arch* archs = (struct fat_arch*)(p + sizeof(struct fat_header));
+                       for (unsigned long i=0; i < fh->nfat_arch; ++i) {
+                               if ( archs[i].cputype == CPU_TYPE_POWERPC ) {
+                                       if ( MachOChecker<ppc>::validFile(p + archs[i].offset) )
+                                               MachOChecker<ppc>::make(p + archs[i].offset, archs[i].size, path);
+                                       else
+                                               throw "in universal file, ppc slice does not contain ppc mach-o";
+                               }
+                               else if ( archs[i].cputype == CPU_TYPE_I386 ) {
+                                       if ( MachOChecker<x86>::validFile(p + archs[i].offset) )
+                                               MachOChecker<x86>::make(p + archs[i].offset, archs[i].size, path);
+                                       else
+                                               throw "in universal file, i386 slice does not contain i386 mach-o";
+                               }
+                               else if ( archs[i].cputype == CPU_TYPE_POWERPC64 ) {
+                                       if ( MachOChecker<ppc64>::validFile(p + archs[i].offset) )
+                                               MachOChecker<ppc64>::make(p + archs[i].offset, archs[i].size, path);
+                                       else
+                                               throw "in universal file, ppc64 slice does not contain ppc64 mach-o";
+                               }
+                               else {
+                                               throw "in universal file, unknown architecture slice";
+                               }
+                       }
+               }
+               else if ( MachOChecker<x86>::validFile(p) ) {
+                       MachOChecker<x86>::make(p, length, path);
+               }
+               else if ( MachOChecker<ppc>::validFile(p) ) {
+                       MachOChecker<ppc>::make(p, length, path);
+               }
+               else if ( MachOChecker<ppc64>::validFile(p) ) {
+                       MachOChecker<ppc64>::make(p, length, path);
+               }
+               else {
+                       throw "not a known file type";
+               }
+       }
+       catch (const char* msg) {
+               throwf("%s in %s", msg, path);
+       }
+}
+
+
+int main(int argc, const char* argv[])
+{
+       try {
+               for(int i=1; i < argc; ++i) {
+                       const char* arg = argv[i];
+                       if ( arg[0] == '-' ) {
+                               if ( strcmp(arg, "-no_content") == 0 ) {
+                                       
+                               }
+                               else {
+                                       throwf("unknown option: %s\n", arg);
+                               }
+                       }
+                       else {
+                               check(arg);
+                       }
+               }
+       }
+       catch (const char* msg) {
+               fprintf(stderr, "machocheck failed: %s\n", msg);
+               return 1;
+       }
+       
+       return 0;
+}
+
+
+