1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
3 * Copyright (c) 2006-2011 Apple Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
25 #ifndef __MACHO_LAYOUT__
26 #define __MACHO_LAYOUT__
28 #include <sys/types.h>
30 #include <sys/errno.h>
32 #include <mach/mach.h>
39 #include <mach-o/loader.h>
40 #include <mach-o/fat.h>
44 #include <ext/hash_map>
46 #include "MachOFileAbstraction.hpp"
47 #include "Architectures.hpp"
// throwf(): printf-style error reporting helper used throughout this header.
// This first declaration exists to attach the printf format-checking attribute
// (the format string is argument 1, the variadic arguments start at argument 2).
50 void throwf(const char* format, ...) __attribute__((format(printf, 1, 2)));
// Definition. The noreturn attribute tells the compiler that control never
// comes back to the caller after a throwf() call.
// NOTE(review): this is a non-inline function definition inside an
// include-guarded header; including the header from more than one translation
// unit would give duplicate symbols at link time -- confirm single inclusion.
52 __attribute__((noreturn))
53 void throwf(const char* format, ...)
57 va_start(list, format);
// vasprintf() allocates a buffer for the formatted message and stores it in p.
// NOTE(review): the return value of vasprintf() is not examined on this line;
// if the allocation fails the value of p is not defined -- confirm how p is
// consumed by the statements that follow.
58 vasprintf(&p, format, list);
// Abstract, architecture-independent view of one mach-o image: the nested
// Segment / Library records plus the pure virtual accessors declared below.
66 class MachOLayoutAbstraction
// Segment constructor.
//   addr, vmsize      - address and vm size; kept as the fOrig* values
//   offset, file_size - file offset and file size; kept as the fOrig* values
//   prot              - protection bits (tested against VM_PROT_* by the accessors)
//   segName           - segment name, copied into fOrigName
// The adjustable copies (fSize, fFileOffset, fFileSize, fPermissions) start out
// equal to the original values; fNewAddress starts at 0 and fMappedAddress at NULL.
72 Segment(uint64_t addr, uint64_t vmsize, uint64_t offset, uint64_t file_size,
73 uint32_t prot, const char* segName) : fOrigAddress(addr), fOrigSize(vmsize),
74 fOrigFileOffset(offset), fOrigFileSize(file_size), fOrigPermissions(prot),
75 fSize(vmsize), fFileOffset(offset), fFileSize(file_size), fPermissions(prot),
76 fNewAddress(0), fMappedAddress(NULL) {
// NOTE(review): strlcpy() with a size of 16 stores at most 15 characters plus a
// terminating NUL, and it scans segName until it finds a NUL. If segName can be
// a full 16-character name without a terminator (presumably possible for a
// mach-o segname field -- confirm), the last character is dropped and the scan
// reads past the field.
77 strlcpy(fOrigName, segName, 16);
80 uint64_t address() const { return fOrigAddress; }
81 uint64_t size() const { return fSize; }
82 uint64_t fileOffset() const { return fFileOffset; }
83 uint64_t fileSize() const { return fFileSize; }
84 uint32_t permissions() const { return fPermissions; }
85 bool readable() const { return fPermissions & VM_PROT_READ; }
86 bool writable() const { return fPermissions & VM_PROT_WRITE; }
87 bool executable() const { return fPermissions & VM_PROT_EXECUTE; }
88 const char* name() const { return fOrigName; }
89 uint64_t newAddress() const { return fNewAddress; }
90 void* mappedAddress() const { return fMappedAddress; }
91 void setNewAddress(uint64_t addr) { fNewAddress = addr; }
92 void setMappedAddress(void* addr) { fMappedAddress = addr; }
93 void setSize(uint64_t new_size) { fSize = new_size; }
94 void setFileOffset(uint64_t new_off) { fFileOffset = new_off; }
95 void setFileSize(uint64_t new_size) { fFileSize = new_size; }
96 void setWritable(bool w) { if (w) fPermissions |= VM_PROT_WRITE; else fPermissions &= ~VM_PROT_WRITE; }
97 void reset() { fSize=fOrigSize; fFileOffset=fOrigFileOffset; fFileSize=fOrigFileSize; fPermissions=fOrigPermissions; }
// Values captured by the Segment constructor. The fOrig* fields are what
// reset() copies back into the adjustable fields.
99 uint64_t fOrigAddress;
101 uint64_t fOrigFileOffset;
102 uint64_t fOrigFileSize;
103 uint32_t fOrigPermissions;
// Adjustable copies, changed through setFileOffset()/setWritable() and friends.
106 uint64_t fFileOffset;
108 uint32_t fPermissions;
// Set through setNewAddress() / setMappedAddress(); initialised to 0 / NULL.
109 uint64_t fNewAddress;
110 void* fMappedAddress;
// Version numbers of a Library record; the MachOLayout constructor fills them
// from a dylib load command (current_version / compatibility_version).
116 uint32_t currentVersion;
117 uint32_t compatibilityVersion;
// ---- Pure virtual interface -------------------------------------------------
// Every query below is implemented by MachOLayout<A> later in this header.

// Identity of the image: architecture, path on disk, offset of this slice
// inside its (possibly universal) file, mach-o file type and header flags.
122 virtual ArchPair getArchPair() const = 0;
123 virtual const char* getFilePath() const = 0;
124 virtual uint64_t getOffsetInUniversalFile() const = 0;
125 virtual uint32_t getFileType() const = 0;
126 virtual uint32_t getFlags() const = 0;
// Returns the Library record describing this image itself (by value).
127 virtual Library getID() const = 0;
// Boolean properties of the image.
128 virtual bool isDylib() const = 0;
129 virtual bool isSplitSeg() const = 0;
130 virtual bool hasSplitSegInfo() const = 0;
131 virtual bool isRootOwned() const = 0;
132 virtual bool inSharableLocation() const = 0;
133 virtual bool hasDynamicLookupLinkage() const = 0;
134 virtual bool hasMainExecutableLookupLinkage() const = 0;
135 virtual bool isTwoLevelNamespace() const = 0;
136 virtual bool hasDyldInfo() const = 0;
// File-level metadata.
137 virtual uint32_t getNameFileOffset() const = 0;
138 virtual time_t getLastModTime() const = 0;
139 virtual ino_t getInode() const = 0;
// Segment list: the non-const overload hands out a mutable reference so
// callers can adjust segments in place; the const overload is read-only.
140 virtual std::vector<Segment>& getSegments() = 0;
141 virtual const std::vector<Segment>& getSegments() const = 0;
// Libraries this image depends on.
142 virtual const std::vector<Library>& getLibraries() const = 0;
// Address-space summary: overall base/size, then the lowest address and the
// accumulated size for the executable, writable and read-only segment groups.
143 virtual uint64_t getBaseAddress() const = 0;
144 virtual uint64_t getVMSize() const = 0;
145 virtual uint64_t getBaseExecutableAddress() const = 0;
146 virtual uint64_t getBaseWritableAddress() const = 0;
147 virtual uint64_t getBaseReadOnlyAddress() const = 0;
148 virtual uint64_t getExecutableVMSize() const = 0;
149 virtual uint64_t getWritableVMSize() const = 0;
150 virtual uint64_t getReadOnlyVMSize() const = 0;
151 // need getDyldInfoExports because export info uses ULEB encoding and size could grow
152 virtual const uint8_t* getDyldInfoExports() const = 0;
// Note: the setter is declared const, so it can be called through a const
// layout pointer; implementations need a mutable member to honour it.
153 virtual void setDyldInfoExports(const uint8_t* newExports) const = 0;
// Concrete MachOLayoutAbstraction for one architecture. The template
// parameter A supplies the nested types aliased as P, E and pint_t below.
159 template <typename A>
160 class MachOLayout : public MachOLayoutAbstraction
// machHeader points at the first byte of the mach-o image in memory; the other
// arguments describe the file it came from. The constructor (defined later in
// this header) parses the load commands and caches everything the accessors
// below return.
163 MachOLayout(const void* machHeader, uint64_t offset, const char* path,
164 ino_t inode, time_t modTime, uid_t uid);
165 virtual ~MachOLayout() {}
// Accessors: each returns a value cached at construction time.
167 virtual ArchPair getArchPair() const { return fArchPair; }
168 virtual const char* getFilePath() const { return fPath; }
169 virtual uint64_t getOffsetInUniversalFile() const { return fOffset; }
170 virtual uint32_t getFileType() const { return fFileType; }
171 virtual uint32_t getFlags() const { return fFlags; }
172 virtual Library getID() const { return fDylibID; }
173 virtual bool isDylib() const { return fIsDylib; }
// isSplitSeg() is defined out of line, with per-architecture specializations.
174 virtual bool isSplitSeg() const;
175 virtual bool hasSplitSegInfo() const { return fHasSplitSegInfo; }
176 virtual bool isRootOwned() const { return fRootOwned; }
177 virtual bool inSharableLocation() const { return fShareableLocation; }
178 virtual bool hasDynamicLookupLinkage() const { return fDynamicLookupLinkage; }
179 virtual bool hasMainExecutableLookupLinkage() const { return fMainExecutableLookupLinkage; }
// Derived directly from the MH_TWOLEVEL bit of the header flags.
180 virtual bool isTwoLevelNamespace() const { return (fFlags & MH_TWOLEVEL); }
181 virtual bool hasDyldInfo() const { return fHasDyldInfo; }
182 virtual uint32_t getNameFileOffset() const{ return fNameFileOffset; }
183 virtual time_t getLastModTime() const { return fMTime; }
184 virtual ino_t getInode() const { return fInode; }
185 virtual std::vector<Segment>& getSegments() { return fSegments; }
186 virtual const std::vector<Segment>& getSegments() const { return fSegments; }
187 virtual const std::vector<Library>& getLibraries() const { return fLibraries; }
// NOTE(review): the four getBase*Address() accessors dereference their segment
// pointer unconditionally. The constructor only assigns those pointers when a
// segment of the matching kind exists, so an image without (for example) a
// writable segment would dereference NULL here -- confirm callers guard this.
188 virtual uint64_t getBaseAddress() const { return fLowSegment->address(); }
189 virtual uint64_t getVMSize() const { return fVMSize; }
190 virtual uint64_t getBaseExecutableAddress() const { return fLowExecutableSegment->address(); }
191 virtual uint64_t getBaseWritableAddress() const { return fLowWritableSegment->address(); }
192 virtual uint64_t getBaseReadOnlyAddress() const { return fLowReadOnlySegment->address(); }
193 virtual uint64_t getExecutableVMSize() const { return fVMExecutableSize; }
// (fVMWritablSize is the member's actual spelling.)
194 virtual uint64_t getWritableVMSize() const { return fVMWritablSize; }
195 virtual uint64_t getReadOnlyVMSize() const { return fVMReadOnlySize; }
196 virtual const uint8_t* getDyldInfoExports() const { return fDyldInfoExports; }
// const setter: works because fDyldInfoExports is declared mutable.
197 virtual void setDyldInfoExports(const uint8_t* newExports) const { fDyldInfoExports = newExports; }
// Shorthand names for types supplied by the architecture parameter A.
200 typedef typename A::P P;
201 typedef typename A::P::E E;
202 typedef typename A::P::uint_t pint_t;
// cpu type this instantiation accepts; specialized per architecture near the
// end of this header and compared against the header's cputype in the constructor.
204 static cpu_type_t arch();
211 std::vector<Segment> fSegments;
212 std::vector<Library> fLibraries;
// These point at elements of fSegments (assigned in the constructor).
// NOTE(review): they stay valid only while fSegments is not reallocated; the
// non-const getSegments() exposes the vector to callers -- confirm nobody
// inserts or erases through it.
213 const Segment* fLowSegment;
214 const Segment* fLowExecutableSegment;
215 const Segment* fLowWritableSegment;
216 const Segment* fLowReadOnlySegment;
218 uint32_t fNameFileOffset;
// Sums of Segment::size() per protection class, accumulated in the constructor.
222 uint64_t fVMExecutableSize;
223 uint64_t fVMWritablSize;
224 uint64_t fVMReadOnlySize;
225 bool fHasSplitSegInfo;
227 bool fShareableLocation;
228 bool fDynamicLookupLinkage;
229 bool fMainExecutableLookupLinkage;
// mutable so that the const setDyldInfoExports() can replace it.
232 mutable const uint8_t* fDyldInfoExports;
// One file on disk, holding a MachOLayoutAbstraction for each architecture
// slice that was accepted while parsing it (see fLayouts).
237 class UniversalMachOLayout
// path is the file to parse. onlyArchs, when non-NULL, is the set of
// architectures the caller is interested in; it is passed to requestedSlice().
240 UniversalMachOLayout(const char* path, const std::set<ArchPair>* onlyArchs=NULL);
// NOTE(review): the destructor is empty, while the constructor strdup()s fPath
// and fills fLayouts with objects created by new. Nothing visible here frees
// them -- presumably acceptable because instances live in fgLayoutCache for
// the life of the process; confirm.
241 ~UniversalMachOLayout() {}
// Cached factory: looks the path up in fgLayoutCache before constructing.
243 static const UniversalMachOLayout& find(const char* path, const std::set<ArchPair>* onlyArchs=NULL);
// Returns the layout for one architecture from fLayouts.
244 const MachOLayoutAbstraction* getSlice(ArchPair ap) const;
245 const std::vector<MachOLayoutAbstraction*>& allLayouts() const { return fLayouts; }
// Equality predicate for the hash map below: keys are C strings and must be
// compared by content (strcmp), not by pointer value.
248 struct CStringEquals {
249 bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); }
// Maps a file path to the UniversalMachOLayout built for it.
251 typedef __gnu_cxx::hash_map<const char*, const UniversalMachOLayout*, __gnu_cxx::hash<const char*>, CStringEquals> PathToNode;
// Decides whether a slice with the given cpu type/subtype should be loaded.
253 static bool requestedSlice(const std::set<ArchPair>* onlyArchs, cpu_type_t cpuType, cpu_subtype_t cpuSubType);
// Process-wide cache used by find().
255 static PathToNode fgLayoutCache;
// One entry per slice accepted by the constructor.
257 std::vector<MachOLayoutAbstraction*> fLayouts;
// Storage for the static path -> layout cache consulted by find().
// NOTE(review): this is a non-inline definition placed in an include-guarded
// header; the guard does not prevent duplicate definitions across translation
// units -- confirm the header is included by only one source file per program.
260 UniversalMachOLayout::PathToNode UniversalMachOLayout::fgLayoutCache;
// Scans fLayouts in order for the layout that corresponds to ap.
// The cpu type is compared first; a cpu subtype comparison follows for at
// least some architectures. NOTE(review): the statements between those two
// comparisons, and the value returned when nothing matches, are not visible
// in this excerpt -- do not assume the subtype is checked for every cpu type.
265 const MachOLayoutAbstraction* UniversalMachOLayout::getSlice(ArchPair ap) const
267 // use matching cputype and cpusubtype
268 for(std::vector<MachOLayoutAbstraction*>::const_iterator it=fLayouts.begin(); it != fLayouts.end(); ++it) {
269 const MachOLayoutAbstraction* layout = *it;
270 if ( layout->getArchPair().arch == ap.arch ) {
273 if ( layout->getArchPair().subtype == ap.subtype )
// Cached factory for UniversalMachOLayout objects.
// Looks path up in fgLayoutCache first; on a miss a new object is constructed
// (the constructor may throw, see throwf) and stored in the cache.
// NOTE(review): the cache key is the path alone. onlyArchs is used only when a
// new object is built, so a later call for the same path with a different
// onlyArchs set would presumably get the earlier object back -- confirm callers
// always pass a consistent set.
285 const UniversalMachOLayout& UniversalMachOLayout::find(const char* path, const std::set<ArchPair>* onlyArchs)
288 PathToNode::iterator pos = fgLayoutCache.find(path);
289 if ( pos != fgLayoutCache.end() )
292 // create UniversalMachOLayout
293 const UniversalMachOLayout* result = new UniversalMachOLayout(path, onlyArchs);
// Key the entry by the object's own copy of the path (fPath), not by the
// caller's pointer, so the key stays valid for as long as the object exists.
296 fgLayoutCache[result->fPath] = result;
// Decides whether a slice with the given cpu type/subtype is one the caller
// asked for. A NULL onlyArchs is handled as a special case before the set is
// walked; otherwise each ArchPair in the set is compared against cpuType and,
// for at least some architectures, cpuSubType.
// NOTE(review): the statements between the type and subtype comparisons and
// the return statements are not visible in this excerpt.
302 bool UniversalMachOLayout::requestedSlice(const std::set<ArchPair>* onlyArchs, cpu_type_t cpuType, cpu_subtype_t cpuSubType)
304 if ( onlyArchs == NULL )
306 // must match cputype and cpusubtype
307 for (std::set<ArchPair>::const_iterator it = onlyArchs->begin(); it != onlyArchs->end(); ++it) {
308 ArchPair anArch = *it;
309 if ( cpuType == anArch.arch ) {
312 if ( cpuSubType == anArch.subtype )
// Opens and maps the file at path, then creates one MachOLayout<arch> object
// in fLayouts for every architecture slice that requestedSlice() accepts.
// Handles both universal (fat) files and single-architecture mach-o files.
// Errors that prevent the file from being read are reported through throwf().
// fPath is a private strdup() copy of path.
324 UniversalMachOLayout::UniversalMachOLayout(const char* path, const std::set<ArchPair>* onlyArchs)
325 : fPath(strdup(path))
// Open read-only; a failure is reported with one of the two messages below.
328 int fd = ::open(path, O_RDONLY, 0);
332 throwf("file not found");
334 throwf("can't open file, errno=%d", err);
// stat the open descriptor: the size bounds the mapping, and the inode, mtime
// and owner uid are handed to each MachOLayout.
336 struct stat stat_buf;
337 if ( fstat(fd, &stat_buf) == -1)
338 throwf("can't stat open file %s, errno=%d", path, errno);
// Reject files shorter than 20 bytes before any header field is read.
339 if ( stat_buf.st_size < 20 )
340 throwf("file too small %s", path);
// Map the whole file read-only.
// NOTE(review): (uint8_t*)(-1) is the same value as MAP_FAILED; using the named
// constant would make the intent clearer.
// NOTE(review): the layouts created below keep pointers into this mapping
// (each receives &p[...] or mh as its machHeader), so the mapping has to stay
// alive for as long as those layouts are used.
341 uint8_t* p = (uint8_t*)::mmap(NULL, stat_buf.st_size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0);
342 if ( p == (uint8_t*)(-1) )
343 throwf("can't map file %s, errno=%d", path, errno);
347 // if fat file, process each architecture
// The same bytes are viewed as both header kinds; the magic decides which applies.
348 const fat_header* fh = (fat_header*)p;
349 const mach_header* mh = (mach_header*)p;
350 if ( fh->magic == OSSwapBigToHostInt32(FAT_MAGIC) ) {
351 // Fat header is always big-endian
352 const struct fat_arch* slices = (struct fat_arch*)(p + sizeof(struct fat_header));
// NOTE(review): sliceCount comes straight from the file and is not checked
// against the file size before slices[i] is read; a crafted nfat_arch could
// make the loop read beyond the mapping.
353 const uint32_t sliceCount = OSSwapBigToHostInt32(fh->nfat_arch);
354 for (uint32_t i=0; i < sliceCount; ++i) {
355 if ( requestedSlice(onlyArchs, OSSwapBigToHostInt32(slices[i].cputype), OSSwapBigToHostInt32(slices[i].cpusubtype)) ) {
// Validate that the slice lies inside the file before touching it.
356 uint32_t fileOffset = OSSwapBigToHostInt32(slices[i].offset);
357 if ( fileOffset > stat_buf.st_size ) {
358 throwf("malformed universal file, slice %u for architecture 0x%08X is beyond end of file: %s",
359 i, OSSwapBigToHostInt32(slices[i].cputype), path);
// NOTE(review): both operands of this addition are 32-bit unsigned, so the sum
// is computed modulo 2^32 before it is compared with st_size. A slice with a
// large offset and size can wrap to a small value and pass this check; widening
// one operand to uint64_t before adding would close that hole.
361 if ( (fileOffset+OSSwapBigToHostInt32(slices[i].size)) > stat_buf.st_size ) {
362 throwf("malformed universal file, slice %u for architecture 0x%08X is beyond end of file: %s",
363 i, OSSwapBigToHostInt32(slices[i].cputype), path);
// Instantiate the layout class that matches the cpu type of the slice.
366 switch ( OSSwapBigToHostInt32(slices[i].cputype) ) {
367 case CPU_TYPE_POWERPC:
368 fLayouts.push_back(new MachOLayout<ppc>(&p[fileOffset], fileOffset, fPath, stat_buf.st_ino, stat_buf.st_mtime, stat_buf.st_uid));
371 fLayouts.push_back(new MachOLayout<x86>(&p[fileOffset], fileOffset, fPath, stat_buf.st_ino, stat_buf.st_mtime, stat_buf.st_uid));
373 case CPU_TYPE_X86_64:
374 fLayouts.push_back(new MachOLayout<x86_64>(&p[fileOffset], fileOffset, fPath, stat_buf.st_ino, stat_buf.st_mtime, stat_buf.st_uid));
377 fLayouts.push_back(new MachOLayout<arm>(&p[fileOffset], fileOffset, fPath, stat_buf.st_ino, stat_buf.st_mtime, stat_buf.st_uid));
379 case CPU_TYPE_POWERPC64:
380 // ignore ppc64 slices
383 throw "unknown slice in fat file";
// Problems raised as a plain C string (here or by the MachOLayout constructor)
// are reported on stderr as a warning.
386 catch (const char* msg) {
387 fprintf(stderr, "warning: %s for %s\n", msg, path);
// Not a fat file: identify a single-architecture image by its magic and cpu
// type. ppc headers are read big-endian; i386, x86_64 and arm little-endian.
394 if ( (OSSwapBigToHostInt32(mh->magic) == MH_MAGIC) && (OSSwapBigToHostInt32(mh->cputype) == CPU_TYPE_POWERPC)) {
395 if ( requestedSlice(onlyArchs, OSSwapBigToHostInt32(mh->cputype), OSSwapBigToHostInt32(mh->cpusubtype)) )
396 fLayouts.push_back(new MachOLayout<ppc>(mh, 0, fPath, stat_buf.st_ino, stat_buf.st_mtime, stat_buf.st_uid));
398 else if ( (OSSwapLittleToHostInt32(mh->magic) == MH_MAGIC) && (OSSwapLittleToHostInt32(mh->cputype) == CPU_TYPE_I386)) {
399 if ( requestedSlice(onlyArchs, OSSwapLittleToHostInt32(mh->cputype), OSSwapLittleToHostInt32(mh->cpusubtype)) )
400 fLayouts.push_back(new MachOLayout<x86>(mh, 0, fPath, stat_buf.st_ino, stat_buf.st_mtime, stat_buf.st_uid));
402 else if ( (OSSwapLittleToHostInt32(mh->magic) == MH_MAGIC_64) && (OSSwapLittleToHostInt32(mh->cputype) == CPU_TYPE_X86_64)) {
403 if ( requestedSlice(onlyArchs, OSSwapLittleToHostInt32(mh->cputype), OSSwapLittleToHostInt32(mh->cpusubtype)) )
404 fLayouts.push_back(new MachOLayout<x86_64>(mh, 0, fPath, stat_buf.st_ino, stat_buf.st_mtime, stat_buf.st_uid));
406 else if ( (OSSwapLittleToHostInt32(mh->magic) == MH_MAGIC) && (OSSwapLittleToHostInt32(mh->cputype) == CPU_TYPE_ARM)) {
407 if ( requestedSlice(onlyArchs, OSSwapLittleToHostInt32(mh->cputype), OSSwapLittleToHostInt32(mh->cpusubtype)) )
408 fLayouts.push_back(new MachOLayout<arm>(mh, 0, fPath, stat_buf.st_ino, stat_buf.st_mtime, stat_buf.st_uid));
410 else if ( (OSSwapBigToHostInt32(mh->magic) == MH_MAGIC_64) && (OSSwapBigToHostInt32(mh->cputype) == CPU_TYPE_POWERPC64)) {
411 // ignore ppc64 slices
414 throw "unknown file format";
417 catch (const char* msg) {
418 fprintf(stderr, "warning: %s for %s\n", msg, path);
// Releases the mapping. NOTE(review): the control flow around this call is not
// visible in this excerpt -- presumably it runs only on an error path, since
// the layouts built above still point into the mapping; confirm.
423 ::munmap(p, stat_buf.st_size);
// Parses the mach-o image at machHeader and caches what the accessors return:
// header fields, the install name record, dependent libraries, segments,
// per-protection-class address/size summaries, lookup-linkage flags and the
// location of the dyld export info.
//   machHeader - first byte of the image in memory (must stay mapped afterwards,
//                because fDyldInfoExports is left pointing into it)
//   offset     - offset of this image inside its file
//   path       - file path; the pointer is stored, not copied
//   inode, modTime, uid - file metadata; uid == 0 sets fRootOwned
// Throws a const char* message when the image has the wrong architecture or an
// unsupported file type.
429 template <typename A>
430 MachOLayout<A>::MachOLayout(const void* machHeader, uint64_t offset, const char* path, ino_t inode, time_t modTime, uid_t uid)
431 : fPath(path), fOffset(offset), fArchPair(0,0), fMTime(modTime), fInode(inode), fHasSplitSegInfo(false), fRootOwned(uid==0),
432 fShareableLocation(false), fDynamicLookupLinkage(false), fMainExecutableLookupLinkage(false), fIsDylib(false),
433 fHasDyldInfo(false), fDyldInfoExports(NULL)
// Start with an empty identity record; it is filled in only if an ID command
// is found among the load commands.
435 fDylibID.name = NULL;
436 fDylibID.currentVersion = 0;
437 fDylibID.compatibilityVersion = 0;
// Validate the header before trusting any other field.
439 const macho_header<P>* mh = (const macho_header<P>*)machHeader;
440 if ( mh->cputype() != arch() )
441 throw "Layout object is wrong architecture";
442 switch ( mh->filetype() ) {
452 throw "file is not a mach-o final linked image";
454 fFlags = mh->flags();
455 fFileType = mh->filetype();
456 fArchPair.arch = mh->cputype();
457 fArchPair.subtype = mh->cpusubtype();
// Walk the load commands, which start immediately after the header.
// NOTE(review): ncmds and each cmdsize are taken from the file without being
// checked against the size of the mapping; a crafted cmdsize could step the
// cursor outside the image, and a cmdsize of 0 would keep re-reading the same
// command for the rest of the loop.
459 const macho_dyld_info_command<P>* dyldInfo = NULL;
460 const macho_symtab_command<P>* symbolTableCmd = NULL;
461 const macho_dysymtab_command<P>* dynamicSymbolTableCmd = NULL;
462 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
463 const uint32_t cmd_count = mh->ncmds();
464 const macho_load_command<P>* cmd = cmds;
465 for (uint32_t i = 0; i < cmd_count; ++i) {
466 switch ( cmd->cmd() ) {
// Identity of this image: install name (copied with strdup), versions, and the
// offset of the name string from the start of the image.
// NOTE(review): the pointer difference is narrowed to the 32-bit fNameFileOffset.
469 macho_dylib_command<P>* dylib = (macho_dylib_command<P>*)cmd;
470 fDylibID.name = strdup(dylib->name());
471 fDylibID.currentVersion = dylib->current_version();
472 fDylibID.compatibilityVersion = dylib->compatibility_version();
473 fNameFileOffset = dylib->name() - (char*)machHeader;
// "Sharable location" means the install name starts with /usr/lib/ or /System/Library/.
474 fShareableLocation = ( (strncmp(fDylibID.name, "/usr/lib/", 9) == 0) || (strncmp(fDylibID.name, "/System/Library/", 16) == 0) );
// Dependent libraries: one Library record per dylib-loading command, with
// weakImport set only for LC_LOAD_WEAK_DYLIB.
478 case LC_LOAD_WEAK_DYLIB:
479 case LC_REEXPORT_DYLIB:
480 case LC_LOAD_UPWARD_DYLIB:
482 macho_dylib_command<P>* dylib = (macho_dylib_command<P>*)cmd;
484 lib.name = strdup(dylib->name());
485 lib.currentVersion = dylib->current_version();
486 lib.compatibilityVersion = dylib->compatibility_version();
487 lib.weakImport = ( cmd->cmd() == LC_LOAD_WEAK_DYLIB );
488 fLibraries.push_back(lib);
491 case LC_SEGMENT_SPLIT_INFO:
492 fHasSplitSegInfo = true;
// Segment command for this pointer size: record it in fSegments.
494 case macho_segment_command<P>::CMD:
496 macho_segment_command<P>* segCmd = (macho_segment_command<P>*)cmd;
497 fSegments.push_back(Segment(segCmd->vmaddr(), segCmd->vmsize(), segCmd->fileoff(),
498 segCmd->filesize(), segCmd->initprot(), segCmd->segname()));
// Remember the symbol table, dynamic symbol table and dyld info commands for
// the passes after the loop.
502 symbolTableCmd = (macho_symtab_command<P>*)cmd;
505 dynamicSymbolTableCmd = (macho_dysymtab_command<P>*)cmd;
508 case LC_DYLD_INFO_ONLY:
510 dyldInfo = (struct macho_dyld_info_command<P>*)cmd;
// Advance to the next load command.
513 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
// Summarise the segments. All segments are known at this point, so taking the
// address of vector elements below is safe as long as fSegments is not resized
// afterwards.
517 fLowExecutableSegment = NULL;
518 fLowWritableSegment = NULL;
519 fLowReadOnlySegment = NULL;
520 fVMExecutableSize = 0;
524 const Segment* highSegment = NULL;
525 for(std::vector<Segment>::const_iterator it = fSegments.begin(); it != fSegments.end(); ++it) {
526 const Segment& seg = *it;
// Track the lowest and highest segment by address.
527 if ( (fLowSegment == NULL) || (seg.address() < fLowSegment->address()) )
529 if ( (highSegment == NULL) || (seg.address() > highSegment->address()) )
// Each segment is counted in exactly one class; executable is tested before
// writable, so a segment that is both is counted as executable.
531 if ( seg.executable() ) {
532 if ( (fLowExecutableSegment == NULL) || (seg.address() < fLowExecutableSegment->address()) )
533 fLowExecutableSegment = &seg;
534 fVMExecutableSize += seg.size();
536 else if ( seg.writable()) {
537 if ( (fLowWritableSegment == NULL) || (seg.address() < fLowWritableSegment->address()) )
538 fLowWritableSegment = &seg;
539 fVMWritablSize += seg.size();
542 if ( (fLowReadOnlySegment == NULL) || (seg.address() < fLowReadOnlySegment->address()) )
543 fLowReadOnlySegment = &seg;
544 fVMReadOnlySize += seg.size();
// Total span from the lowest segment to the end of the highest one, rounded up
// to a multiple of 4096.
547 if ( (highSegment != NULL) && (fLowSegment != NULL) )
548 fVMSize = (highSegment->address() + highSegment->size() - fLowSegment->address() + 4095) & (-4096);
550 // scan undefines looking for magic ordinals
// NOTE(review): symoff, iundefsym and nundefsym come from the file and are not
// checked against the size of the image before symbolTable[i] is read.
551 if ( (symbolTableCmd != NULL) && (dynamicSymbolTableCmd != NULL) ) {
552 const macho_nlist<P>* symbolTable = (macho_nlist<P>*)((uint8_t*)machHeader + symbolTableCmd->symoff());
553 const uint32_t startUndefs = dynamicSymbolTableCmd->iundefsym();
554 const uint32_t endUndefs = startUndefs + dynamicSymbolTableCmd->nundefsym();
555 for (uint32_t i=startUndefs; i < endUndefs; ++i) {
556 uint8_t ordinal = GET_LIBRARY_ORDINAL(symbolTable[i].n_desc());
557 if ( ordinal == DYNAMIC_LOOKUP_ORDINAL )
558 fDynamicLookupLinkage = true;
559 else if ( ordinal == EXECUTABLE_ORDINAL )
560 fMainExecutableLookupLinkage = true;
// Remember where the export info lives inside the mapped image (an export_off
// of 0 leaves fDyldInfoExports NULL).
// NOTE(review): export_off is not checked against the size of the image.
564 if ( dyldInfo != NULL ) {
565 if ( dyldInfo->export_off() != 0 ) {
566 fDyldInfoExports = (uint8_t*)machHeader + dyldInfo->export_off();
572 template <> cpu_type_t MachOLayout<ppc>::arch() { return CPU_TYPE_POWERPC; }
573 template <> cpu_type_t MachOLayout<x86>::arch() { return CPU_TYPE_I386; }
574 template <> cpu_type_t MachOLayout<x86_64>::arch() { return CPU_TYPE_X86_64; }
575 template <> cpu_type_t MachOLayout<arm>::arch() { return CPU_TYPE_ARM; }
// isSplitSeg(): for ppc, x86 and arm the answer is read from the header flags;
// the image is split-seg exactly when the MH_SPLIT_SEGS bit is set.
579 bool MachOLayout<ppc>::isSplitSeg() const
581 return ( (this->getFlags() & MH_SPLIT_SEGS) != 0 );
585 bool MachOLayout<x86>::isSplitSeg() const
587 return ( (this->getFlags() & MH_SPLIT_SEGS) != 0 );
591 bool MachOLayout<arm>::isSplitSeg() const
593 return ( (this->getFlags() & MH_SPLIT_SEGS) != 0 );
// Generic definition, used by every architecture that has no specialization
// above (x86_64 has none among the visible definitions).
596 template <typename A>
597 bool MachOLayout<A>::isSplitSeg() const
603 #endif // __MACHO_LAYOUT__