1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
3 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
25 #ifndef __MACHO_BINDER__
26 #define __MACHO_BINDER__
28 #include <sys/types.h>
31 #include <mach/mach.h>
38 #include <mach-o/loader.h>
39 #include <mach-o/fat.h>
44 #include "MachOFileAbstraction.hpp"
45 #include "Architectures.hpp"
46 #include "MachOLayout.hpp"
47 #include "MachORebaser.hpp"
// Binder<A>: layered on Rebaser<A>; holds the symbol-table state of one Mach-O
// image of architecture A and the list of images it depends on, and declares the
// steps used to bind that image's undefined symbols against those dependents.
// NOTE(review): the template header, braces, access specifiers and at least one
// member (fStrings, which the constructor initializes) are not visible in this excerpt.
53 class Binder : public Rebaser<A>
// equality functor so the hash maps below compare C strings by content rather than by pointer
56 struct CStringEquals {
57 bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); }
// C-string key -> Binder; setDependentBinders() looks up dylib load-command paths in a map of this type
59 typedef __gnu_cxx::hash_map<const char*, class Binder<A>*, __gnu_cxx::hash<const char*>, CStringEquals> Map;
// dyldBaseAddress is stored in fDyldBaseAddress and used by doSetUpDyldSection()
62 Binder(const MachOLayoutAbstraction&, uint64_t dyldBaseAddress);
// name() of the recorded dylib ID command when one was seen
65 const char* getDylibID() const;
// fills fDependentDylibs from this image's dylib load commands using 'map'
66 void setDependentBinders(const Map& map);
// shorthand for the architecture's pointer type, endianness helper and pointer-sized integer
70 typedef typename A::P P;
71 typedef typename A::P::E E;
72 typedef typename A::P::uint_t pint_t;
// one dependent image plus a flag saying whether its exports are re-exported through this image
73 struct BinderAndReExportFlag { Binder<A>* binder; bool reExport; };
// symbol name -> symbol table entry; filled by the constructor, searched by findExportedSymbol()
74 typedef __gnu_cxx::hash_map<const char*, const macho_nlist<P>*, __gnu_cxx::hash<const char*>, CStringEquals> NameToSymbolMap;
// the individual binding steps invoked by bind()
76 void doBindExternalRelocations();
77 void doBindIndirectSymbols();
78 void doSetUpDyldSection();
79 void doSetPreboundUndefines();
// returns the address an undefined symbol should be bound to
80 pint_t resolveUndefined(const macho_nlist<P>* undefinedSymbol);
// looks 'name' up in this image's exports, then in re-exported dependents
81 const macho_nlist<P>* findExportedSymbol(const char* name);
// rewrites one stub element; only the x86 specialization visibly writes anything
82 void bindStub(uint8_t elementSize, uint8_t* location, pint_t vmlocation, pint_t value);
// name() of the recorded parent-umbrella command when one was seen
83 const char* parentUmbrella();
// per-architecture r_length / r_type values that external relocations must carry
85 static uint8_t pointerRelocSize();
86 static uint8_t pointerRelocType();
// dependents in load-command order; resolveUndefined() indexes this with (library ordinal - 1)
88 std::vector<BinderAndReExportFlag> fDependentDylibs;
89 NameToSymbolMap fHashTable;
90 uint64_t fDyldBaseAddress;
// pointers into the mapped image, set up by the constructor
91 const macho_nlist<P>* fSymbolTable;
93 const macho_dysymtab_command<P>* fDynamicInfo;
// NOTE(review): name looks like a typo for "fFirstWritableSegment"; only initialized to NULL in the visible code
94 const macho_segment_command<P>* fFristWritableSegment;
95 const macho_dylib_command<P>* fDylibID;
96 const macho_dylib_command<P>* fParentUmbrella;
// true when the header already had MH_PREBOUND set before the constructor forced it on
97 bool fOriginallyPrebound;
// Constructs a Binder over the image described by 'layout' and saves 'dyldBaseAddress'.
// Visible work:
//  - records whether the header was already MH_PREBOUND, then forces the
//    MH_PREBOUND | MH_SPLIT_SEGS | 0x80000000 flags on
//  - walks the load commands to capture the symbol table, string pool, dynamic
//    symbol info, dylib ID and parent-umbrella commands, zeroing dylib timestamps
//  - fills fHashTable with name -> symbol entries
// Throws const char* "no LC_DYSYMTAB" / "no LC_SYMTAB" when those were not found, and
// calls throwf() for a command with the LC_REQ_DYLD bit set that reaches that check.
// NOTE(review): several case labels, breaks and braces of the switch are not visible
// in this excerpt; confirm which load command each assignment belongs to.
101 template <typename A>
102 Binder<A>::Binder(const MachOLayoutAbstraction& layout, uint64_t dyldBaseAddress)
103 : Rebaser<A>(layout), fDyldBaseAddress(dyldBaseAddress),
104 fSymbolTable(NULL), fStrings(NULL), fDynamicInfo(NULL),
105 fFristWritableSegment(NULL), fDylibID(NULL),
106 fParentUmbrella(NULL)
// capture the original prebound state before the flags are overwritten below
108 fOriginallyPrebound = ((this->fHeader->flags() & MH_PREBOUND) != 0);
109 // update header flags so the cache looks prebound split-seg (0x80000000 is in-shared-cache bit)
110 ((macho_header<P>*)this->fHeader)->set_flags(this->fHeader->flags() | MH_PREBOUND | MH_SPLIT_SEGS | 0x80000000);
112 // calculate fDynamicInfo, fStrings, fSymbolTable
113 const macho_symtab_command<P>* symtab;
// load commands start immediately after the mach header
114 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
115 const uint32_t cmd_count = this->fHeader->ncmds();
116 const macho_load_command<P>* cmd = cmds;
117 for (uint32_t i = 0; i < cmd_count; ++i) {
118 switch (cmd->cmd()) {
// symbol table and string pool are addressed as offsets from fLinkEditBase
120 symtab = (macho_symtab_command<P>*)cmd;
121 fSymbolTable = (macho_nlist<P>*)(&this->fLinkEditBase[symtab->symoff()]);
122 fStrings = (const char*)&this->fLinkEditBase[symtab->stroff()];
125 fDynamicInfo = (macho_dysymtab_command<P>*)cmd;
// the command is modified in place: its timestamp is cleared before it is remembered as the dylib ID
128 ((macho_dylib_command<P>*)cmd)->set_timestamp(0);
129 fDylibID = (macho_dylib_command<P>*)cmd;
// dependent-dylib commands also get their timestamp cleared
132 case LC_LOAD_WEAK_DYLIB:
133 case LC_REEXPORT_DYLIB:
134 ((macho_dylib_command<P>*)cmd)->set_timestamp(0);
136 case LC_SUB_FRAMEWORK:
137 fParentUmbrella = (macho_dylib_command<P>*)cmd;
// presumably the default case: refuse commands the loader must understand but this code does not
140 if ( cmd->cmd() & LC_REQ_DYLD )
141 throwf("unknown required load command %d", cmd->cmd());
// advance by the command's own size
143 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
145 if ( fDynamicInfo == NULL )
146 throw "no LC_DYSYMTAB";
147 if ( fSymbolTable == NULL )
148 throw "no LC_SYMTAB";
// no table of contents: index the externally-defined symbol range directly
150 if ( fDynamicInfo->tocoff() == 0 ) {
151 const macho_nlist<P>* start = &fSymbolTable[fDynamicInfo->iextdefsym()];
152 const macho_nlist<P>* end = &start[fDynamicInfo->nextdefsym()];
153 fHashTable.resize(fDynamicInfo->nextdefsym()); // set initial bucket count
154 for (const macho_nlist<P>* sym=start; sym < end; ++sym) {
155 const char* name = &fStrings[sym->n_strx()];
156 fHashTable[name] = sym;
// table-of-contents path (presumably the else branch; the 'else' line is not visible here):
// each TOC entry supplies the symbol-table index of one symbol to index by name
160 int32_t count = fDynamicInfo->ntoc();
161 fHashTable.resize(count); // set initial bucket count
162 const struct dylib_table_of_contents* toc = (dylib_table_of_contents*)&this->fLinkEditBase[fDynamicInfo->tocoff()];
163 for (int32_t i = 0; i < count; ++i) {
164 const uint32_t index = E::get32(toc[i].symbol_index);
165 const macho_nlist<P>* sym = &fSymbolTable[index];
166 const char* name = &fStrings[sym->n_strx()];
167 fHashTable[name] = sym;
// Per-architecture r_length value that doBindExternalRelocations() requires of every
// external relocation: 2 for ppc and x86, 3 for ppc64 and x86_64
// (presumably log2 of the pointer size in bytes — confirm against the Mach-O reloc docs).
173 template <> uint8_t Binder<ppc>::pointerRelocSize() { return 2; }
174 template <> uint8_t Binder<ppc64>::pointerRelocSize() { return 3; }
175 template <> uint8_t Binder<x86>::pointerRelocSize() { return 2; }
176 template <> uint8_t Binder<x86_64>::pointerRelocSize() { return 3; }
// Per-architecture r_type value that doBindExternalRelocations() requires of every
// external relocation; only x86_64 uses a different constant from the others.
178 template <> uint8_t Binder<ppc>::pointerRelocType() { return GENERIC_RELOC_VANILLA; }
179 template <> uint8_t Binder<ppc64>::pointerRelocType() { return GENERIC_RELOC_VANILLA; }
180 template <> uint8_t Binder<x86>::pointerRelocType() { return GENERIC_RELOC_VANILLA; }
181 template <> uint8_t Binder<x86_64>::pointerRelocType() { return X86_64_RELOC_UNSIGNED; }
// Returns the name stored in the dylib ID command recorded by the constructor.
// NOTE(review): the return taken when fDylibID is NULL is not visible in this excerpt;
// callers in this file (setDependentBinders) do test the result against NULL.
184 template <typename A>
185 const char* Binder<A>::getDylibID() const
187 if ( fDylibID != NULL )
188 return fDylibID->name();
// Returns the name stored in the LC_SUB_FRAMEWORK command recorded by the constructor.
// NOTE(review): the return taken when fParentUmbrella is NULL is not visible in this
// excerpt; setDependentBinders() tests the result against NULL.
193 template <typename A>
194 const char* Binder<A>::parentUmbrella()
196 if ( fParentUmbrella != NULL )
197 return fParentUmbrella->name();
// Populates fDependentDylibs from this image's dylib load commands.
//  map: dylib path -> Binder for every image being processed.
// For each dependent-dylib command the path is looked up in 'map'; on a miss, both the
// path and every map key are canonicalized with realpath() and compared, and a match
// found that way is recorded with a warning on stderr. A throwf() for the
// "can't find dylib" case is present. Afterwards, for images whose header lacks
// MH_NO_REEXPORTED_DYLIBS, the older sub-library / sub-umbrella / parent-umbrella
// conventions are examined to decide which dependents are re-exported.
// NOTE(review): many lines (case labels, braces, breaks, and the statements executed
// when a re-export match is found) are not visible in this excerpt.
204 template <typename A>
205 void Binder<A>::setDependentBinders(const Map& map)
207 // first pass to build vector of dylibs
208 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
209 const uint32_t cmd_count = this->fHeader->ncmds();
210 const macho_load_command<P>* cmd = cmds;
211 for (uint32_t i = 0; i < cmd_count; ++i) {
212 switch (cmd->cmd()) {
214 case LC_LOAD_WEAK_DYLIB:
215 case LC_REEXPORT_DYLIB:
216 const char* path = ((struct macho_dylib_command<P>*)cmd)->name();
// exact match on the path string stored in the load command
217 typename Map::const_iterator pos = map.find(path);
218 if ( pos != map.end() ) {
219 BinderAndReExportFlag entry;
220 entry.binder = pos->second;
// only an explicit LC_REEXPORT_DYLIB marks the dependent as re-exported at this point
221 entry.reExport = ( cmd->cmd() == LC_REEXPORT_DYLIB );
222 fDependentDylibs.push_back(entry);
225 // the load command string does not match the install name of any loaded dylib
226 // this could happen if there was not a world build and some dylib changed its
227 // install path to be some symlinked path
229 // use realpath() and walk map looking for a realpath match
231 char targetPath[PATH_MAX];
232 if ( realpath(path, targetPath) != NULL ) {
// linear scan: every map key is canonicalized and compared with the canonical target
233 for(typename Map::const_iterator it=map.begin(); it != map.end(); ++it) {
234 char aPath[PATH_MAX];
235 if ( realpath(it->first, aPath) != NULL ) {
236 if ( strcmp(targetPath, aPath) == 0 ) {
237 BinderAndReExportFlag entry;
238 entry.binder = it->second;
239 entry.reExport = ( cmd->cmd() == LC_REEXPORT_DYLIB );
240 fDependentDylibs.push_back(entry);
242 fprintf(stderr, "update_dyld_shared_cache: warning mismatched install path in %s for %s\n",
243 this->getDylibID(), path);
// presumably reached only when neither lookup found the dylib — the guarding lines are not visible
250 throwf("in %s can't find dylib %s", this->getDylibID(), path);
254 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
256 // handle pre-10.5 re-exports
257 if ( (this->fHeader->flags() & MH_NO_REEXPORTED_DYLIBS) == 0 ) {
// NOTE(review): no reset of 'cmd' back to 'cmds' is visible before the second walk below;
// a line is missing from the excerpt here — confirm against the full file.
259 // LC_SUB_LIBRARY means re-export one with matching leaf name
260 const char* dylibBaseName;
261 const char* frameworkLeafName;
262 for (uint32_t i = 0; i < cmd_count; ++i) {
263 switch ( cmd->cmd() ) {
265 dylibBaseName = ((macho_sub_library_command<P>*)cmd)->sub_library();
266 for (typename std::vector<BinderAndReExportFlag>::iterator it = fDependentDylibs.begin(); it != fDependentDylibs.end(); ++it) {
267 const char* dylibName = it->binder->getDylibID();
268 const char* lastSlash = strrchr(dylibName, '/');
// NOTE(review): &lastSlash[1] is formed before the NULL check that follows
269 const char* leafStart = &lastSlash[1];
270 if ( lastSlash == NULL )
271 leafStart = dylibName;
// compare only the part of the leaf name before its first '.'
272 const char* firstDot = strchr(leafStart, '.');
273 int len = strlen(leafStart);
274 if ( firstDot != NULL )
275 len = firstDot - leafStart;
// strncmp over 'len' characters, so this is a prefix comparison against dylibBaseName
276 if ( strncmp(leafStart, dylibBaseName, len) == 0 )
280 case LC_SUB_UMBRELLA:
281 frameworkLeafName = ((macho_sub_umbrella_command<P>*)cmd)->sub_umbrella();
282 for (typename std::vector<BinderAndReExportFlag>::iterator it = fDependentDylibs.begin(); it != fDependentDylibs.end(); ++it) {
283 const char* dylibName = it->binder->getDylibID();
284 const char* lastSlash = strrchr(dylibName, '/');
// full leaf-name comparison against the sub-umbrella name
285 if ( (lastSlash != NULL) && (strcmp(&lastSlash[1], frameworkLeafName) == 0) )
290 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
292 // ask dependents if they re-export through me
293 const char* thisName = this->getDylibID();
294 if ( thisName != NULL ) {
295 const char* thisLeafName = strrchr(thisName, '/');
296 if ( thisLeafName != NULL )
// only dependents not already flagged are checked against their parent-umbrella name
298 for (typename std::vector<BinderAndReExportFlag>::iterator it = fDependentDylibs.begin(); it != fDependentDylibs.end(); ++it) {
299 if ( ! it->reExport ) {
300 const char* parentUmbrellaName = it->binder->parentUmbrella();
301 if ( parentUmbrellaName != NULL ) {
302 if ( strcmp(parentUmbrellaName, thisLeafName) == 0 )
// Runs the four binding steps for this image in a fixed order: __dyld section setup,
// external relocations, indirect symbol tables, then prebound n_value updates.
311 template <typename A>
312 void Binder<A>::bind()
314 this->doSetUpDyldSection();
315 this->doBindExternalRelocations();
316 this->doBindIndirectSymbols();
317 this->doSetPreboundUndefines();
// Finds the __dyld section inside the __DATA segment and, when it holds at least two
// pointers, stores fDyldBaseAddress + 0x1000 in the first pointer slot and
// fDyldBaseAddress + 0x1008 in the second. Segments and sections that do not match
// are left untouched.
// Fix: the sectionsEnd initializer contained the mis-encoded token "§ionsStart"
// (an "&sect" entity corruption); it is restored to "&sectionsStart".
321 template <typename A>
322 void Binder<A>::doSetUpDyldSection()
324 // find __DATA __dyld section
// load commands start immediately after the mach header
325 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
326 const uint32_t cmd_count = this->fHeader->ncmds();
327 const macho_load_command<P>* cmd = cmds;
328 for (uint32_t i = 0; i < cmd_count; ++i) {
329 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
330 const macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
331 if ( strcmp(seg->segname(), "__DATA") == 0 ) {
// the section headers follow the segment command directly
332 const macho_section<P>* const sectionsStart = (macho_section<P>*)((uint8_t*)seg + sizeof(macho_segment_command<P>));
333 const macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
334 for (const macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
// require room for both pointers before writing anything
335 if ( (strcmp(sect->sectname(), "__dyld") == 0) && (sect->size() >= 2*sizeof(pint_t)) ) {
336 // set two values in __dyld section to point into dyld
337 pint_t* lazyBinder = this->mappedAddressForNewAddress(sect->addr());
338 pint_t* dyldFuncLookup = this->mappedAddressForNewAddress(sect->addr()+sizeof(pint_t));
339 A::P::setP(*lazyBinder, fDyldBaseAddress + 0x1000);
340 A::P::setP(*dyldFuncLookup, fDyldBaseAddress + 0x1008);
// advance by the command's own size
345 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
// Walks the undefined-symbol range of the symbol table and, for every entry with
// N_EXT set, overwrites its n_value with the address returned by resolveUndefined().
// The symbol table is modified in place.
// NOTE(review): the case labels selecting which load commands set 'symbolTable' and
// 'dysymtab' are not visible here; both are dereferenced below without a NULL check
// (the constructor does throw when the corresponding commands are absent).
350 template <typename A>
351 void Binder<A>::doSetPreboundUndefines()
353 const macho_dysymtab_command<P>* dysymtab = NULL;
354 macho_nlist<P>* symbolTable = NULL;
356 // get symbol table info
357 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
358 const uint32_t cmd_count = this->fHeader->ncmds();
359 const macho_load_command<P>* cmd = cmds;
360 for (uint32_t i = 0; i < cmd_count; ++i) {
361 switch (cmd->cmd()) {
// non-const view of the same symbol table the constructor located, because entries are rewritten below
364 const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
365 symbolTable = (macho_nlist<P>*)(&this->fLinkEditBase[symtab->symoff()]);
369 dysymtab = (macho_dysymtab_command<P>*)cmd;
372 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
375 // walk all undefines and set their prebound n_value
// the undefined range is [iundefsym, iundefsym + nundefsym)
376 macho_nlist<P>* const lastUndefine = &symbolTable[dysymtab->iundefsym()+dysymtab->nundefsym()];
377 for (macho_nlist<P>* entry = &symbolTable[dysymtab->iundefsym()]; entry < lastUndefine; ++entry) {
378 if ( entry->n_type() & N_EXT ) {
379 pint_t pbaddr = this->resolveUndefined(entry);
380 //fprintf(stderr, "doSetPreboundUndefines: r_sym=%s, pbaddr=0x%08X, in %s\n",
381 // &fStrings[entry->n_strx()], pbaddr, this->getDylibID());
382 entry->set_n_value(pbaddr);
// Binds every external relocation record of this image.
// Each record must have the architecture's pointer r_length and r_type and must not be
// pc-relative; otherwise a const char* message is thrown. The record's r_address is
// taken relative to a writable segment's new address, the pointer currently stored
// there is read as the addend, the referenced symbol is resolved, and
// symbolAddr + addend is written back.
// NOTE(review): the declaration of 'location', the try block opening, and any 'break'
// after the writable-segment match are not visible in this excerpt.
388 template <typename A>
389 void Binder<A>::doBindExternalRelocations()
391 // get where reloc addresses start
392 // these addresses are always relative to the first writable segment because they are in the cache, which always
393 // has writable segments far from read-only segments
394 pint_t firstWritableSegmentBaseAddress = 0;
395 const std::vector<MachOLayoutAbstraction::Segment>& segments = this->fLayout.getSegments();
396 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
397 const MachOLayoutAbstraction::Segment& seg = *it;
398 if ( seg.writable() ) {
399 firstWritableSegmentBaseAddress = seg.newAddress();
404 // loop through all external relocation records and bind each
405 const macho_relocation_info<P>* const relocsStart = (macho_relocation_info<P>*)(&this->fLinkEditBase[fDynamicInfo->extreloff()]);
406 const macho_relocation_info<P>* const relocsEnd = &relocsStart[fDynamicInfo->nextrel()];
407 for (const macho_relocation_info<P>* reloc=relocsStart; reloc < relocsEnd; ++reloc) {
// only plain, absolute, pointer-sized relocations are accepted
408 if ( reloc->r_length() != pointerRelocSize() )
409 throw "bad external relocation length";
410 if ( reloc->r_type() != pointerRelocType() )
411 throw "unknown external relocation type";
412 if ( reloc->r_pcrel() )
413 throw "r_pcrel external relocaiton not supported";
415 const macho_nlist<P>* undefinedSymbol = &fSymbolTable[reloc->r_symbolnum()];
// translate the relocation's new-layout address into a pointer within the mapped file
418 location = mappedAddressForNewAddress(reloc->r_address() + firstWritableSegmentBaseAddress);
// re-throw a mapping failure with the offending r_address added to the message
420 catch (const char* msg) {
421 throwf("%s processesing external relocation r_address 0x%08X", msg, reloc->r_address());
// the value currently stored at the fix-up location serves as the addend
423 pint_t addend = P::getP(*location);
424 if ( fOriginallyPrebound ) {
425 // in a prebound binary, the n_value field of an undefined symbol is set to the address where the symbol was found when prebound
426 // so, subtracting that gives the initial displacement which we need to add to the newly found symbol address
427 // if mach-o relocation structs had an "addend" field this complication would not be necessary.
428 addend -= undefinedSymbol->n_value();
429 // To further complicate things, if this is a defined symbol, then its n_value has already been adjusted to the
430 // new base address, so we need to back off the slide too.
431 if ( (undefinedSymbol->n_type() & N_TYPE) == N_SECT ) {
432 addend += this->getSlideForNewAddress(undefinedSymbol->n_value());
435 pint_t symbolAddr = this->resolveUndefined(undefinedSymbol);
436 //fprintf(stderr, "external reloc: r_address=0x%08X, r_sym=%s, symAddr=0x%08llX, addend=0x%08llX in %s\n",
437 // reloc->r_address(), &fStrings[undefinedSymbol->n_strx()], (uint64_t)symbolAddr, (uint64_t)addend, this->getDylibID());
438 P::setP(*location, symbolAddr + addend);
// Generic bindStub used by architectures without a dedicated specialization.
// NOTE(review): the function body is not visible in this excerpt; per the comment
// below it is presumably a no-op — confirm against the full file.
443 // most architectures use pure code, unmodifiable stubs
444 template <typename A>
445 void Binder<A>::bindStub(uint8_t elementSize, uint8_t* location, pint_t vmlocation, pint_t value)
// x86 specialization of bindStub: rewrites a 5-byte stub into a direct jump.
//  elementSize: size of one stub entry; anything other than 5 is left untouched
//  location:    where the stub bytes live in the mapped file (written to)
//  vmlocation:  address the stub will have at run time (used for the displacement)
//  value:       address of the bound symbol
// NOTE(review): the "template <>" line for this specialization is not visible in this excerpt.
450 // x86 supports fast stubs
452 void Binder<x86>::bindStub(uint8_t elementSize, uint8_t* location, pint_t vmlocation, pint_t value)
454 // if the stub is not 5-bytes, it is an old slow stub
455 if ( elementSize == 5 ) {
// displacement is measured from the end of the 5-byte instruction
456 uint32_t rel32 = value - (vmlocation + 5);
457 location[0] = 0xE9; // JMP rel32
// store the displacement least-significant byte first
458 location[1] = rel32 & 0xFF;
459 location[2] = (rel32 >> 8) & 0xFF;
460 location[3] = (rel32 >> 16) & 0xFF;
461 location[4] = (rel32 >> 24) & 0xFF;
// Binds the entries of every section that is driven by the indirect symbol table.
// For each section of each segment an element size is chosen from the section type
// (sect->reserved2() for one kind, sizeof(pint_t) for the symbol-pointer kinds); each
// element's indirect-table entry is read, and entries that are real symbol indexes are
// resolved and either stored as a pointer or handed to bindStub().
// Fix: the sectionsEnd initializer contained the mis-encoded token "§ionsStart"
// (an "&sect" entity corruption); it is restored to "&sectionsStart".
// NOTE(review): some case labels, breaks and braces of the switches are not visible
// in this excerpt.
465 template <typename A>
466 void Binder<A>::doBindIndirectSymbols()
// the indirect symbol table is an array of 32-bit symbol indexes in the link-edit data
468 const uint32_t* const indirectTable = (uint32_t*)&this->fLinkEditBase[fDynamicInfo->indirectsymoff()];
469 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
470 const uint32_t cmd_count = this->fHeader->ncmds();
471 const macho_load_command<P>* cmd = cmds;
472 for (uint32_t i = 0; i < cmd_count; ++i) {
473 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
474 const macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
// the section headers follow the segment command directly
475 const macho_section<P>* const sectionsStart = (macho_section<P>*)((uint8_t*)seg + sizeof(macho_segment_command<P>));
476 const macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
477 for (const macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
// elementSize stays 0 for section types that have no indirect entries
478 uint8_t elementSize = 0;
479 uint8_t sectionType = sect->flags() & SECTION_TYPE;
480 switch ( sectionType ) {
// presumably the symbol-stub case: the per-stub size is carried in reserved2
482 elementSize = sect->reserved2();
484 case S_NON_LAZY_SYMBOL_POINTERS:
485 case S_LAZY_SYMBOL_POINTERS:
486 elementSize = sizeof(pint_t);
489 if ( elementSize != 0 ) {
490 uint32_t elementCount = sect->size() / elementSize;
// reserved1 is this section's starting index into the indirect symbol table
491 const uint32_t indirectTableOffset = sect->reserved1();
492 uint8_t* location = NULL;
// empty sections are not mapped
493 if ( sect->size() != 0 )
494 location = (uint8_t*)this->mappedAddressForNewAddress(sect->addr());
495 pint_t vmlocation = sect->addr();
// location walks the mapped bytes while vmlocation tracks the matching section address
496 for (uint32_t j=0; j < elementCount; ++j, location += elementSize, vmlocation += elementSize) {
497 uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
498 switch ( symbolIndex ) {
// special marker values rather than symbol indexes (their handling is not visible in this excerpt)
499 case INDIRECT_SYMBOL_ABS:
500 case INDIRECT_SYMBOL_LOCAL:
503 const macho_nlist<P>* undefinedSymbol = &fSymbolTable[symbolIndex];
504 pint_t symbolAddr = this->resolveUndefined(undefinedSymbol);
505 switch ( sectionType ) {
506 case S_NON_LAZY_SYMBOL_POINTERS:
507 case S_LAZY_SYMBOL_POINTERS:
// pointer sections simply receive the resolved address
508 P::setP(*((pint_t*)location), symbolAddr);
// remaining section kind: let the architecture-specific bindStub patch the element
511 this->bindStub(elementSize, location, vmlocation, symbolAddr);
// advance by the command's own size
520 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
// Returns the address that 'undefinedSymbol' should be bound to.
// Symbols that are actually defined in a section (N_SECT) and are private-extern or
// weak definitions resolve to their own n_value. Otherwise the image must be
// two-level namespace; the library ordinal in n_desc selects a dependent Binder whose
// exports are searched by name.
// Throws const char* for flat-namespace images, the executable/dynamic-lookup
// ordinals and out-of-range ordinals; a throwf() for the unresolved-symbol case is present.
// NOTE(review): the switch header, the SELF_LIBRARY_ORDINAL handling and the NULL test
// guarding the final throwf() are not visible in this excerpt.
527 template <typename A>
528 typename A::P::uint_t Binder<A>::resolveUndefined(const macho_nlist<P>* undefinedSymbol)
530 if ( (undefinedSymbol->n_type() & N_TYPE) == N_SECT ) {
531 if ( (undefinedSymbol->n_type() & N_PEXT) != 0 ) {
532 // is a multi-module private_extern internal reference that the linker did not optimize away
533 return undefinedSymbol->n_value();
535 if ( (undefinedSymbol->n_desc() & N_WEAK_DEF) != 0 ) {
536 // is a weak definition, we should prebind to this one in the same linkage unit
537 return undefinedSymbol->n_value();
// the symbol's name lives in the string pool at offset n_strx
540 const char* symbolName = &fStrings[undefinedSymbol->n_strx()];
541 if ( (this->fHeader->flags() & MH_TWOLEVEL) == 0 ) {
542 // flat namespace binding
543 throw "flat namespace not supported";
// two-level namespace: the ordinal names which dependent dylib supplies the symbol
546 uint8_t ordinal = GET_LIBRARY_ORDINAL(undefinedSymbol->n_desc());
547 Binder<A>* binder = NULL;
549 case EXECUTABLE_ORDINAL:
550 case DYNAMIC_LOOKUP_ORDINAL:
551 throw "magic ordineal not supported";
552 case SELF_LIBRARY_ORDINAL:
// ordinary ordinals are 1-based indexes into fDependentDylibs
556 if ( ordinal > fDependentDylibs.size() )
557 throw "two-level ordinal out of range";
558 binder = fDependentDylibs[ordinal-1].binder;
560 const macho_nlist<P>* sym = binder->findExportedSymbol(symbolName);
562 throwf("could not resolve %s from %s", symbolName, this->getDylibID());
563 return sym->n_value();
// Looks up 'name' among this image's exported symbols (fHashTable) and then, through
// recursive calls, among the exports of dependents flagged as re-exported.
// NOTE(review): the return statements and the conditions that stop the search are not
// visible in this excerpt; presumably the first hit is returned and NULL means not found.
567 template <typename A>
568 const macho_nlist<typename A::P>* Binder<A>::findExportedSymbol(const char* name)
570 //fprintf(stderr, "findExportedSymbol(%s) in %s\n", name, this->getDylibID());
571 const macho_nlist<P>* sym = NULL;
// own exports first
572 typename NameToSymbolMap::iterator pos = fHashTable.find(name);
573 if ( pos != fHashTable.end() )
// then every dependent whose symbols are re-exported through this image
577 for (typename std::vector<BinderAndReExportFlag>::iterator it = fDependentDylibs.begin(); it != fDependentDylibs.end(); ++it) {
578 if ( it->reExport ) {
579 sym = it->binder->findExportedSymbol(name);
589 #endif // __MACHO_BINDER__