]> git.saurik.com Git - apple/dyld.git/blame - launch-cache/MachOBinder.hpp
dyld-353.2.3.tar.gz
[apple/dyld.git] / launch-cache / MachOBinder.hpp
CommitLineData
bac542e6
A
1/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
412ebb8e 3 * Copyright (c) 2006-2011 Apple Inc. All rights reserved.
bac542e6
A
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25#ifndef __MACHO_BINDER__
26#define __MACHO_BINDER__
27
28#include <sys/types.h>
29#include <sys/stat.h>
30#include <sys/mman.h>
31#include <mach/mach.h>
32#include <limits.h>
33#include <stdarg.h>
34#include <stdio.h>
35#include <fcntl.h>
36#include <errno.h>
37#include <unistd.h>
38#include <mach-o/loader.h>
39#include <mach-o/fat.h>
40
41#include <vector>
42#include <set>
2fd3f4e8
A
43#include <unordered_map>
44#include <unordered_set>
bac542e6
A
45
46#include "MachOFileAbstraction.hpp"
47#include "Architectures.hpp"
48#include "MachOLayout.hpp"
49#include "MachORebaser.hpp"
39a8cd10 50#include "MachOTrie.hpp"
bac542e6 51
412ebb8e
A
52#ifndef EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER
53 #define EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER 0x10
54#endif
bac542e6 55
2fd3f4e8
A
56#ifndef EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE
57 #define EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE 0x02
58#endif
bac542e6
A
59
60
61template <typename A>
62class Binder : public Rebaser<A>
63{
64public:
2fd3f4e8
A
65 class CStringHash {
66 public:
67 size_t operator()(const char* __s) const {
68 size_t __h = 0;
69 for ( ; *__s; ++__s)
70 __h = 5 * __h + *__s;
71 return __h;
72 };
73 };
bac542e6
A
74 struct CStringEquals {
75 bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); }
76 };
2fd3f4e8 77 typedef std::unordered_map<const char*, class Binder<A>*, CStringHash, CStringEquals> Map;
bac542e6
A
78
79
80 Binder(const MachOLayoutAbstraction&, uint64_t dyldBaseAddress);
81 virtual ~Binder() {}
82
83 const char* getDylibID() const;
84 void setDependentBinders(const Map& map);
412ebb8e
A
85 void bind(std::vector<void*>&);
86 void optimize();
87 void addResolverClient(Binder<A>* clientDylib, const char* symbolName);
bac542e6
A
88private:
89 typedef typename A::P P;
90 typedef typename A::P::E E;
91 typedef typename A::P::uint_t pint_t;
92 struct BinderAndReExportFlag { Binder<A>* binder; bool reExport; };
412ebb8e 93 struct SymbolReExport { const char* exportName; int dylibOrdinal; const char* importName; };
2fd3f4e8
A
94 typedef std::unordered_map<const char*, pint_t, CStringHash, CStringEquals> NameToAddrMap;
95 typedef std::unordered_set<const char*, CStringHash, CStringEquals> NameSet;
412ebb8e
A
96 struct ClientAndSymbol { Binder<A>* client; const char* symbolName; };
97 struct SymbolAndLazyPointer { const char* symbolName; pint_t lpVMAddr; };
bac542e6 98
412ebb8e 99 static bool isPublicLocation(const char* pth);
bac542e6
A
100 void doBindExternalRelocations();
101 void doBindIndirectSymbols();
102 void doSetUpDyldSection();
103 void doSetPreboundUndefines();
412ebb8e
A
104 void hoistPrivateRexports();
105 int ordinalOfDependentBinder(Binder<A>* dep);
106 void doBindDyldInfo(std::vector<void*>& pointersInData);
107 void doBindDyldLazyInfo(std::vector<void*>& pointersInData);
39a8cd10 108 void bindDyldInfoAt(uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type,
412ebb8e 109 int libraryOrdinal, int64_t addend,
832b6fce 110 const char* symbolName, bool lazyPointer, bool weakImport,
412ebb8e 111 std::vector<void*>& pointersInData);
bac542e6 112 pint_t resolveUndefined(const macho_nlist<P>* undefinedSymbol);
2fd3f4e8
A
113 bool findExportedSymbolAddress(const char* name, pint_t* result, Binder<A>** foundIn,
114 bool* isResolverSymbol, bool* isAbsolute);
bac542e6
A
115 void bindStub(uint8_t elementSize, uint8_t* location, pint_t vmlocation, pint_t value);
116 const char* parentUmbrella();
412ebb8e
A
117 pint_t runtimeAddressFromNList(const macho_nlist<P>* sym);
118 void optimizeStub(const char* symbolName, pint_t lpVMAddr);
119 void optimizeStub(uint8_t* stubMappedAddress, pint_t stubVMAddress, uint32_t stubSize, pint_t lpVMAddr);
120 pint_t findLazyPointerFor(const char* symbolName);
121
bac542e6
A
122
123 static uint8_t pointerRelocSize();
124 static uint8_t pointerRelocType();
125
126 std::vector<BinderAndReExportFlag> fDependentDylibs;
39a8cd10 127 NameToAddrMap fHashTable;
412ebb8e 128 NameSet fSymbolResolvers;
2fd3f4e8 129 NameSet fAbsoluteSymbols;
412ebb8e 130 std::vector<SymbolReExport> fReExportedSymbols;
bac542e6
A
131 uint64_t fDyldBaseAddress;
132 const macho_nlist<P>* fSymbolTable;
133 const char* fStrings;
134 const macho_dysymtab_command<P>* fDynamicInfo;
135 const macho_segment_command<P>* fFristWritableSegment;
136 const macho_dylib_command<P>* fDylibID;
137 const macho_dylib_command<P>* fParentUmbrella;
39a8cd10 138 const macho_dyld_info_command<P>* fDyldInfo;
bac542e6 139 bool fOriginallyPrebound;
412ebb8e
A
140 bool fReExportedSymbolsResolved;
141 std::vector<ClientAndSymbol> fClientAndSymbols;
2fd3f4e8 142 NameToAddrMap fResolverLazyPointers;
bac542e6
A
143};
144
412ebb8e
A
145template <>
146uint32_t Binder<arm>::runtimeAddressFromNList(const macho_nlist<Pointer32<LittleEndian> >* sym)
147{
148 if (sym->n_desc() & N_ARM_THUMB_DEF)
149 return sym->n_value() + 1;
150 else
151 return sym->n_value();
152}
153
154template <typename A>
155typename A::P::uint_t Binder<A>::runtimeAddressFromNList(const macho_nlist<P>* sym)
156{
157 return sym->n_value();
158}
159
bac542e6
A
160
161template <typename A>
162Binder<A>::Binder(const MachOLayoutAbstraction& layout, uint64_t dyldBaseAddress)
163 : Rebaser<A>(layout), fDyldBaseAddress(dyldBaseAddress),
164 fSymbolTable(NULL), fStrings(NULL), fDynamicInfo(NULL),
39a8cd10 165 fFristWritableSegment(NULL), fDylibID(NULL), fDyldInfo(NULL),
412ebb8e 166 fParentUmbrella(NULL), fReExportedSymbolsResolved(false)
bac542e6
A
167{
168 fOriginallyPrebound = ((this->fHeader->flags() & MH_PREBOUND) != 0);
169 // update header flags so the cache looks prebound split-seg (0x80000000 is in-shared-cache bit)
170 ((macho_header<P>*)this->fHeader)->set_flags(this->fHeader->flags() | MH_PREBOUND | MH_SPLIT_SEGS | 0x80000000);
171
172 // calculate fDynamicInfo, fStrings, fSymbolTable
2028a915 173 const macho_symtab_command<P>* symtab;
bac542e6
A
174 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
175 const uint32_t cmd_count = this->fHeader->ncmds();
176 const macho_load_command<P>* cmd = cmds;
177 for (uint32_t i = 0; i < cmd_count; ++i) {
178 switch (cmd->cmd()) {
179 case LC_SYMTAB:
2028a915 180 symtab = (macho_symtab_command<P>*)cmd;
bac542e6
A
181 fSymbolTable = (macho_nlist<P>*)(&this->fLinkEditBase[symtab->symoff()]);
182 fStrings = (const char*)&this->fLinkEditBase[symtab->stroff()];
183 break;
184 case LC_DYSYMTAB:
185 fDynamicInfo = (macho_dysymtab_command<P>*)cmd;
186 break;
187 case LC_ID_DYLIB:
188 ((macho_dylib_command<P>*)cmd)->set_timestamp(0);
189 fDylibID = (macho_dylib_command<P>*)cmd;
190 break;
191 case LC_LOAD_DYLIB:
192 case LC_LOAD_WEAK_DYLIB:
193 case LC_REEXPORT_DYLIB:
412ebb8e 194 case LC_LOAD_UPWARD_DYLIB:
bac542e6
A
195 ((macho_dylib_command<P>*)cmd)->set_timestamp(0);
196 break;
197 case LC_SUB_FRAMEWORK:
198 fParentUmbrella = (macho_dylib_command<P>*)cmd;
199 break;
39a8cd10
A
200 case LC_DYLD_INFO:
201 case LC_DYLD_INFO_ONLY:
202 fDyldInfo = (macho_dyld_info_command<P>*)cmd;
203 break;
204 case LC_RPATH:
832b6fce 205 throwf("dyld shared cache does not support LC_RPATH found in %s", layout.getFilePath());
39a8cd10 206 break;
bac542e6
A
207 default:
208 if ( cmd->cmd() & LC_REQ_DYLD )
39a8cd10 209 throwf("unknown required load command 0x%08X", cmd->cmd());
bac542e6
A
210 }
211 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
212 }
213 if ( fDynamicInfo == NULL )
214 throw "no LC_DYSYMTAB";
215 if ( fSymbolTable == NULL )
216 throw "no LC_SYMTAB";
217 // build hash table
39a8cd10
A
218// fprintf(stderr, "exports for %s\n", layout.getFilePath());
219 if ( fDyldInfo != NULL ) {
220 std::vector<mach_o::trie::Entry> exports;
412ebb8e 221 const uint8_t* exportsStart = layout.getDyldInfoExports();
39a8cd10
A
222 const uint8_t* exportsEnd = &exportsStart[fDyldInfo->export_size()];
223 mach_o::trie::parseTrie(exportsStart, exportsEnd, exports);
224 pint_t baseAddress = layout.getSegments()[0].newAddress();
225 for(std::vector<mach_o::trie::Entry>::iterator it = exports.begin(); it != exports.end(); ++it) {
2fd3f4e8
A
226 switch ( it->flags & EXPORT_SYMBOL_FLAGS_KIND_MASK ) {
227 case EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
228 if ( (it->flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) ) {
229 fSymbolResolvers.insert(it->name);
230 }
231 if ( it->flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
232 //fprintf(stderr, "found re-export %s in %s\n", sym.exportName, this->getDylibID());
233 SymbolReExport sym;
234 sym.exportName = it->name;
235 sym.dylibOrdinal = it->other;
236 sym.importName = it->importName;
237 if ( (sym.importName == NULL) || (sym.importName[0] == '\0') )
238 sym.importName = sym.exportName;
239 fReExportedSymbols.push_back(sym);
240 // fHashTable entry will be added in first call to findExportedSymbolAddress()
241 }
242 else {
243 fHashTable[it->name] = it->address + baseAddress;
244 }
245 break;
246 case EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
412ebb8e 247 fHashTable[it->name] = it->address + baseAddress;
2fd3f4e8
A
248 break;
249 case EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE:
250 fHashTable[it->name] = it->address;
251 fAbsoluteSymbols.insert(it->name);
252 break;
253 default:
254 throwf("non-regular symbol binding not supported for %s in %s", it->name, layout.getFilePath());
255 break;
412ebb8e 256 }
39a8cd10 257 //fprintf(stderr, "0x%08llX %s\n", it->address + baseAddress, it->name);
bac542e6
A
258 }
259 }
260 else {
39a8cd10
A
261 if ( fDynamicInfo->tocoff() == 0 ) {
262 const macho_nlist<P>* start = &fSymbolTable[fDynamicInfo->iextdefsym()];
263 const macho_nlist<P>* end = &start[fDynamicInfo->nextdefsym()];
2fd3f4e8 264 fHashTable.reserve(fDynamicInfo->nextdefsym()); // set initial bucket count
39a8cd10
A
265 for (const macho_nlist<P>* sym=start; sym < end; ++sym) {
266 const char* name = &fStrings[sym->n_strx()];
412ebb8e 267 fHashTable[name] = runtimeAddressFromNList(sym);
39a8cd10
A
268 //fprintf(stderr, " 0x%08llX %s\n", sym->n_value(), name);
269 }
270 }
271 else {
272 int32_t count = fDynamicInfo->ntoc();
2fd3f4e8 273 fHashTable.reserve(count); // set initial bucket count
39a8cd10
A
274 const struct dylib_table_of_contents* toc = (dylib_table_of_contents*)&this->fLinkEditBase[fDynamicInfo->tocoff()];
275 for (int32_t i = 0; i < count; ++i) {
276 const uint32_t index = E::get32(toc[i].symbol_index);
277 const macho_nlist<P>* sym = &fSymbolTable[index];
278 const char* name = &fStrings[sym->n_strx()];
412ebb8e 279 fHashTable[name] = runtimeAddressFromNList(sym);
39a8cd10
A
280 //fprintf(stderr, "- 0x%08llX %s\n", sym->n_value(), name);
281 }
bac542e6
A
282 }
283 }
bac542e6
A
284}
285
bac542e6
A
286template <> uint8_t Binder<x86>::pointerRelocSize() { return 2; }
287template <> uint8_t Binder<x86_64>::pointerRelocSize() { return 3; }
39a8cd10 288template <> uint8_t Binder<arm>::pointerRelocSize() { return 2; }
19894a12 289template <> uint8_t Binder<arm64>::pointerRelocSize() { return 3; }
bac542e6 290
bac542e6
A
291template <> uint8_t Binder<x86>::pointerRelocType() { return GENERIC_RELOC_VANILLA; }
292template <> uint8_t Binder<x86_64>::pointerRelocType() { return X86_64_RELOC_UNSIGNED; }
39a8cd10 293template <> uint8_t Binder<arm>::pointerRelocType() { return ARM_RELOC_VANILLA; }
19894a12 294template <> uint8_t Binder<arm64>::pointerRelocType() { return ARM64_RELOC_UNSIGNED; }
bac542e6
A
295
296
297template <typename A>
298const char* Binder<A>::getDylibID() const
299{
300 if ( fDylibID != NULL )
301 return fDylibID->name();
302 else
303 return NULL;
304}
305
306template <typename A>
307const char* Binder<A>::parentUmbrella()
308{
309 if ( fParentUmbrella != NULL )
310 return fParentUmbrella->name();
311 else
312 return NULL;
313}
314
315
412ebb8e
A
316template <typename A>
317bool Binder<A>::isPublicLocation(const char* pth)
318{
319 // /usr/lib is a public location
320 if ( (strncmp(pth, "/usr/lib/", 9) == 0) && (strchr(&pth[9], '/') == NULL) )
321 return true;
322
323 // /System/Library/Frameworks/ is a public location
324 if ( strncmp(pth, "/System/Library/Frameworks/", 27) == 0 ) {
325 const char* frameworkDot = strchr(&pth[27], '.');
326 // but only top level framework
327 // /System/Library/Frameworks/Foo.framework/Versions/A/Foo ==> true
328 // /System/Library/Frameworks/Foo.framework/Resources/libBar.dylib ==> false
329 // /System/Library/Frameworks/Foo.framework/Frameworks/Bar.framework/Bar ==> false
330 // /System/Library/Frameworks/Foo.framework/Frameworks/Xfoo.framework/XFoo ==> false
331 if ( frameworkDot != NULL ) {
332 int frameworkNameLen = frameworkDot - &pth[27];
333 if ( strncmp(&pth[strlen(pth)-frameworkNameLen-1], &pth[26], frameworkNameLen+1) == 0 )
334 return true;
335 }
336 }
337
338 return false;
339}
bac542e6
A
340
341template <typename A>
342void Binder<A>::setDependentBinders(const Map& map)
343{
344 // first pass to build vector of dylibs
345 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
346 const uint32_t cmd_count = this->fHeader->ncmds();
347 const macho_load_command<P>* cmd = cmds;
348 for (uint32_t i = 0; i < cmd_count; ++i) {
349 switch (cmd->cmd()) {
350 case LC_LOAD_DYLIB:
351 case LC_LOAD_WEAK_DYLIB:
352 case LC_REEXPORT_DYLIB:
412ebb8e 353 case LC_LOAD_UPWARD_DYLIB:
bac542e6
A
354 const char* path = ((struct macho_dylib_command<P>*)cmd)->name();
355 typename Map::const_iterator pos = map.find(path);
356 if ( pos != map.end() ) {
357 BinderAndReExportFlag entry;
358 entry.binder = pos->second;
359 entry.reExport = ( cmd->cmd() == LC_REEXPORT_DYLIB );
360 fDependentDylibs.push_back(entry);
361 }
362 else {
363 // the load command string does not match the install name of any loaded dylib
364 // this could happen if there was not a world build and some dylib changed its
365 // install path to be some symlinked path
366
367 // use realpath() and walk map looking for a realpath match
368 bool found = false;
369 char targetPath[PATH_MAX];
370 if ( realpath(path, targetPath) != NULL ) {
371 for(typename Map::const_iterator it=map.begin(); it != map.end(); ++it) {
372 char aPath[PATH_MAX];
373 if ( realpath(it->first, aPath) != NULL ) {
374 if ( strcmp(targetPath, aPath) == 0 ) {
375 BinderAndReExportFlag entry;
376 entry.binder = it->second;
377 entry.reExport = ( cmd->cmd() == LC_REEXPORT_DYLIB );
378 fDependentDylibs.push_back(entry);
379 found = true;
380 fprintf(stderr, "update_dyld_shared_cache: warning mismatched install path in %s for %s\n",
381 this->getDylibID(), path);
382 break;
383 }
384 }
385 }
386 }
832b6fce
A
387 if ( ! found ) {
388 if ( cmd->cmd() == LC_LOAD_WEAK_DYLIB ) {
389 BinderAndReExportFlag entry;
390 entry.binder = NULL;
391 entry.reExport = false;
392 fDependentDylibs.push_back(entry);
393 break;
394 }
395 else {
396 throwf("in %s can't find dylib %s", this->getDylibID(), path);
397 }
398 }
bac542e6
A
399 }
400 break;
401 }
402 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
403 }
404 // handle pre-10.5 re-exports
405 if ( (this->fHeader->flags() & MH_NO_REEXPORTED_DYLIBS) == 0 ) {
406 cmd = cmds;
407 // LC_SUB_LIBRARY means re-export one with matching leaf name
2028a915
A
408 const char* dylibBaseName;
409 const char* frameworkLeafName;
bac542e6
A
410 for (uint32_t i = 0; i < cmd_count; ++i) {
411 switch ( cmd->cmd() ) {
412 case LC_SUB_LIBRARY:
2028a915 413 dylibBaseName = ((macho_sub_library_command<P>*)cmd)->sub_library();
bac542e6
A
414 for (typename std::vector<BinderAndReExportFlag>::iterator it = fDependentDylibs.begin(); it != fDependentDylibs.end(); ++it) {
415 const char* dylibName = it->binder->getDylibID();
416 const char* lastSlash = strrchr(dylibName, '/');
417 const char* leafStart = &lastSlash[1];
418 if ( lastSlash == NULL )
419 leafStart = dylibName;
420 const char* firstDot = strchr(leafStart, '.');
421 int len = strlen(leafStart);
422 if ( firstDot != NULL )
423 len = firstDot - leafStart;
424 if ( strncmp(leafStart, dylibBaseName, len) == 0 )
425 it->reExport = true;
426 }
427 break;
428 case LC_SUB_UMBRELLA:
2028a915 429 frameworkLeafName = ((macho_sub_umbrella_command<P>*)cmd)->sub_umbrella();
bac542e6
A
430 for (typename std::vector<BinderAndReExportFlag>::iterator it = fDependentDylibs.begin(); it != fDependentDylibs.end(); ++it) {
431 const char* dylibName = it->binder->getDylibID();
432 const char* lastSlash = strrchr(dylibName, '/');
433 if ( (lastSlash != NULL) && (strcmp(&lastSlash[1], frameworkLeafName) == 0) )
434 it->reExport = true;
435 }
436 break;
437 }
438 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
439 }
440 // ask dependents if they re-export through me
441 const char* thisName = this->getDylibID();
442 if ( thisName != NULL ) {
443 const char* thisLeafName = strrchr(thisName, '/');
444 if ( thisLeafName != NULL )
445 ++thisLeafName;
446 for (typename std::vector<BinderAndReExportFlag>::iterator it = fDependentDylibs.begin(); it != fDependentDylibs.end(); ++it) {
447 if ( ! it->reExport ) {
2fd3f4e8
A
448 Binder<A>* dep = it->binder;
449 if ( dep != NULL ) {
450 const char* parentUmbrellaName = dep->parentUmbrella();
451 if ( parentUmbrellaName != NULL ) {
452 if ( strcmp(parentUmbrellaName, thisLeafName) == 0 )
453 it->reExport = true;
454 }
bac542e6
A
455 }
456 }
457 }
458 }
459 }
412ebb8e
A
460
461}
462
463template <typename A>
464int Binder<A>::ordinalOfDependentBinder(Binder<A>* dep)
465{
466 for (int i=0; i < fDependentDylibs.size(); ++i) {
467 if ( fDependentDylibs[i].binder == dep )
468 return i+1;
469 }
470 throw "dependend dylib not found";
471}
472
473template <typename A>
474void Binder<A>::hoistPrivateRexports()
475{
476 std::vector<Binder<A>*> privateReExportedDylibs;
477 for (typename std::vector<BinderAndReExportFlag>::iterator it = fDependentDylibs.begin(); it != fDependentDylibs.end(); ++it) {
478 if ( it->reExport && ! isPublicLocation(it->binder->getDylibID()) )
479 privateReExportedDylibs.push_back(it->binder);
480 }
481 if ( privateReExportedDylibs.size() != 0 ) {
482 // parse export info into vector of exports
483 const uint8_t* exportsStart = this->fLayout.getDyldInfoExports();
484 const uint8_t* exportsEnd = &exportsStart[fDyldInfo->export_size()];
485 std::vector<mach_o::trie::Entry> exports;
486 mach_o::trie::parseTrie(exportsStart, exportsEnd, exports);
487 //fprintf(stderr, "%s exports %lu symbols from trie of size %u \n", this->fLayout.getFilePath(), exports.size(), fDyldInfo->export_size());
488
489 // add re-exports for each export from an re-exported dylib
490 for(typename std::vector<Binder<A>*>::iterator it = privateReExportedDylibs.begin(); it != privateReExportedDylibs.end(); ++it) {
491 Binder<A>* binder = *it;
492 int ordinal = ordinalOfDependentBinder(binder);
493 const uint8_t* aDylibsExportsStart = binder->fLayout.getDyldInfoExports();
494 const uint8_t* aDylibsExportsEnd = &aDylibsExportsStart[binder->fDyldInfo->export_size()];
495 std::vector<mach_o::trie::Entry> aDylibsExports;
496 mach_o::trie::parseTrie(aDylibsExportsStart, aDylibsExportsEnd, aDylibsExports);
497 //fprintf(stderr, "%s re-exports %lu symbols from %s\n", this->fLayout.getFilePath(), aDylibsExports.size(), binder->getDylibID());
498 for(std::vector<mach_o::trie::Entry>::iterator eit = aDylibsExports.begin(); eit != aDylibsExports.end(); ++eit) {
499 mach_o::trie::Entry entry = *eit;
500 entry.flags |= EXPORT_SYMBOL_FLAGS_REEXPORT;
501 entry.other = ordinal;
502 entry.importName = NULL;
503 exports.push_back(entry);
504 }
505 }
506 // rebuild new combined trie
507 std::vector<uint8_t> newExportTrieBytes;
508 newExportTrieBytes.reserve(fDyldInfo->export_size());
509 mach_o::trie::makeTrie(exports, newExportTrieBytes);
510 //fprintf(stderr, "%s now exports %lu symbols from trie of size %lu\n", this->fLayout.getFilePath(), exports.size(), newExportTrieBytes.size());
511
512 // allocate new buffer and set export_off to use new buffer instead
513 uint32_t newExportsSize = newExportTrieBytes.size();
514 uint8_t* sideTrie = new uint8_t[newExportsSize];
515 memcpy(sideTrie, &newExportTrieBytes[0], newExportsSize);
516 this->fLayout.setDyldInfoExports(sideTrie);
517 ((macho_dyld_info_command<P>*)fDyldInfo)->set_export_off(0); // invalidate old trie
518 ((macho_dyld_info_command<P>*)fDyldInfo)->set_export_size(newExportsSize);
519 }
bac542e6
A
520}
521
412ebb8e 522
bac542e6 523template <typename A>
412ebb8e 524void Binder<A>::bind(std::vector<void*>& pointersInData)
bac542e6
A
525{
526 this->doSetUpDyldSection();
39a8cd10 527 if ( fDyldInfo != NULL ) {
412ebb8e
A
528 this->doBindDyldInfo(pointersInData);
529 this->doBindDyldLazyInfo(pointersInData);
530 this->hoistPrivateRexports();
39a8cd10
A
531 // weak bind info is processed at launch time
532 }
533 else {
534 this->doBindExternalRelocations();
535 this->doBindIndirectSymbols();
536 this->doSetPreboundUndefines();
537 }
bac542e6
A
538}
539
540
541template <typename A>
542void Binder<A>::doSetUpDyldSection()
543{
544 // find __DATA __dyld section
545 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
546 const uint32_t cmd_count = this->fHeader->ncmds();
547 const macho_load_command<P>* cmd = cmds;
548 for (uint32_t i = 0; i < cmd_count; ++i) {
549 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
550 const macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
551 if ( strcmp(seg->segname(), "__DATA") == 0 ) {
552 const macho_section<P>* const sectionsStart = (macho_section<P>*)((uint8_t*)seg + sizeof(macho_segment_command<P>));
553 const macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
554 for (const macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
555 if ( (strcmp(sect->sectname(), "__dyld") == 0) && (sect->size() >= 2*sizeof(pint_t)) ) {
556 // set two values in __dyld section to point into dyld
557 pint_t* lazyBinder = this->mappedAddressForNewAddress(sect->addr());
558 pint_t* dyldFuncLookup = this->mappedAddressForNewAddress(sect->addr()+sizeof(pint_t));
559 A::P::setP(*lazyBinder, fDyldBaseAddress + 0x1000);
560 A::P::setP(*dyldFuncLookup, fDyldBaseAddress + 0x1008);
561 }
562 }
563 }
564 }
565 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
566 }
567}
568
39a8cd10 569template <typename A>
412ebb8e 570void Binder<A>::bindDyldInfoAt(uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, int libraryOrdinal,
832b6fce 571 int64_t addend, const char* symbolName, bool lazyPointer, bool weakImport, std::vector<void*>& pointersInData)
39a8cd10
A
572{
573 //printf("%d 0x%08llX type=%d, lib=%d, addend=%lld, symbol=%s\n", segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName);
574 const std::vector<MachOLayoutAbstraction::Segment>& segments = this->fLayout.getSegments();
575 if ( segmentIndex > segments.size() )
576 throw "bad segment index in rebase info";
577
578 if ( libraryOrdinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP )
412ebb8e 579 throw "dynamic lookup linkage not allowed in dyld shared cache";
39a8cd10
A
580
581 if ( libraryOrdinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE )
582 throw "linkage to main executable not allowed in dyld shared cache";
583
584 if ( libraryOrdinal < 0 )
585 throw "bad mach-o binary, special library ordinal not allowd in dyld shared cache";
586
587 if ( (unsigned)libraryOrdinal > fDependentDylibs.size() )
588 throw "bad mach-o binary, library ordinal too big";
589
590 Binder<A>* binder;
591 if ( libraryOrdinal == BIND_SPECIAL_DYLIB_SELF )
592 binder = this;
593 else
594 binder = fDependentDylibs[libraryOrdinal-1].binder;
595 pint_t targetSymbolAddress;
2fd3f4e8
A
596 bool isResolverSymbol = false;
597 bool isAbsolute = false;
412ebb8e 598 Binder<A>* foundIn;
832b6fce
A
599 if ( weakImport && (binder == NULL) ) {
600 targetSymbolAddress = 0;
601 foundIn = NULL;
832b6fce
A
602 }
603 else {
2fd3f4e8 604 if ( ! binder->findExportedSymbolAddress(symbolName, &targetSymbolAddress, &foundIn, &isResolverSymbol, &isAbsolute) )
832b6fce
A
605 throwf("could not bind symbol %s in %s expected in %s", symbolName, this->getDylibID(), binder->getDylibID());
606 }
607
412ebb8e
A
608 // don't bind lazy pointers to resolver stubs in shared cache
609 if ( lazyPointer && isResolverSymbol ) {
2fd3f4e8
A
610 if ( foundIn != this ) {
611 // record that this dylib has a lazy pointer to a resolver function
412ebb8e
A
612 foundIn->addResolverClient(this, symbolName);
613 // fprintf(stderr, "have lazy pointer to resolver %s in %s\n", symbolName, this->getDylibID());
614 }
615 return;
616 }
39a8cd10
A
617
618 // do actual update
619 const MachOLayoutAbstraction::Segment& seg = segments[segmentIndex];
620 uint8_t* mappedAddr = (uint8_t*)seg.mappedAddress() + segmentOffset;
621 pint_t* mappedAddrP = (pint_t*)mappedAddr;
622 uint32_t* mappedAddr32 = (uint32_t*)mappedAddr;
623 int32_t svalue32new;
624 switch ( type ) {
625 case BIND_TYPE_POINTER:
626 P::setP(*mappedAddrP, targetSymbolAddress + addend);
627 break;
628
629 case BIND_TYPE_TEXT_ABSOLUTE32:
630 E::set32(*mappedAddr32, targetSymbolAddress + addend);
631 break;
632
633 case BIND_TYPE_TEXT_PCREL32:
634 svalue32new = seg.address() + segmentOffset + 4 - (targetSymbolAddress + addend);
635 E::set32(*mappedAddr32, svalue32new);
636 break;
637
638 default:
639 throw "bad bind type";
640 }
2fd3f4e8
A
641 if ( !isAbsolute )
642 pointersInData.push_back(mappedAddr);
39a8cd10
A
643}
644
645
646
647template <typename A>
412ebb8e 648void Binder<A>::doBindDyldLazyInfo(std::vector<void*>& pointersInData)
39a8cd10
A
649{
650 const uint8_t* p = &this->fLinkEditBase[fDyldInfo->lazy_bind_off()];
651 const uint8_t* end = &p[fDyldInfo->lazy_bind_size()];
652
653 uint8_t type = BIND_TYPE_POINTER;
654 uint64_t segmentOffset = 0;
655 uint8_t segmentIndex = 0;
656 const char* symbolName = NULL;
657 int libraryOrdinal = 0;
658 int64_t addend = 0;
832b6fce 659 bool weakImport = false;
39a8cd10
A
660 while ( p < end ) {
661 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
662 uint8_t opcode = *p & BIND_OPCODE_MASK;
663 ++p;
664 switch (opcode) {
665 case BIND_OPCODE_DONE:
666 // this opcode marks the end of each lazy pointer binding
667 break;
668 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
669 libraryOrdinal = immediate;
670 break;
671 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
672 libraryOrdinal = read_uleb128(p, end);
673 break;
674 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
675 // the special ordinals are negative numbers
676 if ( immediate == 0 )
677 libraryOrdinal = 0;
678 else {
679 int8_t signExtended = BIND_OPCODE_MASK | immediate;
680 libraryOrdinal = signExtended;
681 }
682 break;
683 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
832b6fce 684 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
39a8cd10
A
685 symbolName = (char*)p;
686 while (*p != '\0')
687 ++p;
688 ++p;
689 break;
690 case BIND_OPCODE_SET_ADDEND_SLEB:
691 addend = read_sleb128(p, end);
692 break;
693 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
694 segmentIndex = immediate;
695 segmentOffset = read_uleb128(p, end);
696 break;
697 case BIND_OPCODE_DO_BIND:
832b6fce 698 bindDyldInfoAt(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, true, weakImport, pointersInData);
39a8cd10
A
699 segmentOffset += sizeof(pint_t);
700 break;
701 case BIND_OPCODE_SET_TYPE_IMM:
702 case BIND_OPCODE_ADD_ADDR_ULEB:
703 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
704 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
705 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
706 default:
707 throwf("bad lazy bind opcode %d", *p);
708 }
709 }
710
711
712}
713
714template <typename A>
412ebb8e 715void Binder<A>::doBindDyldInfo(std::vector<void*>& pointersInData)
39a8cd10
A
716{
717 const uint8_t* p = &this->fLinkEditBase[fDyldInfo->bind_off()];
718 const uint8_t* end = &p[fDyldInfo->bind_size()];
719
720 uint8_t type = 0;
721 uint64_t segmentOffset = 0;
722 uint8_t segmentIndex = 0;
723 const char* symbolName = NULL;
724 int libraryOrdinal = 0;
725 int64_t addend = 0;
726 uint32_t count;
727 uint32_t skip;
832b6fce 728 bool weakImport = false;
39a8cd10
A
729 bool done = false;
730 while ( !done && (p < end) ) {
731 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
732 uint8_t opcode = *p & BIND_OPCODE_MASK;
733 ++p;
734 switch (opcode) {
735 case BIND_OPCODE_DONE:
736 done = true;
737 break;
738 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
739 libraryOrdinal = immediate;
740 break;
741 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
742 libraryOrdinal = read_uleb128(p, end);
743 break;
744 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
745 // the special ordinals are negative numbers
746 if ( immediate == 0 )
747 libraryOrdinal = 0;
748 else {
749 int8_t signExtended = BIND_OPCODE_MASK | immediate;
750 libraryOrdinal = signExtended;
751 }
752 break;
753 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
832b6fce 754 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
39a8cd10
A
755 symbolName = (char*)p;
756 while (*p != '\0')
757 ++p;
758 ++p;
759 break;
760 case BIND_OPCODE_SET_TYPE_IMM:
761 type = immediate;
762 break;
763 case BIND_OPCODE_SET_ADDEND_SLEB:
764 addend = read_sleb128(p, end);
765 break;
766 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
767 segmentIndex = immediate;
768 segmentOffset = read_uleb128(p, end);
769 break;
770 case BIND_OPCODE_ADD_ADDR_ULEB:
771 segmentOffset += read_uleb128(p, end);
772 break;
773 case BIND_OPCODE_DO_BIND:
832b6fce 774 bindDyldInfoAt(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, false, weakImport, pointersInData);
39a8cd10
A
775 segmentOffset += sizeof(pint_t);
776 break;
777 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
832b6fce 778 bindDyldInfoAt(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, false, weakImport, pointersInData);
39a8cd10
A
779 segmentOffset += read_uleb128(p, end) + sizeof(pint_t);
780 break;
781 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
832b6fce 782 bindDyldInfoAt(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, false, weakImport, pointersInData);
39a8cd10
A
783 segmentOffset += immediate*sizeof(pint_t) + sizeof(pint_t);
784 break;
785 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
786 count = read_uleb128(p, end);
787 skip = read_uleb128(p, end);
788 for (uint32_t i=0; i < count; ++i) {
832b6fce 789 bindDyldInfoAt(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, false, weakImport, pointersInData);
39a8cd10
A
790 segmentOffset += skip + sizeof(pint_t);
791 }
792 break;
793 default:
794 throwf("bad bind opcode %d", *p);
795 }
796 }
797
798
799
800}
801
bac542e6
A
802
803template <typename A>
804void Binder<A>::doSetPreboundUndefines()
805{
806 const macho_dysymtab_command<P>* dysymtab = NULL;
807 macho_nlist<P>* symbolTable = NULL;
808
809 // get symbol table info
810 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
811 const uint32_t cmd_count = this->fHeader->ncmds();
812 const macho_load_command<P>* cmd = cmds;
813 for (uint32_t i = 0; i < cmd_count; ++i) {
814 switch (cmd->cmd()) {
815 case LC_SYMTAB:
816 {
817 const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
818 symbolTable = (macho_nlist<P>*)(&this->fLinkEditBase[symtab->symoff()]);
819 }
820 break;
821 case LC_DYSYMTAB:
822 dysymtab = (macho_dysymtab_command<P>*)cmd;
823 break;
824 }
825 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
826 }
827
828 // walk all undefines and set their prebound n_value
829 macho_nlist<P>* const lastUndefine = &symbolTable[dysymtab->iundefsym()+dysymtab->nundefsym()];
830 for (macho_nlist<P>* entry = &symbolTable[dysymtab->iundefsym()]; entry < lastUndefine; ++entry) {
831 if ( entry->n_type() & N_EXT ) {
bac542e6
A
832 //fprintf(stderr, "doSetPreboundUndefines: r_sym=%s, pbaddr=0x%08X, in %s\n",
833 // &fStrings[entry->n_strx()], pbaddr, this->getDylibID());
39a8cd10 834 pint_t pbaddr = this->resolveUndefined(entry);
bac542e6
A
835 entry->set_n_value(pbaddr);
836 }
837 }
838}
839
840
841template <typename A>
842void Binder<A>::doBindExternalRelocations()
843{
844 // get where reloc addresses start
845 // these address are always relative to first writable segment because they are in cache which always
846 // has writable segments far from read-only segments
847 pint_t firstWritableSegmentBaseAddress = 0;
848 const std::vector<MachOLayoutAbstraction::Segment>& segments = this->fLayout.getSegments();
849 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
850 const MachOLayoutAbstraction::Segment& seg = *it;
851 if ( seg.writable() ) {
852 firstWritableSegmentBaseAddress = seg.newAddress();
853 break;
854 }
855 }
856
857 // loop through all external relocation records and bind each
858 const macho_relocation_info<P>* const relocsStart = (macho_relocation_info<P>*)(&this->fLinkEditBase[fDynamicInfo->extreloff()]);
859 const macho_relocation_info<P>* const relocsEnd = &relocsStart[fDynamicInfo->nextrel()];
860 for (const macho_relocation_info<P>* reloc=relocsStart; reloc < relocsEnd; ++reloc) {
861 if ( reloc->r_length() != pointerRelocSize() )
862 throw "bad external relocation length";
863 if ( reloc->r_type() != pointerRelocType() )
864 throw "unknown external relocation type";
865 if ( reloc->r_pcrel() )
866 throw "r_pcrel external relocaiton not supported";
867
868 const macho_nlist<P>* undefinedSymbol = &fSymbolTable[reloc->r_symbolnum()];
869 pint_t* location;
870 try {
412ebb8e 871 location = this->mappedAddressForNewAddress(reloc->r_address() + firstWritableSegmentBaseAddress);
bac542e6
A
872 }
873 catch (const char* msg) {
874 throwf("%s processesing external relocation r_address 0x%08X", msg, reloc->r_address());
875 }
876 pint_t addend = P::getP(*location);
877 if ( fOriginallyPrebound ) {
878 // in a prebound binary, the n_value field of an undefined symbol is set to the address where the symbol was found when prebound
879 // so, subtracting that gives the initial displacement which we need to add to the newly found symbol address
880 // if mach-o relocation structs had an "addend" field this complication would not be necessary.
881 addend -= undefinedSymbol->n_value();
882 // To further complicate things, if this is defined symbol, then its n_value has already been adjust to the
883 // new base address, so we need to back off the slide too..
884 if ( (undefinedSymbol->n_type() & N_TYPE) == N_SECT ) {
885 addend += this->getSlideForNewAddress(undefinedSymbol->n_value());
886 }
887 }
888 pint_t symbolAddr = this->resolveUndefined(undefinedSymbol);
889 //fprintf(stderr, "external reloc: r_address=0x%08X, r_sym=%s, symAddr=0x%08llX, addend=0x%08llX in %s\n",
890 // reloc->r_address(), &fStrings[undefinedSymbol->n_strx()], (uint64_t)symbolAddr, (uint64_t)addend, this->getDylibID());
891 P::setP(*location, symbolAddr + addend);
892 }
893}
894
895
896// most architectures use pure code, unmodifiable stubs
897template <typename A>
898void Binder<A>::bindStub(uint8_t elementSize, uint8_t* location, pint_t vmlocation, pint_t value)
899{
900 // do nothing
901}
902
903// x86 supports fast stubs
904template <>
905void Binder<x86>::bindStub(uint8_t elementSize, uint8_t* location, pint_t vmlocation, pint_t value)
906{
907 // if the stub is not 5-bytes, it is an old slow stub
908 if ( elementSize == 5 ) {
909 uint32_t rel32 = value - (vmlocation + 5);
910 location[0] = 0xE9; // JMP rel32
911 location[1] = rel32 & 0xFF;
912 location[2] = (rel32 >> 8) & 0xFF;
913 location[3] = (rel32 >> 16) & 0xFF;
914 location[4] = (rel32 >> 24) & 0xFF;
915 }
916}
917
918template <typename A>
919void Binder<A>::doBindIndirectSymbols()
920{
921 const uint32_t* const indirectTable = (uint32_t*)&this->fLinkEditBase[fDynamicInfo->indirectsymoff()];
922 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
923 const uint32_t cmd_count = this->fHeader->ncmds();
924 const macho_load_command<P>* cmd = cmds;
39a8cd10 925 //fprintf(stderr, "doBindIndirectSymbols() %s\n", this->fLayout.getFilePath());
bac542e6
A
926 for (uint32_t i = 0; i < cmd_count; ++i) {
927 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
928 const macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
929 const macho_section<P>* const sectionsStart = (macho_section<P>*)((uint8_t*)seg + sizeof(macho_segment_command<P>));
930 const macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
931 for (const macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
932 uint8_t elementSize = 0;
933 uint8_t sectionType = sect->flags() & SECTION_TYPE;
934 switch ( sectionType ) {
935 case S_SYMBOL_STUBS:
936 elementSize = sect->reserved2();
937 break;
938 case S_NON_LAZY_SYMBOL_POINTERS:
939 case S_LAZY_SYMBOL_POINTERS:
940 elementSize = sizeof(pint_t);
941 break;
942 }
943 if ( elementSize != 0 ) {
944 uint32_t elementCount = sect->size() / elementSize;
945 const uint32_t indirectTableOffset = sect->reserved1();
946 uint8_t* location = NULL;
947 if ( sect->size() != 0 )
948 location = (uint8_t*)this->mappedAddressForNewAddress(sect->addr());
949 pint_t vmlocation = sect->addr();
950 for (uint32_t j=0; j < elementCount; ++j, location += elementSize, vmlocation += elementSize) {
951 uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
952 switch ( symbolIndex ) {
953 case INDIRECT_SYMBOL_ABS:
954 case INDIRECT_SYMBOL_LOCAL:
955 break;
956 default:
957 const macho_nlist<P>* undefinedSymbol = &fSymbolTable[symbolIndex];
39a8cd10 958 //fprintf(stderr, " sect=%s, index=%d, symbolIndex=%d, sym=%s\n", sect->sectname(), j, symbolIndex, &fStrings[undefinedSymbol->n_strx()]);
bac542e6
A
959 pint_t symbolAddr = this->resolveUndefined(undefinedSymbol);
960 switch ( sectionType ) {
961 case S_NON_LAZY_SYMBOL_POINTERS:
962 case S_LAZY_SYMBOL_POINTERS:
963 P::setP(*((pint_t*)location), symbolAddr);
964 break;
965 case S_SYMBOL_STUBS:
966 this->bindStub(elementSize, location, vmlocation, symbolAddr);
967 break;
968 }
969 break;
970 }
971 }
972 }
973 }
974 }
975 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
976 }
977}
978
979
980
981
982template <typename A>
983typename A::P::uint_t Binder<A>::resolveUndefined(const macho_nlist<P>* undefinedSymbol)
984{
985 if ( (undefinedSymbol->n_type() & N_TYPE) == N_SECT ) {
986 if ( (undefinedSymbol->n_type() & N_PEXT) != 0 ) {
987 // is a multi-module private_extern internal reference that the linker did not optimize away
412ebb8e 988 return runtimeAddressFromNList(undefinedSymbol);
bac542e6
A
989 }
990 if ( (undefinedSymbol->n_desc() & N_WEAK_DEF) != 0 ) {
991 // is a weak definition, we should prebind to this one in the same linkage unit
412ebb8e 992 return runtimeAddressFromNList(undefinedSymbol);
bac542e6
A
993 }
994 }
995 const char* symbolName = &fStrings[undefinedSymbol->n_strx()];
996 if ( (this->fHeader->flags() & MH_TWOLEVEL) == 0 ) {
997 // flat namespace binding
998 throw "flat namespace not supported";
999 }
1000 else {
1001 uint8_t ordinal = GET_LIBRARY_ORDINAL(undefinedSymbol->n_desc());
1002 Binder<A>* binder = NULL;
1003 switch ( ordinal ) {
1004 case EXECUTABLE_ORDINAL:
1005 case DYNAMIC_LOOKUP_ORDINAL:
1006 throw "magic ordineal not supported";
1007 case SELF_LIBRARY_ORDINAL:
1008 binder = this;
1009 break;
1010 default:
1011 if ( ordinal > fDependentDylibs.size() )
1012 throw "two-level ordinal out of range";
1013 binder = fDependentDylibs[ordinal-1].binder;
1014 }
39a8cd10 1015 pint_t addr;
412ebb8e 1016 bool isResolver;
2fd3f4e8 1017 bool isAbsolute;
412ebb8e 1018 Binder<A>* foundIn;
2fd3f4e8 1019 if ( ! binder->findExportedSymbolAddress(symbolName, &addr, &foundIn, &isResolver, &isAbsolute) )
412ebb8e 1020 throwf("could not resolve undefined symbol %s in %s expected in %s", symbolName, this->getDylibID(), binder->getDylibID());
39a8cd10 1021 return addr;
bac542e6
A
1022 }
1023}
1024
1025template <typename A>
2fd3f4e8 1026bool Binder<A>::findExportedSymbolAddress(const char* name, pint_t* result, Binder<A>** foundIn, bool* isResolverSymbol, bool* isAbsolute)
bac542e6 1027{
412ebb8e
A
1028 *foundIn = NULL;
1029 // since re-export chains can be any length, re-exports cannot be resolved in setDependencies()
1030 // instead we lazily, recursively update
1031 if ( !fReExportedSymbolsResolved ) {
1032
1033 // update fHashTable with any individual symbol re-exports
1034 for (typename std::vector<SymbolReExport>::iterator it=fReExportedSymbols.begin(); it != fReExportedSymbols.end(); ++it) {
1035 pint_t targetSymbolAddress;
1036 bool isResolver;
2fd3f4e8 1037 bool isAb;
412ebb8e
A
1038
1039 if ( it->dylibOrdinal <= 0 )
1040 throw "bad mach-o binary, special library ordinal not allowed in re-exported symbols in dyld shared cache";
1041
1042 Binder<A>* binder = fDependentDylibs[it->dylibOrdinal-1].binder;
1043
2fd3f4e8 1044 if ( ! binder->findExportedSymbolAddress(it->importName, &targetSymbolAddress, foundIn, &isResolver, &isAb) )
412ebb8e
A
1045 throwf("could not bind symbol %s in %s expected in %s", it->importName, this->getDylibID(), binder->getDylibID());
1046
2fd3f4e8
A
1047 if ( isResolver )
1048 fSymbolResolvers.insert(name);
412ebb8e
A
1049
1050 fHashTable[it->exportName] = targetSymbolAddress;
1051 }
1052 // mark as done
1053 fReExportedSymbolsResolved = true;
1054 }
1055
1056 *isResolverSymbol = false;
1057 if ( !fSymbolResolvers.empty() && fSymbolResolvers.count(name) ) {
1058 // lazy pointers should be left unbound, rather than bind to resolver stub
1059 *isResolverSymbol = true;
1060 }
1061
2fd3f4e8 1062 // search this dylib
39a8cd10
A
1063 typename NameToAddrMap::iterator pos = fHashTable.find(name);
1064 if ( pos != fHashTable.end() ) {
1065 *result = pos->second;
1066 //fprintf(stderr, "findExportedSymbolAddress(%s) => 0x%08llX in %s\n", name, (uint64_t)*result, this->getDylibID());
412ebb8e 1067 *foundIn = this;
2fd3f4e8 1068 *isAbsolute = (fAbsoluteSymbols.count(name) != 0);
39a8cd10
A
1069 return true;
1070 }
bac542e6 1071
2fd3f4e8 1072 // search re-exported dylibs
bac542e6
A
1073 for (typename std::vector<BinderAndReExportFlag>::iterator it = fDependentDylibs.begin(); it != fDependentDylibs.end(); ++it) {
1074 if ( it->reExport ) {
2fd3f4e8 1075 if ( it->binder->findExportedSymbolAddress(name, result, foundIn, isResolverSymbol, isAbsolute) )
39a8cd10 1076 return true;
bac542e6
A
1077 }
1078 }
39a8cd10
A
1079 //fprintf(stderr, "findExportedSymbolAddress(%s) => not found in %s\n", name, this->getDylibID());
1080 return false;
bac542e6
A
1081}
1082
412ebb8e
A
1083// record which dylibs will be using this dylibs lazy pointer
1084template <typename A>
1085void Binder<A>::addResolverClient(Binder<A>* clientDylib, const char* symbolName)
1086{
1087 ClientAndSymbol x;
1088 x.client = clientDylib;
1089 x.symbolName = symbolName;
1090 fClientAndSymbols.push_back(x);
1091}
1092
412ebb8e
A
1093
1094template <typename A>
1095typename A::P::uint_t Binder<A>::findLazyPointerFor(const char* symbolName)
1096{
2fd3f4e8
A
1097 static const bool log = false;
1098
1099 // first check cache
1100 typename NameToAddrMap::iterator pos = fResolverLazyPointers.find(symbolName);
1101 if ( pos != fResolverLazyPointers.end() ) {
1102 if ( log ) fprintf(stderr, "found cached shared lazy pointer at 0x%llX for %s in %s\n", (uint64_t)(pos->second), symbolName, this->getDylibID());
1103 return pos->second;
1104 }
1105
1106 // do slow lookup in lazy pointer section
1107 const uint32_t* const indirectTable = (uint32_t*)&this->fLinkEditBase[fDynamicInfo->indirectsymoff()];
1108 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
1109 const uint32_t cmd_count = this->fHeader->ncmds();
1110 const macho_load_command<P>* cmd = cmds;
1111 for (uint32_t i = 0; i < cmd_count; ++i) {
1112 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1113 const macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
1114 const macho_section<P>* const sectionsStart = (macho_section<P>*)((uint8_t*)seg + sizeof(macho_segment_command<P>));
1115 const macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
1116 for (const macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
1117 uint8_t sectionType = sect->flags() & SECTION_TYPE;
1118 if ( sectionType == S_LAZY_SYMBOL_POINTERS) {
1119 uint32_t elementCount = sect->size() / sizeof(pint_t);
1120 const uint32_t indirectTableOffset = sect->reserved1();
1121 pint_t vmlocation = sect->addr();
1122 for (uint32_t j=0; j < elementCount; ++j, vmlocation += sizeof(pint_t)) {
1123 uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
1124 switch ( symbolIndex ) {
1125 case INDIRECT_SYMBOL_ABS:
1126 case INDIRECT_SYMBOL_LOCAL:
1127 break;
1128 default:
1129 const macho_nlist<P>* aSymbol = &fSymbolTable[symbolIndex];
1130 const char* aName = &fStrings[aSymbol->n_strx()];
1131 //fprintf(stderr, " sect=%s, index=%d, symbolIndex=%d, sym=%s\n", sect->sectname(), j, symbolIndex, &fStrings[undefinedSymbol->n_strx()]);
1132 if ( strcmp(aName, symbolName) == 0 ) {
1133 fResolverLazyPointers[symbolName] = vmlocation;
1134 if ( log ) fprintf(stderr, "found slow-path shared lazy pointer at 0x%llX for %s in %s\n", (uint64_t)vmlocation, symbolName, this->getDylibID());
1135 return vmlocation;
1136 }
1137 break;
1138 }
1139 }
1140 }
1141 }
1142 }
1143 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
1144 }
19894a12
A
1145
1146 if ( log ) fprintf(stderr, "not found shared lazy pointer for %s in %s, checking for re-export symbol\n", symbolName, this->getDylibID());
1147 for (typename std::vector<SymbolReExport>::iterator it=fReExportedSymbols.begin(); it != fReExportedSymbols.end(); ++it) {
1148 if ( strcmp(it->exportName, symbolName) != 0 )
1149 continue;
1150
1151 if ( it->dylibOrdinal <= 0 )
1152 throw "bad mach-o binary, special library ordinal not allowed in re-exported symbols in dyld shared cache";
1153
1154 Binder<A>* binder = fDependentDylibs[it->dylibOrdinal-1].binder;
1155 return binder->findLazyPointerFor(it->importName);
1156 }
1157
1158 if ( log ) fprintf(stderr, "not found shared lazy pointer for %s in %s, checking re-export dylibs\n", symbolName, this->getDylibID());
1159 for (typename std::vector<BinderAndReExportFlag>::iterator it = fDependentDylibs.begin(); it != fDependentDylibs.end(); ++it) {
1160 if ( it->reExport ) {
1161 pint_t result = it->binder->findLazyPointerFor(symbolName);
1162 if ( result != 0 )
1163 return result;
1164 }
1165 }
1166
2fd3f4e8 1167 if ( log ) fprintf(stderr, "NOT found shared lazy pointer for %s in %s\n", symbolName, this->getDylibID());
412ebb8e
A
1168 return 0;
1169}
1170
1171// called after all binding is done to optimize lazy pointers
1172template <typename A>
1173void Binder<A>::optimize()
1174{
1175 for (typename std::vector<ClientAndSymbol>::iterator it = fClientAndSymbols.begin(); it != fClientAndSymbols.end(); ++it) {
1176 pint_t lpVMAddr = findLazyPointerFor(it->symbolName);
1177 if ( lpVMAddr != 0 ) {
1178 it->client->optimizeStub(it->symbolName, lpVMAddr);
1179 }
1180 else {
19894a12 1181 fprintf(stderr, "not able to optimize lazy pointer for %s in %s\n", it->symbolName, it->client->getDylibID());
412ebb8e
A
1182 }
1183
1184 }
1185}
1186
1187template <>
1188void Binder<arm>::optimizeStub(uint8_t* stubMappedAddress, pint_t stubVMAddress, uint32_t stubSize, pint_t lpVMAddr)
1189{
1190 if ( stubSize != 16 ) {
1191 fprintf(stderr, "could not optimize ARM stub to resolver function in %s because it is wrong size\n", this->getDylibID());
1192 return;
1193 }
1194 uint32_t* instructions = (uint32_t*)stubMappedAddress;
1195 if ( (E::get32(instructions[0]) != 0xe59fc004)
1196 || (E::get32(instructions[1]) != 0xe08fc00c)
1197 || (E::get32(instructions[2]) != 0xe59cf000)
1198 ) {
1199 fprintf(stderr, "could not optimize ARM stub to resolver function in %s because instructions are not as expected\n", this->getDylibID());
1200 return;
1201 }
1202 // last .long in stub is: lazyPtr - (stub+8)
1203 // alter to point to more optimal lazy pointer
1204 uint32_t betterOffset = lpVMAddr - (stubVMAddress + 12);
1205 E::set32(instructions[3], betterOffset);
1206}
1207
1208
1209template <>
1210void Binder<x86_64>::optimizeStub(uint8_t* stubMappedAddress, pint_t stubVMAddress, uint32_t stubSize, pint_t lpVMAddr)
1211{
1212 if ( stubSize != 6 ) {
1213 fprintf(stderr, "could not optimize x86_64 stub to resolver function in %s because it is wrong size\n", this->getDylibID());
1214 return;
1215 }
1216 if ( (stubMappedAddress[0] != 0xFF) || (stubMappedAddress[1] != 0x25) ) {
1217 fprintf(stderr, "could not optimize stub to resolver function in %s because instructions are not as expected\n", this->getDylibID());
1218 return;
1219 }
1220 // last four bytes in stub is RIP relative offset to lazy pointer
1221 // alter to point to more optimal lazy pointer
1222 uint32_t betterOffset = lpVMAddr - (stubVMAddress + 6);
1223 E::set32(*((uint32_t*)(&stubMappedAddress[2])), betterOffset);
1224}
1225
1226template <typename A>
1227void Binder<A>::optimizeStub(uint8_t* stubMappedAddress, pint_t stubVMAddress, uint32_t stubSize, pint_t lpVMAddress)
1228{
1229 // Remaining architectures are not optimized
1230 //fprintf(stderr, "optimize stub at %p in %s to use lazyPointer at 0x%llX\n", stubMappedAddress, this->getDylibID(), (uint64_t)lpVMAddress);
1231}
1232
1233// search for stub in this image that call target symbol name and then optimize its lazy pointer
1234template <typename A>
1235void Binder<A>::optimizeStub(const char* stubName, pint_t lpVMAddr)
1236{
1237 // find named stub
1238 const uint32_t* const indirectTable = (uint32_t*)&this->fLinkEditBase[fDynamicInfo->indirectsymoff()];
1239 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)this->fHeader + sizeof(macho_header<P>));
1240 const uint32_t cmd_count = this->fHeader->ncmds();
1241 const macho_load_command<P>* cmd = cmds;
1242 for (uint32_t i = 0; i < cmd_count; ++i) {
1243 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1244 macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
1245 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
1246 macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
1247 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
1248 if ( ((sect->flags() & SECTION_TYPE) == S_SYMBOL_STUBS) && (sect->size() != 0) ) {
1249 pint_t stubsVMStart = sect->addr();
1250 uint8_t* stubsMappingStart = (uint8_t*)this->mappedAddressForNewAddress(stubsVMStart);
1251 const uint32_t indirectTableOffset = sect->reserved1();
1252 const uint32_t stubSize = sect->reserved2();
1253 uint32_t elementCount = sect->size() / stubSize;
1254 pint_t stubVMAddr = stubsVMStart;
1255 uint8_t* stubMappedAddr = stubsMappingStart;
1256 for (uint32_t j=0; j < elementCount; ++j, stubMappedAddr += stubSize, stubVMAddr += stubSize) {
1257 uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
1258 switch ( symbolIndex ) {
1259 case INDIRECT_SYMBOL_ABS:
1260 case INDIRECT_SYMBOL_LOCAL:
1261 break;
1262 default:
1263 {
1264 const macho_nlist<P>* sym = &this->fSymbolTable[symbolIndex];
1265 const char* symName = &fStrings[sym->n_strx()];
1266 if ( strcmp(symName, stubName) == 0 )
1267 this->optimizeStub(stubMappedAddr, stubVMAddr, stubSize, lpVMAddr);
1268 }
1269 break;
1270 }
1271 }
1272 }
1273 }
1274 }
1275 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
1276 }
1277}
bac542e6
A
1278
1279
1280#endif // __MACHO_BINDER__
1281
1282
1283
1284