dyld-732.8.tar.gz
[apple/dyld.git] / dyld3 / shared-cache / OptimizerBranches.cpp
1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*-
2 *
3 * Copyright (c) 2015 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <sys/mman.h>
29 #include <limits.h>
30 #include <stdarg.h>
31 #include <stdio.h>
32 #include <unistd.h>
33 #include <CommonCrypto/CommonDigest.h>
34
35 #include <string>
36 #include <unordered_map>
37 #include <unordered_set>
38
39 #include "StringUtils.h"
40 #include "Trie.hpp"
41 #include "MachOFileAbstraction.hpp"
42 #include "MachOAnalyzer.h"
43 #include "Diagnostics.h"
44 #include "DyldSharedCache.h"
45 #include "CacheBuilder.h"
46
// File-scope verbosity flag. Nothing in the visible part of this file reads it;
// presumably it gates debug logging elsewhere in the file -- TODO confirm.
47 static const bool verbose = false;
48
49
50
51
52 template <typename P>
53 class StubOptimizer {
54 public:
55 StubOptimizer(const DyldSharedCache* cache, macho_header<P>* mh, Diagnostics& diags);
56 void buildStubMap(const std::unordered_set<std::string>& neverStubEliminate);
57 void optimizeStubs();
58 void optimizeCallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr);
59 const char* installName() { return _installName; }
60 const uint8_t* exportsTrie() {
61 if ( _dyldInfo != nullptr )
62 return &_linkeditBias[_dyldInfo->export_off()];
63 else
64 return &_linkeditBias[_exportTrie->dataoff()];
65 }
66 uint32_t exportsTrieSize() {
67 if ( _dyldInfo != nullptr )
68 return _dyldInfo->export_size();
69 else
70 return _exportTrie->datasize();
71 }
72
73 uint32_t _stubCount = 0;
74 uint32_t _stubOptimizedCount = 0;
75 uint32_t _stubsLeftInterposable = 0;
76 uint32_t _branchToStubCount = 0;
77 uint32_t _branchOptimizedToDirectCount = 0;
78 uint32_t _branchToOptimizedStubCount = 0;
79 uint32_t _branchToReUsedOptimizedStubCount = 0;
80
81 private:
82 Diagnostics _diagnostics;
83
84 typedef std::function<bool(uint8_t callSiteKind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction)> CallSiteHandler;
85 typedef typename P::uint_t pint_t;
86 typedef typename P::E E;
87
88 void forEachCallSiteToAStub(CallSiteHandler);
89 void optimizeArm64CallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr);
90 void optimizeArm64Stubs();
91 #if SUPPORT_ARCH_arm64e
92 void optimizeArm64eStubs();
93 #endif
94 #if SUPPORT_ARCH_arm64_32
95 void optimizeArm64_32Stubs();
96 #endif
97 void optimizeArmCallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr);
98 void optimizeArmStubs();
99 uint64_t lazyPointerAddrFromArm64Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr);
100 #if SUPPORT_ARCH_arm64e
101 uint64_t lazyPointerAddrFromArm64eStub(const uint8_t* stubInstructions, uint64_t stubVMAddr);
102 #endif
103 #if SUPPORT_ARCH_arm64_32
104 uint64_t lazyPointerAddrFromArm64_32Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr);
105 #endif
106 uint32_t lazyPointerAddrFromArmStub(const uint8_t* stubInstructions, uint32_t stubVMAddr);
107 int32_t getDisplacementFromThumbBranch(uint32_t instruction, uint32_t instrAddr);
108 uint32_t setDisplacementInThumbBranch(uint32_t instruction, uint32_t instrAddr,
109 int32_t displacement, bool targetIsThumb);
110
111
112 struct AddressAndName { pint_t targetVMAddr; const char* targetName; };
113 typedef std::unordered_map<pint_t, AddressAndName> StubVMAddrToTarget;
114
115 static const int64_t b128MegLimit = 0x07FFFFFF;
116 static const int64_t b16MegLimit = 0x00FFFFFF;
117
118
119
120 macho_header<P>* _mh;
121 int64_t _cacheSlide = 0;
122 uint64_t _cacheUnslideAddr = 0;
123 bool _chainedFixups = false;
124 uint32_t _linkeditSize = 0;
125 uint64_t _linkeditAddr = 0;
126 const uint8_t* _linkeditBias = nullptr;
127 const char* _installName = nullptr;
128 const macho_symtab_command<P>* _symTabCmd = nullptr;
129 const macho_dysymtab_command<P>* _dynSymTabCmd = nullptr;
130 const macho_dyld_info_command<P>* _dyldInfo = nullptr;
131 const macho_linkedit_data_command<P>* _exportTrie = nullptr;
132 macho_linkedit_data_command<P>* _splitSegInfoCmd = nullptr;
133 const macho_section<P>* _textSection = nullptr;
134 const macho_section<P>* _stubSection = nullptr;
135 uint32_t _textSectionIndex = 0;
136 uint32_t _stubSectionIndex = 0;
137 pint_t _textSegStartAddr = 0;
138 std::vector<macho_segment_command<P>*> _segCmds;
139 std::unordered_map<pint_t, pint_t> _stubAddrToLPAddr;
140 std::unordered_map<pint_t, pint_t> _lpAddrToTargetAddr;
141 std::unordered_map<pint_t, const char*> _targetAddrToName;
142 std::unordered_set<uint64_t> _stubsToOptimize;
143 };
144
145
146 template <typename P>
147 StubOptimizer<P>::StubOptimizer(const DyldSharedCache* cache, macho_header<P>* mh, Diagnostics& diags)
148 : _mh(mh), _diagnostics(diags)
149 {
150 _cacheSlide = (long)cache - cache->unslidLoadAddress();
151 _cacheUnslideAddr = cache->unslidLoadAddress();
152 #if SUPPORT_ARCH_arm64e
153 _chainedFixups = (strcmp(cache->archName(), "arm64e") == 0);
154 #else
155 _chainedFixups = false;
156 #endif
157 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
158 const uint32_t cmd_count = mh->ncmds();
159 macho_segment_command<P>* segCmd;
160 uint32_t sectionIndex = 0;
161 const macho_load_command<P>* cmd = cmds;
162 for (uint32_t i = 0; i < cmd_count; ++i) {
163 switch (cmd->cmd()) {
164 case LC_ID_DYLIB:
165 _installName = ((macho_dylib_command<P>*)cmd)->name();
166 break;
167 case LC_SYMTAB:
168 _symTabCmd = (macho_symtab_command<P>*)cmd;
169 break;
170 case LC_DYSYMTAB:
171 _dynSymTabCmd = (macho_dysymtab_command<P>*)cmd;
172 break;
173 case LC_SEGMENT_SPLIT_INFO:
174 _splitSegInfoCmd = (macho_linkedit_data_command<P>*)cmd;
175 break;
176 case LC_DYLD_INFO:
177 case LC_DYLD_INFO_ONLY:
178 _dyldInfo = (macho_dyld_info_command<P>*)cmd;
179 break;
180 case LC_DYLD_EXPORTS_TRIE:
181 _exportTrie = (macho_linkedit_data_command<P>*)cmd;
182 break;
183 case macho_segment_command<P>::CMD:
184 segCmd =( macho_segment_command<P>*)cmd;
185 _segCmds.push_back(segCmd);
186 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
187 _linkeditBias = (uint8_t*)(segCmd->vmaddr() + _cacheSlide - segCmd->fileoff());
188 _linkeditSize = (uint32_t)segCmd->vmsize();
189 _linkeditAddr = segCmd->vmaddr();
190 }
191 else if ( strcmp(segCmd->segname(), "__TEXT") == 0 ) {
192 _textSegStartAddr = (pint_t)segCmd->vmaddr();
193 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
194 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
195 for (const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
196 ++sectionIndex;
197 if ( strcmp(sect->sectname(), "__text") == 0 ) {
198 _textSection = sect;
199 _textSectionIndex = sectionIndex;
200 }
201 else if ( ((sect->flags() & SECTION_TYPE) == S_SYMBOL_STUBS) && (sect->size() != 0) ) {
202 _stubSection = sect;
203 _stubSectionIndex = sectionIndex;
204 }
205 }
206 }
207 break;
208 }
209 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
210 }
211 }
212
213
214
215 template <typename P>
216 uint32_t StubOptimizer<P>::lazyPointerAddrFromArmStub(const uint8_t* stubInstructions, uint32_t stubVMAddr)
217 {
218 uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
219 uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions+4));
220 uint32_t stubInstr3 = E::get32(*(uint32_t*)(stubInstructions+8));
221 int32_t stubData = E::get32(*(uint32_t*)(stubInstructions+12));
222 if ( stubInstr1 != 0xe59fc004 ) {
223 _diagnostics.warning("first instruction of stub (0x%08X) is not 'ldr ip, pc + 12' for stub at addr 0x%0llX in %s",
224 stubInstr1, (uint64_t)stubVMAddr, _installName);
225 return 0;
226 }
227 if ( stubInstr2 != 0xe08fc00c ) {
228 _diagnostics.warning("second instruction of stub (0x%08X) is not 'add ip, pc, ip' for stub at addr 0x%0llX in %s",
229 stubInstr1, (uint64_t)stubVMAddr, _installName);
230 return 0;
231 }
232 if ( stubInstr3 != 0xe59cf000 ) {
233 _diagnostics.warning("third instruction of stub (0x%08X) is not 'ldr pc, [ip]' for stub at addr 0x%0llX in %s",
234 stubInstr1, (uint64_t)stubVMAddr, _installName);
235 return 0;
236 }
237 return stubVMAddr + 12 + stubData;
238 }
239
240
241 template <typename P>
242 uint64_t StubOptimizer<P>::lazyPointerAddrFromArm64Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr)
243 {
244 uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
245 if ( (stubInstr1 & 0x9F00001F) != 0x90000010 ) {
246 _diagnostics.warning("first instruction of stub (0x%08X) is not ADRP for stub at addr 0x%0llX in %s",
247 stubInstr1, (uint64_t)stubVMAddr, _installName);
248 return 0;
249 }
250 int32_t adrpValue = ((stubInstr1 & 0x00FFFFE0) >> 3) | ((stubInstr1 & 0x60000000) >> 29);
251 if ( stubInstr1 & 0x00800000 )
252 adrpValue |= 0xFFF00000;
253 uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions + 4));
254 if ( (stubInstr2 & 0xFFC003FF) != 0xF9400210 ) {
255 _diagnostics.warning("second instruction of stub (0x%08X) is not LDR for stub at addr 0x%0llX in %s",
256 stubInstr2, (uint64_t)stubVMAddr, _installName);
257 return 0;
258 }
259 uint32_t ldrValue = ((stubInstr2 >> 10) & 0x00000FFF);
260 return (stubVMAddr & (-4096)) + adrpValue*4096 + ldrValue*8;
261 }
262
263 #if SUPPORT_ARCH_arm64_32
264 template <typename P>
265 uint64_t StubOptimizer<P>::lazyPointerAddrFromArm64_32Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr)
266 {
267 uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
268 if ( (stubInstr1 & 0x9F00001F) != 0x90000010 ) {
269 _diagnostics.warning("first instruction of stub (0x%08X) is not ADRP for stub at addr 0x%0llX in %s",
270 stubInstr1, (uint64_t)stubVMAddr, _installName);
271 return 0;
272 }
273 int32_t adrpValue = ((stubInstr1 & 0x00FFFFE0) >> 3) | ((stubInstr1 & 0x60000000) >> 29);
274 if ( stubInstr1 & 0x00800000 )
275 adrpValue |= 0xFFF00000;
276 uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions + 4));
277 if ( (stubInstr2 & 0xFFC003FF) != 0xB9400210 ) {
278 _diagnostics.warning("second instruction of stub (0x%08X) is not LDR for stub at addr 0x%0llX in %s",
279 stubInstr2, (uint64_t)stubVMAddr, _installName);
280 return 0;
281 }
282 uint32_t ldrValue = ((stubInstr2 >> 10) & 0x00000FFF);
283 return (stubVMAddr & (-4096)) + adrpValue*4096 + ldrValue*4; // LDR Wn has a scale factor of 4
284
285 }
286 #endif
287
288
289 #if SUPPORT_ARCH_arm64e
290 template <typename P>
291 uint64_t StubOptimizer<P>::lazyPointerAddrFromArm64eStub(const uint8_t* stubInstructions, uint64_t stubVMAddr)
292 {
293 uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
294 // ADRP X17, dyld_mageLoaderCache@page
295 if ( (stubInstr1 & 0x9F00001F) != 0x90000011 ) {
296 _diagnostics.warning("first instruction of stub (0x%08X) is not ADRP for stub at addr 0x%0llX in %s",
297 stubInstr1, (uint64_t)stubVMAddr, _installName);
298 return 0;
299 }
300 int32_t adrpValue = ((stubInstr1 & 0x00FFFFE0) >> 3) | ((stubInstr1 & 0x60000000) >> 29);
301 if ( stubInstr1 & 0x00800000 )
302 adrpValue |= 0xFFF00000;
303
304 // ADD X17, X17, dyld_mageLoaderCache@pageoff
305 uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions + 4));
306 if ( (stubInstr2 & 0xFFC003FF) != 0x91000231 ) {
307 _diagnostics.warning("second instruction of stub (0x%08X) is not ADD for stub at addr 0x%0llX in %s",
308 stubInstr2, (uint64_t)stubVMAddr, _installName);
309 return 0;
310 }
311 uint32_t addValue = ((stubInstr2 & 0x003FFC00) >> 10);
312
313 // LDR X16, [X17]
314 uint32_t stubInstr3 = E::get32(*(uint32_t*)(stubInstructions + 8));
315 if ( stubInstr3 != 0xF9400230 ) {
316 _diagnostics.warning("second instruction of stub (0x%08X) is not LDR for stub at addr 0x%0llX in %s",
317 stubInstr2, (uint64_t)stubVMAddr, _installName);
318 return 0;
319 }
320 return (stubVMAddr & (-4096)) + adrpValue*4096 + addValue;
321 }
322 #endif
323
324
325 template <typename P>
326 void StubOptimizer<P>::buildStubMap(const std::unordered_set<std::string>& neverStubEliminate)
327 {
328 // find all stubs and lazy pointers
329 const macho_nlist<P>* symbolTable = (const macho_nlist<P>*)(&_linkeditBias[_symTabCmd->symoff()]);
330 const char* symbolStrings = (char*)(&_linkeditBias[_symTabCmd->stroff()]);
331 const uint32_t* const indirectTable = (uint32_t*)(&_linkeditBias[_dynSymTabCmd->indirectsymoff()]);
332 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)_mh + sizeof(macho_header<P>));
333 const uint32_t cmd_count = _mh->ncmds();
334 const macho_load_command<P>* cmd = cmds;
335 for (uint32_t i = 0; i < cmd_count; ++i) {
336 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
337 macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
338 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
339 macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
340 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
341 if ( sect->size() == 0 )
342 continue;
343 unsigned sectionType = (sect->flags() & SECTION_TYPE);
344 const uint32_t indirectTableOffset = sect->reserved1();
345 if ( sectionType == S_SYMBOL_STUBS ) {
346 const uint32_t stubSize = sect->reserved2();
347 _stubCount = (uint32_t)(sect->size() / stubSize);
348 pint_t stubVMAddr = (pint_t)sect->addr();
349 for (uint32_t j=0; j < _stubCount; ++j, stubVMAddr += stubSize) {
350 uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
351 switch ( symbolIndex ) {
352 case INDIRECT_SYMBOL_ABS:
353 case INDIRECT_SYMBOL_LOCAL:
354 case INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL:
355 break;
356 default:
357 if ( symbolIndex >= _symTabCmd->nsyms() ) {
358 _diagnostics.warning("symbol index out of range (%d of %d) for stub at addr 0x%0llX in %s",
359 symbolIndex, _symTabCmd->nsyms(), (uint64_t)stubVMAddr, _installName);
360 continue;
361 }
362 const macho_nlist<P>* sym = &symbolTable[symbolIndex];
363 uint32_t stringOffset = sym->n_strx();
364 if ( stringOffset > _symTabCmd->strsize() ) {
365 _diagnostics.warning("symbol string offset out of range (%u of %u) for stub at addr 0x%0llX in %s",
366 stringOffset, sym->n_strx(), (uint64_t)stubVMAddr, _installName);
367 continue;
368 }
369 const char* symName = &symbolStrings[stringOffset];
370 if ( neverStubEliminate.count(symName) ) {
371 //fprintf(stderr, "stubVMAddr=0x%llX, not bypassing stub to %s in %s because target is interposable\n", (uint64_t)stubVMAddr, symName, _installName);
372 _stubsLeftInterposable++;
373 continue;
374 }
375 const uint8_t* stubInstrs = (uint8_t*)(long)stubVMAddr + _cacheSlide;
376 pint_t targetLPAddr = 0;
377 switch ( _mh->cputype() ) {
378 case CPU_TYPE_ARM64:
379 case CPU_TYPE_ARM64_32:
380 #if SUPPORT_ARCH_arm64e
381 if (_mh->cpusubtype() == CPU_SUBTYPE_ARM64E)
382 targetLPAddr = (pint_t)lazyPointerAddrFromArm64eStub(stubInstrs, stubVMAddr);
383 else
384 #endif
385 #if SUPPORT_ARCH_arm64_32
386 if (_mh->cputype() == CPU_TYPE_ARM64_32)
387 targetLPAddr = (pint_t)lazyPointerAddrFromArm64_32Stub(stubInstrs, stubVMAddr);
388 else
389 #endif
390 targetLPAddr = (pint_t)lazyPointerAddrFromArm64Stub(stubInstrs, stubVMAddr);
391 break;
392 case CPU_TYPE_ARM:
393 targetLPAddr = (pint_t)lazyPointerAddrFromArmStub(stubInstrs, (uint32_t)stubVMAddr);
394 break;
395 }
396 if ( targetLPAddr != 0 )
397 _stubAddrToLPAddr[stubVMAddr] = targetLPAddr;
398 break;
399 }
400 }
401 }
402 else if ( (sectionType == S_LAZY_SYMBOL_POINTERS) || (sectionType == S_NON_LAZY_SYMBOL_POINTERS) ) {
403 pint_t lpVMAddr;
404 pint_t* lpContent = (pint_t*)(sect->addr() + _cacheSlide);
405 uint32_t elementCount = (uint32_t)(sect->size() / sizeof(pint_t));
406 uint64_t textSegStartAddr = _segCmds[0]->vmaddr();
407 uint64_t textSegEndAddr = _segCmds[0]->vmaddr() + _segCmds[0]->vmsize();
408 pint_t lpValue;
409 for (uint32_t j=0; j < elementCount; ++j) {
410 uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
411 switch ( symbolIndex ) {
412 case INDIRECT_SYMBOL_ABS:
413 case INDIRECT_SYMBOL_LOCAL:
414 case INDIRECT_SYMBOL_LOCAL|INDIRECT_SYMBOL_ABS:
415 break;
416 default:
417 lpValue = (pint_t)P::getP(lpContent[j]);
418
419 // Fixup threaded rebase/bind
420 if ( _chainedFixups ) {
421 dyld3::MachOLoaded::ChainedFixupPointerOnDisk ptr;
422 ptr.raw64 = lpValue;
423 assert(ptr.arm64e.authRebase.bind == 0);
424 if ( ptr.arm64e.authRebase.auth ) {
425 lpValue = (pint_t)(_cacheUnslideAddr + ptr.arm64e.authRebase.target);
426 }
427 else {
428 lpValue = (pint_t)ptr.arm64e.unpackTarget();
429 }
430 }
431
432 lpVMAddr = (pint_t)sect->addr() + j * sizeof(pint_t);
433 if ( symbolIndex >= _symTabCmd->nsyms() ) {
434 _diagnostics.warning("symbol index out of range (%d of %d) for lazy pointer at addr 0x%0llX in %s",
435 symbolIndex, _symTabCmd->nsyms(), (uint64_t)lpVMAddr, _installName);
436 continue;
437 }
438 const macho_nlist<P>* sym = &symbolTable[symbolIndex];
439 uint32_t stringOffset = sym->n_strx();
440 if ( stringOffset > _symTabCmd->strsize() ) {
441 _diagnostics.warning("symbol string offset out of range (%u of %u) for lazy pointer at addr 0x%0llX in %s",
442 stringOffset, sym->n_strx(), (uint64_t)lpVMAddr, _installName);
443 continue;
444 }
445 const char* symName = &symbolStrings[stringOffset];
446 if ( (lpValue > textSegStartAddr) && (lpValue< textSegEndAddr) ) {
447 //fprintf(stderr, "skipping lazy pointer at 0x%0lX to %s in %s because target is within dylib\n", (long)lpVMAddr, symName, _installName);
448 }
449 else if ( (sizeof(pint_t) == 8) && ((lpValue % 4) != 0) ) {
450 _diagnostics.warning("lazy pointer at 0x%0llX does not point to 4-byte aligned address(0x%0llX) in %s",
451 (uint64_t)lpVMAddr, (uint64_t)lpValue, _installName);
452 }
453 else {
454 _lpAddrToTargetAddr[lpVMAddr] = lpValue;
455 _targetAddrToName[lpValue] = symName;
456 }
457 break;
458 }
459 }
460 }
461 }
462 }
463 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
464 }
465 }
466
467
468 template <typename P>
469 void StubOptimizer<P>::forEachCallSiteToAStub(CallSiteHandler handler)
470 {
471 if (_diagnostics.hasError())
472 return;
473 const uint8_t* infoStart = &_linkeditBias[_splitSegInfoCmd->dataoff()];
474 const uint8_t* infoEnd = &infoStart[_splitSegInfoCmd->datasize()];
475 if ( *infoStart++ != DYLD_CACHE_ADJ_V2_FORMAT ) {
476 _diagnostics.error("malformed split seg info in %s", _installName);
477 return;
478 }
479
480 uint8_t* textSectionContent = (uint8_t*)(_textSection->addr() + _cacheSlide);
481
482 // Whole :== <count> FromToSection+
483 // FromToSection :== <from-sect-index> <to-sect-index> <count> ToOffset+
484 // ToOffset :== <to-sect-offset-delta> <count> FromOffset+
485 // FromOffset :== <kind> <count> <from-sect-offset-delta>
486 const uint8_t* p = infoStart;
487 uint64_t sectionCount = read_uleb128(p, infoEnd);
488 for (uint64_t i=0; i < sectionCount; ++i) {
489 uint64_t fromSectionIndex = read_uleb128(p, infoEnd);
490 uint64_t toSectionIndex = read_uleb128(p, infoEnd);
491 uint64_t toOffsetCount = read_uleb128(p, infoEnd);
492 uint64_t toSectionOffset = 0;
493 for (uint64_t j=0; j < toOffsetCount; ++j) {
494 uint64_t toSectionDelta = read_uleb128(p, infoEnd);
495 uint64_t fromOffsetCount = read_uleb128(p, infoEnd);
496 toSectionOffset += toSectionDelta;
497 for (uint64_t k=0; k < fromOffsetCount; ++k) {
498 uint64_t kind = read_uleb128(p, infoEnd);
499 if ( kind > 13 ) {
500 _diagnostics.error("bad kind (%llu) value in %s\n", kind, _installName);
501 }
502 uint64_t fromSectDeltaCount = read_uleb128(p, infoEnd);
503 uint64_t fromSectionOffset = 0;
504 for (uint64_t l=0; l < fromSectDeltaCount; ++l) {
505 uint64_t delta = read_uleb128(p, infoEnd);
506 fromSectionOffset += delta;
507 if ( (fromSectionIndex == _textSectionIndex) && (toSectionIndex == _stubSectionIndex) ) {
508 uint32_t* instrPtr = (uint32_t*)(textSectionContent + fromSectionOffset);
509 uint64_t instrAddr = _textSection->addr() + fromSectionOffset;
510 uint64_t stubAddr = _stubSection->addr() + toSectionOffset;
511 uint32_t instruction = E::get32(*instrPtr);
512 _branchToStubCount++;
513 if ( handler(kind, instrAddr, stubAddr, instruction) ) {
514 E::set32(*instrPtr, instruction);
515 }
516 }
517 }
518 }
519 }
520 }
521 }
522
523
524 /// Extract displacement from a thumb b/bl/blx instruction.
525 template <typename P>
526 int32_t StubOptimizer<P>::getDisplacementFromThumbBranch(uint32_t instruction, uint32_t instrAddr)
527 {
528 bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
529 uint32_t s = (instruction >> 10) & 0x1;
530 uint32_t j1 = (instruction >> 29) & 0x1;
531 uint32_t j2 = (instruction >> 27) & 0x1;
532 uint32_t imm10 = instruction & 0x3FF;
533 uint32_t imm11 = (instruction >> 16) & 0x7FF;
534 uint32_t i1 = (j1 == s);
535 uint32_t i2 = (j2 == s);
536 uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
537 int32_t sdis = dis;
538 int32_t result = s ? (sdis | 0xFE000000) : sdis;
539 if ( is_blx && (instrAddr & 0x2) ) {
540 // The thumb blx instruction always has low bit of imm11 as zero. The way
541 // a 2-byte aligned blx can branch to a 4-byte aligned ARM target is that
542 // the blx instruction always 4-byte aligns the pc before adding the
543 // displacement from the blx. We must emulate that when decoding this.
544 result -= 2;
545 }
546 return result;
547 }
548
549 /// Update a thumb b/bl/blx instruction, switching bl <-> blx as needed.
550 template <typename P>
551 uint32_t StubOptimizer<P>::setDisplacementInThumbBranch(uint32_t instruction, uint32_t instrAddr,
552 int32_t displacement, bool targetIsThumb) {
553 if ( (displacement > 16777214) || (displacement < (-16777216)) ) {
554 _diagnostics.error("thumb branch out of range at 0x%0X in %s", instrAddr, _installName);
555 return 0;
556 }
557 bool is_bl = ((instruction & 0xD000F800) == 0xD000F000);
558 bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
559 bool is_b = ((instruction & 0xD000F800) == 0x9000F000);
560 uint32_t newInstruction = (instruction & 0xD000F800);
561 if (is_bl || is_blx) {
562 if (targetIsThumb) {
563 newInstruction = 0xD000F000; // Use bl
564 }
565 else {
566 newInstruction = 0xC000F000; // Use blx
567 // See note in getDisplacementFromThumbBranch() about blx.
568 if (instrAddr & 0x2)
569 displacement += 2;
570 }
571 }
572 else if (is_b) {
573 if ( !targetIsThumb ) {
574 _diagnostics.error("no pc-rel thumb branch instruction that switches to arm mode at 0x%0X in %s", instrAddr, _installName);
575 return 0;
576 }
577 }
578 else {
579 _diagnostics.error("not b/bl/blx at 0x%0X in %s", instrAddr, _installName);
580 return 0;
581 }
582 uint32_t s = (uint32_t)(displacement >> 24) & 0x1;
583 uint32_t i1 = (uint32_t)(displacement >> 23) & 0x1;
584 uint32_t i2 = (uint32_t)(displacement >> 22) & 0x1;
585 uint32_t imm10 = (uint32_t)(displacement >> 12) & 0x3FF;
586 uint32_t imm11 = (uint32_t)(displacement >> 1) & 0x7FF;
587 uint32_t j1 = (i1 == s);
588 uint32_t j2 = (i2 == s);
589 uint32_t nextDisp = (j1 << 13) | (j2 << 11) | imm11;
590 uint32_t firstDisp = (s << 10) | imm10;
591 newInstruction |= (nextDisp << 16) | firstDisp;
592 return newInstruction;
593 }
594
595
596 template <typename P>
597 void StubOptimizer<P>::optimizeArmCallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr)
598 {
599 forEachCallSiteToAStub([&](uint8_t kind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction) -> bool {
600 if ( kind == DYLD_CACHE_ADJ_V2_THUMB_BR22 ) {
601 bool is_bl = ((instruction & 0xD000F800) == 0xD000F000);
602 bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
603 bool is_b = ((instruction & 0xD000F800) == 0x9000F000);
604 if ( !is_bl && !is_blx && !is_b ){
605 _diagnostics.warning("non-branch instruction at 0x%0llX in %s", callSiteAddr, _installName);
606 return false;
607 }
608 int32_t brDelta = getDisplacementFromThumbBranch(instruction, (uint32_t)callSiteAddr);
609 pint_t targetAddr = (pint_t)callSiteAddr + 4 + brDelta;
610 if ( targetAddr != stubAddr ) {
611 _diagnostics.warning("stub target mismatch at callsite 0x%0llX in %s", callSiteAddr, _installName);
612 return false;
613 }
614 // ignore branch if not to a known stub
615 const auto& pos = _stubAddrToLPAddr.find(targetAddr);
616 if ( pos == _stubAddrToLPAddr.end() )
617 return false;
618
619 // ignore branch if lazy pointer is not known (resolver or interposable)
620 uint64_t lpAddr = pos->second;
621 const auto& pos2 = _lpAddrToTargetAddr.find((pint_t)lpAddr);
622 if ( pos2 == _lpAddrToTargetAddr.end() )
623 return false;
624
625 uint64_t finalTargetAddr = pos2->second;
626 int64_t deltaToFinalTarget = finalTargetAddr - (callSiteAddr + 4);
627 // if final target within range, change to branch there directly
628 if ( (deltaToFinalTarget > -b16MegLimit) && (deltaToFinalTarget < b16MegLimit) ) {
629 bool targetIsThumb = (finalTargetAddr & 1);
630 instruction = setDisplacementInThumbBranch(instruction, (uint32_t)callSiteAddr, (int32_t)deltaToFinalTarget, targetIsThumb);
631 if (_diagnostics.hasError())
632 return false;
633 _branchOptimizedToDirectCount++;
634 return true;
635 }
636
637 // try to re-use an existing optimized stub
638 const auto& pos3 = targetAddrToOptStubAddr.find(finalTargetAddr);
639 if ( pos3 != targetAddrToOptStubAddr.end() ) {
640 uint64_t existingStub = pos3->second;
641 if ( existingStub != stubAddr ) {
642 int64_t deltaToOptStub = existingStub - (callSiteAddr + 4);
643 if ( (deltaToOptStub > -b16MegLimit) && (deltaToOptStub < b16MegLimit) ) {
644 bool targetIsThumb = (existingStub & 1);
645 instruction = setDisplacementInThumbBranch(instruction, (uint32_t)callSiteAddr, (int32_t)deltaToOptStub, targetIsThumb);
646 if (_diagnostics.hasError())
647 return false;
648 _branchToReUsedOptimizedStubCount++;
649 return true;
650 }
651 }
652 }
653
654 // leave as BL to stub, but optimize the stub
655 _stubsToOptimize.insert(stubAddr);
656 targetAddrToOptStubAddr[finalTargetAddr] = stubAddr;
657 _branchToOptimizedStubCount++;
658 return false;
659 }
660 else if ( kind == DYLD_CACHE_ADJ_V2_ARM_BR24 ) {
661 // too few of these to be worth trying to optimize
662 }
663
664 return false;
665 });
666 if (_diagnostics.hasError())
667 return;
668 }
669
670
671 template <typename P>
672 void StubOptimizer<P>::optimizeArmStubs()
673 {
674 for (const auto& stubEntry : _stubAddrToLPAddr) {
675 pint_t stubVMAddr = stubEntry.first;
676 pint_t lpVMAddr = stubEntry.second;
677 const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
678 if ( pos == _lpAddrToTargetAddr.end() )
679 return;
680 pint_t targetVMAddr = pos->second;
681
682 int32_t delta = (int32_t)(targetVMAddr - (stubVMAddr + 12));
683 uint32_t* stubInstructions = (uint32_t*)((uint8_t*)(long)stubVMAddr + _cacheSlide);
684 assert(stubInstructions[0] == 0xe59fc004);
685 stubInstructions[0] = 0xe59fc000; // ldr ip, L0
686 stubInstructions[1] = 0xe08ff00c; // add pc, pc, ip
687 stubInstructions[2] = delta; // L0: .long xxxx
688 stubInstructions[3] = 0xe7ffdefe; // trap
689 _stubOptimizedCount++;
690 }
691 }
692
693
694
695 template <typename P>
696 void StubOptimizer<P>::optimizeArm64Stubs()
697 {
698 for (const uint64_t stubVMAddr : _stubsToOptimize ) {
699 pint_t lpVMAddr = _stubAddrToLPAddr[(pint_t)stubVMAddr];
700 const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
701 if ( pos == _lpAddrToTargetAddr.end() )
702 return;
703 pint_t targetVMAddr = pos->second;
704
705 int64_t adrpDelta = (targetVMAddr & -4096) - (stubVMAddr & -4096);
706 // Note: ADRP/ADD can only span +/-4GB
707 uint32_t* stubInstructions = (uint32_t*)((uint8_t*)(long)stubVMAddr + _cacheSlide);
708 bool rightInstr1 = ((stubInstructions[0] & 0x9F00001F) == 0x90000010); // ADRP X16, lp@page
709 bool rightInstr2 = ((stubInstructions[1] & 0xFFC003FF) == 0xF9400210); // LDR X16, [X16, lp@pageoff]
710 bool rightInstr3 = (stubInstructions[2] == 0xD61F0200); // BR X16
711
712 if ( rightInstr1 && rightInstr2 && rightInstr3 ) {
713 uint32_t immhi = (adrpDelta >> 9) & (0x00FFFFE0);
714 uint32_t immlo = (adrpDelta << 17) & (0x60000000);
715 uint32_t newADRP = (0x90000010) | immlo | immhi;
716 uint32_t off12 = (targetVMAddr & 0xFFF);
717 uint32_t newADD = (0x91000210) | (off12 << 10);
718
719 stubInstructions[0] = newADRP; // ADRP X16, target@page
720 stubInstructions[1] = newADD; // ADD X16, X16, target@pageoff
721 stubInstructions[2] = 0xD61F0200; // BR X16
722 _stubOptimizedCount++;
723 }
724 }
725 }
726
727 #if SUPPORT_ARCH_arm64e
728 template <typename P>
729 void StubOptimizer<P>::optimizeArm64eStubs()
730 {
731 for (const uint64_t stubVMAddr : _stubsToOptimize ) {
732 pint_t lpVMAddr = _stubAddrToLPAddr[(pint_t)stubVMAddr];
733 const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
734 if ( pos == _lpAddrToTargetAddr.end() )
735 return;
736 pint_t targetVMAddr = pos->second;
737
738 int64_t adrpDelta = (targetVMAddr & -4096) - (stubVMAddr & -4096);
739 // Note: ADRP/ADD can only span +/-4GB
740 uint32_t* stubInstructions = (uint32_t*)((uint8_t*)(long)stubVMAddr + _cacheSlide);
741 bool rightInstr1 = ((stubInstructions[0] & 0x9F00001F) == 0x90000011); // ADRP X17, lp@page
742 bool rightInstr2 = ((stubInstructions[1] & 0xFFC003FF) == 0x91000231); // ADD X17, [X17, lp@pageoff]
743 bool rightInstr3 = (stubInstructions[2] == 0xF9400230); // LDR X16, [X17]
744 bool rightInstr4 = (stubInstructions[3] == 0xD71F0A11); // BRAA X16, X17
745
746 if ( rightInstr1 && rightInstr2 && rightInstr3 && rightInstr4) {
747 uint32_t immhi = (adrpDelta >> 9) & (0x00FFFFE0);
748 uint32_t immlo = (adrpDelta << 17) & (0x60000000);
749 uint32_t newADRP = (0x90000010) | immlo | immhi;
750 uint32_t off12 = (targetVMAddr & 0xFFF);
751 uint32_t newADD = (0x91000210) | (off12 << 10);
752
753 stubInstructions[0] = newADRP; // ADRP X16, target@page
754 stubInstructions[1] = newADD; // ADD X16, X16, target@pageoff
755 stubInstructions[2] = 0xD61F0200; // BR X16
756 stubInstructions[3] = 0xD4200020; // TRAP
757 _stubOptimizedCount++;
758 }
759 }
760 }
761 #endif
762
763 #if SUPPORT_ARCH_arm64_32
764 template <typename P>
765 void StubOptimizer<P>::optimizeArm64_32Stubs()
766 {
767 for (const uint64_t stubVMAddr : _stubsToOptimize ) {
768 pint_t lpVMAddr = _stubAddrToLPAddr[(pint_t)stubVMAddr];
769 const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
770 if ( pos == _lpAddrToTargetAddr.end() )
771 return;
772 pint_t targetVMAddr = pos->second;
773
774 int64_t adrpDelta = (targetVMAddr & -4096) - (stubVMAddr & -4096);
775 uint32_t* stubInstructions = (uint32_t*)((uint8_t*)(long)stubVMAddr + _cacheSlide);
776 bool rightInstr1 = ((stubInstructions[0] & 0x9F00001F) == 0x90000010); // ADRP X16, lp@page
777 bool rightInstr2 = ((stubInstructions[1] & 0xFFC003FF) == 0xB9400210); // LDR W16, [X16, lp@pageoff]
778 bool rightInstr3 = (stubInstructions[2] == 0xD61F0200); // BR X16
779
780 if ( rightInstr1 && rightInstr2 && rightInstr3 ) {
781 uint32_t immhi = (adrpDelta >> 9) & (0x00FFFFE0);
782 uint32_t immlo = (adrpDelta << 17) & (0x60000000);
783 uint32_t newADRP = (0x90000010) | immlo | immhi;
784 uint32_t off12 = (targetVMAddr & 0xFFF);
785 uint32_t newADD = (0x91000210) | (off12 << 10);
786
787 stubInstructions[0] = newADRP; // ADRP X16, target@page
788 stubInstructions[1] = newADD; // ADD X16, X16, target@pageoff
789 stubInstructions[2] = 0xD61F0200; // BR X16
790 _stubOptimizedCount++;
791 }
792 }
793 }
794 #endif
795
796
797 template <typename P>
798 void StubOptimizer<P>::optimizeArm64CallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr)
799 {
800 forEachCallSiteToAStub([&](uint8_t kind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction) -> bool {
801 if ( kind != DYLD_CACHE_ADJ_V2_ARM64_BR26 )
802 return false;
803 // skip all but BL or B
804 if ( (instruction & 0x7C000000) != 0x14000000 )
805 return false;
806 // compute target of branch instruction
807 int32_t brDelta = (instruction & 0x03FFFFFF) << 2;
808 if ( brDelta & 0x08000000 )
809 brDelta |= 0xF0000000;
810 uint64_t targetAddr = callSiteAddr + (int64_t)brDelta;
811 if ( targetAddr != stubAddr ) {
812 _diagnostics.warning("stub target mismatch");
813 return false;
814 }
815 // ignore branch if not to a known stub
816 const auto& pos = _stubAddrToLPAddr.find((pint_t)targetAddr);
817 if ( pos == _stubAddrToLPAddr.end() )
818 return false;
819
820 // ignore branch if lazy pointer is not known (resolver or interposable)
821 uint64_t lpAddr = pos->second;
822 const auto& pos2 = _lpAddrToTargetAddr.find((pint_t)lpAddr);
823 if ( pos2 == _lpAddrToTargetAddr.end() )
824 return false;
825
826 uint64_t finalTargetAddr = pos2->second;
827 int64_t deltaToFinalTarget = finalTargetAddr - callSiteAddr;
828 // if final target within range, change to branch there directly
829 if ( (deltaToFinalTarget > -b128MegLimit) && (deltaToFinalTarget < b128MegLimit) ) {
830 instruction= (instruction & 0xFC000000) | ((deltaToFinalTarget >> 2) & 0x03FFFFFF);
831 _branchOptimizedToDirectCount++;
832 return true;
833 }
834
835 // try to re-use an existing optimized stub
836 const auto& pos3 = targetAddrToOptStubAddr.find((pint_t)finalTargetAddr);
837 if ( pos3 != targetAddrToOptStubAddr.end() ) {
838 uint64_t existingStub = pos3->second;
839 if ( existingStub != stubAddr ) {
840 int64_t deltaToOptStub = existingStub - callSiteAddr;
841 if ( (deltaToOptStub > -b128MegLimit) && (deltaToOptStub < b128MegLimit) ) {
842 instruction = (instruction & 0xFC000000) | ((deltaToOptStub >> 2) & 0x03FFFFFF);
843 _branchToReUsedOptimizedStubCount++;
844 return true;
845 }
846 }
847 }
848
849 // leave as BL to stub, but optimize the stub
850 _stubsToOptimize.insert(stubAddr);
851 targetAddrToOptStubAddr[(pint_t)finalTargetAddr] = (pint_t)stubAddr;
852 _branchToOptimizedStubCount++;
853 return false;
854 });
855 if (_diagnostics.hasError())
856 return;
857 }
858
859
860 template <typename P>
861 void StubOptimizer<P>::optimizeCallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr)
862 {
863 if ( _textSection == NULL )
864 return;
865 if ( _stubSection == NULL )
866 return;
867
868
869 switch ( _mh->cputype() ) {
870 case CPU_TYPE_ARM64:
871 optimizeArm64CallSites(targetAddrToOptStubAddr);
872 #if SUPPORT_ARCH_arm64e
873 if (_mh->cpusubtype() == CPU_SUBTYPE_ARM64E)
874 optimizeArm64eStubs();
875 else
876 #endif
877 optimizeArm64Stubs();
878 break;
879 #if SUPPORT_ARCH_arm64_32
880 case CPU_TYPE_ARM64_32:
881 optimizeArm64CallSites(targetAddrToOptStubAddr);
882 optimizeArm64_32Stubs();
883 break;
884 #endif
885 case CPU_TYPE_ARM:
886 optimizeArmCallSites(targetAddrToOptStubAddr);
887 optimizeArmStubs();
888 break;
889 }
890 if ( verbose ) {
891 _diagnostics.verbose("dylib has %6u BLs to %4u stubs. Changed %5u, %5u, %5u BLs to use direct branch, optimized stub, neighbor's optimized stub. "
892 "%5u stubs left interposable, %4u stubs optimized. path=%s\n",
893 _branchToStubCount, _stubCount, _branchOptimizedToDirectCount, _branchToOptimizedStubCount, _branchToReUsedOptimizedStubCount,
894 _stubsLeftInterposable, _stubOptimizedCount, _installName);
895 }
896
897 }
898
899 template <typename P>
900 void bypassStubs(DyldSharedCache* cache, const std::string& archName, std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr,
901 const char* const neverStubEliminateDylibs[], const char* const neverStubEliminateSymbols[],
902 Diagnostics& diags)
903 {
904 diags.verbose("Stub elimination optimization:\n");
905
906 // construct a StubOptimizer for each image
907 __block std::vector<StubOptimizer<P>*> optimizers;
908 cache->forEachImage(^(const mach_header* mh, const char* installName) {
909 optimizers.push_back(new StubOptimizer<P>(cache, (macho_header<P>*)mh, diags));
910 });
911
912 // build set of functions to never stub-eliminate because tools may need to override them
913 std::unordered_set<std::string> neverStubEliminate;
914 for (const char* const* p=neverStubEliminateSymbols; *p != nullptr; ++p) {
915 neverStubEliminate.insert(*p);
916 }
917 for (const char* const* d=neverStubEliminateDylibs; *d != nullptr; ++d) {
918 for (StubOptimizer<P>* op : optimizers) {
919 if ( strcmp(op->installName(), *d) == 0 ) {
920 // add all exports
921 const uint8_t* exportsStart = op->exportsTrie();
922 const uint8_t* exportsEnd = exportsStart + op->exportsTrieSize();
923 std::vector<ExportInfoTrie::Entry> exports;
924 if ( !ExportInfoTrie::parseTrie(exportsStart, exportsEnd, exports) ) {
925 diags.error("malformed exports trie in %s", *d);
926 return;
927 }
928 for(const ExportInfoTrie::Entry& entry : exports) {
929 neverStubEliminate.insert(entry.name);
930 }
931 }
932 }
933 }
934
935 // build maps of stubs-to-lp and lp-to-target
936 for (StubOptimizer<P>* op : optimizers)
937 op->buildStubMap(neverStubEliminate);
938
939 // optimize call sites to by-pass stubs or jump through island
940 for (StubOptimizer<P>* op : optimizers)
941 op->optimizeCallSites(targetAddrToOptStubAddr);
942
943 // write total optimization info
944 uint32_t callSiteCount = 0;
945 uint32_t callSiteDirectOptCount = 0;
946 for (StubOptimizer<P>* op : optimizers) {
947 callSiteCount += op->_branchToStubCount;
948 callSiteDirectOptCount += op->_branchOptimizedToDirectCount;
949 }
950 diags.verbose(" cache contains %u call sites of which %u were direct bound\n", callSiteCount, callSiteDirectOptCount);
951
952 // clean up
953 for (StubOptimizer<P>* op : optimizers)
954 delete op;
955 }
956
957 void CacheBuilder::optimizeAwayStubs()
958 {
959 std::unordered_map<uint64_t, uint64_t> targetAddrToOptStubAddr;
960
961 DyldSharedCache* dyldCache = (DyldSharedCache*)_readExecuteRegion.buffer;
962 std::string archName = dyldCache->archName();
963 #if SUPPORT_ARCH_arm64_32
964 if ( startsWith(archName, "arm64_32") )
965 bypassStubs<Pointer32<LittleEndian> >(dyldCache, archName, targetAddrToOptStubAddr, _s_neverStubEliminateDylibs, _s_neverStubEliminateSymbols, _diagnostics);
966 else
967 #endif
968 if ( startsWith(archName, "arm64") )
969 bypassStubs<Pointer64<LittleEndian> >(dyldCache, archName, targetAddrToOptStubAddr, _s_neverStubEliminateDylibs, _s_neverStubEliminateSymbols, _diagnostics);
970 else if ( archName == "armv7k" )
971 bypassStubs<Pointer32<LittleEndian>>(dyldCache, archName, targetAddrToOptStubAddr, _s_neverStubEliminateDylibs, _s_neverStubEliminateSymbols, _diagnostics);
972 // no stub optimization done for other arches
973 }