dyld-625.13.tar.gz
[apple/dyld.git] / dyld3 / shared-cache / OptimizerBranches.cpp
1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*-
2 *
3 * Copyright (c) 2015 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <sys/mman.h>
29 #include <limits.h>
30 #include <stdarg.h>
31 #include <stdio.h>
32 #include <unistd.h>
33 #include <CommonCrypto/CommonDigest.h>
34
35 #include <string>
36 #include <unordered_map>
37 #include <unordered_set>
38
39 #include "StringUtils.h"
40 #include "Trie.hpp"
41 #include "MachOFileAbstraction.hpp"
42 #include "MachOAnalyzer.h"
43 #include "Diagnostics.h"
44 #include "DyldSharedCache.h"
45 #include "CacheBuilder.h"
46
// Compile-time switch for extra diagnostic output in this file.
static const bool verbose = false;

// Symbol names for which stubs must never be eliminated.
// These are functions that are interposed by Instruments.app or ASan.
// Entries are grouped by leading letter; the table ends with a nullptr sentinel.
static const char* sNeverStubEliminateSymbols[] = {
    "___bzero", "___cxa_atexit", "___cxa_throw", "__longjmp", "__objc_autoreleasePoolPop",
    // a
    "_accept", "_access", "_asctime", "_asctime_r", "_asprintf", "_atoi", "_atol", "_atoll",
    // c
    "_calloc", "_chmod", "_chown", "_close", "_confstr", "_ctime", "_ctime_r",
    // d
    "_dispatch_after", "_dispatch_after_f", "_dispatch_async", "_dispatch_async_f",
    "_dispatch_barrier_async_f", "_dispatch_group_async", "_dispatch_group_async_f",
    "_dispatch_source_set_cancel_handler", "_dispatch_source_set_event_handler",
    "_dispatch_sync_f", "_dlclose", "_dlopen", "_dup", "_dup2",
    // e
    "_endgrent", "_endpwent", "_ether_aton", "_ether_hostton", "_ether_line", "_ether_ntoa",
    "_ether_ntohost",
    // f
    "_fchmod", "_fchown", "_fclose", "_fdopen", "_fflush", "_fopen", "_fork", "_fprintf",
    "_free", "_freopen", "_frexp", "_frexpf", "_frexpl", "_fscanf", "_fstat", "_fstatfs",
    "_fstatfs64", "_fsync", "_ftime",
    // g
    "_getaddrinfo", "_getattrlist", "_getcwd", "_getgrent", "_getgrgid", "_getgrgid_r",
    "_getgrnam", "_getgrnam_r", "_getgroups", "_gethostbyaddr", "_gethostbyname",
    "_gethostbyname2", "_gethostent", "_getifaddrs", "_getitimer", "_getnameinfo", "_getpass",
    "_getpeername", "_getpwent", "_getpwnam", "_getpwnam_r", "_getpwuid", "_getpwuid_r",
    "_getsockname", "_getsockopt", "_gmtime", "_gmtime_r",
    // i
    "_if_indextoname", "_if_nametoindex", "_index", "_inet_aton", "_inet_ntop", "_inet_pton",
    "_initgroups", "_ioctl",
    // l
    "_lchown", "_lgamma", "_lgammaf", "_lgammal", "_link", "_listxattr", "_localtime",
    "_localtime_r", "_longjmp", "_lseek", "_lstat",
    // m
    "_malloc", "_malloc_create_zone", "_malloc_default_purgeable_zone", "_malloc_default_zone",
    "_malloc_good_size", "_malloc_make_nonpurgeable", "_malloc_make_purgeable",
    "_malloc_set_zone_name", "_mbsnrtowcs", "_mbsrtowcs", "_mbstowcs", "_memchr", "_memcmp",
    "_memcpy", "_memmove", "_memset", "_mktime", "_mlock", "_mlockall", "_modf", "_modff",
    "_modfl", "_munlock", "_munlockall",
    // o
    "_objc_autoreleasePoolPop", "_objc_setProperty", "_objc_setProperty_atomic",
    "_objc_setProperty_atomic_copy", "_objc_setProperty_nonatomic",
    "_objc_setProperty_nonatomic_copy", "_objc_storeStrong", "_open", "_opendir",
    // p
    "_poll", "_posix_memalign", "_pread", "_printf",
    "_pthread_attr_getdetachstate", "_pthread_attr_getguardsize", "_pthread_attr_getinheritsched",
    "_pthread_attr_getschedparam", "_pthread_attr_getschedpolicy", "_pthread_attr_getscope",
    "_pthread_attr_getstack", "_pthread_attr_getstacksize", "_pthread_condattr_getpshared",
    "_pthread_create", "_pthread_getschedparam", "_pthread_join", "_pthread_mutex_lock",
    "_pthread_mutex_unlock", "_pthread_mutexattr_getprioceiling", "_pthread_mutexattr_getprotocol",
    "_pthread_mutexattr_getpshared", "_pthread_mutexattr_gettype", "_pthread_rwlockattr_getpshared",
    "_pwrite",
    // r
    "_rand_r", "_read", "_readdir", "_readdir_r", "_readv", "_readv$UNIX2003", "_realloc",
    "_realpath", "_recv", "_recvfrom", "_recvmsg", "_remquo", "_remquof", "_remquol",
    // s
    "_scanf", "_send", "_sendmsg", "_sendto", "_setattrlist", "_setgrent", "_setitimer",
    "_setlocale", "_setpwent", "_shm_open", "_shm_unlink", "_sigaction", "_sigemptyset",
    "_sigfillset", "_siglongjmp", "_signal", "_sigpending", "_sigprocmask", "_sigwait",
    "_snprintf", "_sprintf", "_sscanf", "_stat", "_statfs", "_statfs64", "_strcasecmp",
    "_strcat", "_strchr", "_strcmp", "_strcpy", "_strdup", "_strerror", "_strerror_r",
    "_strlen", "_strncasecmp", "_strncat", "_strncmp", "_strncpy", "_strptime", "_strtoimax",
    "_strtol", "_strtoll", "_strtoumax",
    // t
    "_tempnam", "_time", "_times", "_tmpnam", "_tsearch",
    // u, v
    "_unlink", "_valloc", "_vasprintf", "_vfprintf", "_vfscanf", "_vprintf", "_vscanf",
    "_vsnprintf", "_vsprintf", "_vsscanf",
    // w
    "_wait", "_wait$UNIX2003", "_wait3", "_wait4", "_waitid", "_waitid$UNIX2003", "_waitpid",
    "_waitpid$UNIX2003", "_wcslen", "_wcsnrtombs", "_wcsrtombs", "_wcstombs", "_wordexp",
    "_write", "_writev", "_writev$UNIX2003",
    // <rdar://problem/22050956> always use stubs for C++ symbols that can be overridden
    "__ZdaPv", "__ZdlPv", "__Znam", "__Znwm",

    nullptr
};
310
311
312 static uint64_t branchPoolTextSize(const std::string& archName)
313 {
314 if ( startsWith(archName, "arm64") )
315 return 0x0000C000; // 48KB
316 else
317 return 0;
318 }
319
320 static uint64_t branchPoolLinkEditSize(const std::string& archName)
321 {
322 if ( startsWith(archName, "arm64") )
323 return 0x00100000; // 1MB
324 else
325 return 0;
326 }
327
328
329 template <typename P>
330 class BranchPoolDylib {
331 public:
332 BranchPoolDylib(DyldSharedCache* cache, uint64_t startAddr,
333 uint64_t textRegionStartAddr, uint64_t poolLinkEditStartAddr, uint64_t poolLinkEditFileOffset, Diagnostics& diags);
334
335 uint64_t addr() { return _startAddr; }
336 uint64_t getForwardBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools);
337 uint64_t getBackBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools);
338 void finalizeLoadCommands();
339 void printStats();
340
341 private:
342 Diagnostics& _diagnostics;
343 uint64_t indexToAddr(uint32_t index) { return _startAddr + _firstStubOffset + sizeof(uint32_t)*index; }
344
345 static const int64_t b128MegLimit = 0x07FFFFFF;
346
347 typedef typename P::uint_t pint_t;
348 typedef typename P::E E;
349
350 DyldSharedCache* _cacheBuffer;
351 uint64_t _startAddr;
352 std::unordered_map<uint64_t, uint32_t> _targetToIslandIndex;
353 std::unordered_map<uint32_t, const char*> _islandIndexToName;
354 macho_symtab_command<P>* _symbolTableCmd;
355 macho_dysymtab_command<P>* _dynamicSymbolTableCmd;
356 macho_uuid_command<P>* _uuidCmd;
357 uint32_t _maxStubs;
358 uint32_t _nextIndex;
359 uint32_t _firstStubOffset;
360 uint32_t* _stubInstructions;
361 macho_nlist<P>* _symbolTable;
362 char* _nextString;
363 char* _stringPoolStart;
364 char* _stringPoolEnd;
365 };
366
367 template <typename P>
368 BranchPoolDylib<P>::BranchPoolDylib(DyldSharedCache* cache, uint64_t poolStartAddr,
369 uint64_t textRegionStartAddr, uint64_t poolLinkEditStartAddr, uint64_t poolLinkEditFileOffset, Diagnostics& diags)
370 : _cacheBuffer(cache), _startAddr(poolStartAddr), _nextIndex(0), _firstStubOffset(0x280), _diagnostics(diags)
371 {
372 std::string archName = cache->archName();
373 bool is64 = (sizeof(typename P::uint_t) == 8);
374
375 const int64_t cacheSlide = (long)cache - cache->unslidLoadAddress();
376 const uint64_t textSegSize = branchPoolTextSize(archName);
377 const uint64_t linkEditSegSize = branchPoolLinkEditSize(archName);
378 const unsigned stubCount = (unsigned)((textSegSize - _firstStubOffset)/sizeof(uint32_t));
379 const uint32_t linkeditOffsetSymbolTable = 0;
380 const uint32_t linkeditOffsetIndirectSymbolTable = stubCount*sizeof(macho_nlist<P>);
381 const uint32_t linkeditOffsetSymbolPoolOffset = linkeditOffsetIndirectSymbolTable + stubCount*sizeof(uint32_t);
382 _maxStubs = stubCount;
383
384 // write mach_header and load commands for pseudo dylib
385 macho_header<P>* mh = (macho_header<P>*)((uint8_t*)cache + poolStartAddr - textRegionStartAddr);
386 mh->set_magic(is64 ? MH_MAGIC_64 : MH_MAGIC);
387 mh->set_cputype(dyld3::MachOFile::cpuTypeFromArchName(archName.c_str()));
388 mh->set_cpusubtype(dyld3::MachOFile::cpuSubtypeFromArchName(archName.c_str()));
389 mh->set_filetype(MH_DYLIB);
390 mh->set_ncmds(6);
391 mh->set_sizeofcmds(is64 ? 0x210 : 100); // FIXME: 32-bit size
392 mh->set_flags(0x80000000);
393 // LC_SEGMENT
394 macho_load_command<P>* cmd = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
395 macho_segment_command<P>* textSegCmd = (macho_segment_command<P>*)cmd;
396 textSegCmd->set_cmd(is64 ? LC_SEGMENT_64 : LC_SEGMENT);
397 textSegCmd->set_cmdsize(sizeof(macho_segment_command<P>)*2+sizeof(macho_section<P>));
398 textSegCmd->set_segname("__TEXT");
399 textSegCmd->set_vmaddr(poolStartAddr);
400 textSegCmd->set_vmsize(textSegSize);
401 textSegCmd->set_fileoff(poolStartAddr - textRegionStartAddr);
402 textSegCmd->set_filesize(branchPoolTextSize(archName));
403 textSegCmd->set_maxprot(PROT_READ|PROT_EXEC);
404 textSegCmd->set_initprot(PROT_READ|PROT_EXEC);
405 textSegCmd->set_nsects(1);
406 textSegCmd->set_flags(0);
407 macho_section<P>* stubSection = (macho_section<P>*)((uint8_t*)textSegCmd + sizeof(macho_segment_command<P>));
408 stubSection->set_sectname("__stubs");
409 stubSection->set_segname("__TEXT");
410 stubSection->set_addr(poolStartAddr + _firstStubOffset);
411 stubSection->set_size(textSegSize - _firstStubOffset);
412 stubSection->set_offset((uint32_t)(poolStartAddr + _firstStubOffset - textRegionStartAddr));
413 stubSection->set_align(2);
414 stubSection->set_reloff(0);
415 stubSection->set_nreloc(0);
416 stubSection->set_flags(S_SYMBOL_STUBS | S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS);
417 stubSection->set_reserved1(0); // start index in indirect table
418 stubSection->set_reserved2(4); // size of stubs
419 // LC_SEGMENT
420 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
421 macho_segment_command<P>* linkEditSegCmd = (macho_segment_command<P>*)cmd;
422 linkEditSegCmd->set_cmd(is64 ? LC_SEGMENT_64 : LC_SEGMENT);
423 linkEditSegCmd->set_cmdsize(sizeof(macho_segment_command<P>));
424 linkEditSegCmd->set_segname("__LINKEDIT");
425 linkEditSegCmd->set_vmaddr(poolLinkEditStartAddr);
426 linkEditSegCmd->set_vmsize(linkEditSegSize);
427 linkEditSegCmd->set_fileoff(poolLinkEditFileOffset);
428 linkEditSegCmd->set_filesize(linkEditSegSize);
429 linkEditSegCmd->set_maxprot(PROT_READ);
430 linkEditSegCmd->set_initprot(PROT_READ);
431 linkEditSegCmd->set_nsects(0);
432 linkEditSegCmd->set_flags(0);
433 // LC_ID_DYLIB
434 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
435 macho_dylib_command<P>* installNameCmd = (macho_dylib_command<P>*)cmd;
436 installNameCmd->set_cmd(LC_ID_DYLIB);
437 installNameCmd->set_cmdsize(sizeof(macho_dylib_command<P>) + 48);
438 installNameCmd->set_timestamp(2);
439 installNameCmd->set_current_version(0x10000);
440 installNameCmd->set_compatibility_version(0x10000);
441 installNameCmd->set_name_offset();
442 strcpy((char*)cmd + sizeof(macho_dylib_command<P>), "dyld_shared_cache_branch_islands");
443 // LC_SYMTAB
444 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
445 _symbolTableCmd = (macho_symtab_command<P>*)cmd;
446 _symbolTableCmd->set_cmd(LC_SYMTAB);
447 _symbolTableCmd->set_cmdsize(sizeof(macho_symtab_command<P>));
448 _symbolTableCmd->set_nsyms(stubCount);
449 _symbolTableCmd->set_symoff((uint32_t)(poolLinkEditFileOffset + linkeditOffsetSymbolTable));
450 _symbolTableCmd->set_stroff((uint32_t)(poolLinkEditFileOffset + linkeditOffsetSymbolPoolOffset));
451 _symbolTableCmd->set_strsize((uint32_t)(linkEditSegSize - linkeditOffsetSymbolPoolOffset));
452 // LC_DYSYMTAB
453 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
454 _dynamicSymbolTableCmd = (macho_dysymtab_command<P>*)cmd;
455 _dynamicSymbolTableCmd->set_cmd(LC_DYSYMTAB);
456 _dynamicSymbolTableCmd->set_cmdsize(sizeof(macho_dysymtab_command<P>));
457 _dynamicSymbolTableCmd->set_ilocalsym(0);
458 _dynamicSymbolTableCmd->set_nlocalsym(0);
459 _dynamicSymbolTableCmd->set_iextdefsym(0);
460 _dynamicSymbolTableCmd->set_nextdefsym(0);
461 _dynamicSymbolTableCmd->set_iundefsym(0);
462 _dynamicSymbolTableCmd->set_nundefsym(stubCount);
463 _dynamicSymbolTableCmd->set_tocoff(0);
464 _dynamicSymbolTableCmd->set_ntoc(0);
465 _dynamicSymbolTableCmd->set_modtaboff(0);
466 _dynamicSymbolTableCmd->set_nmodtab(0);
467 _dynamicSymbolTableCmd->set_extrefsymoff(0);
468 _dynamicSymbolTableCmd->set_nextrefsyms(0);
469 _dynamicSymbolTableCmd->set_indirectsymoff((uint32_t)(poolLinkEditFileOffset + linkeditOffsetIndirectSymbolTable));
470 _dynamicSymbolTableCmd->set_nindirectsyms(stubCount);
471 _dynamicSymbolTableCmd->set_extreloff(0);
472 _dynamicSymbolTableCmd->set_nextrel(0);
473 _dynamicSymbolTableCmd->set_locreloff(0);
474 _dynamicSymbolTableCmd->set_nlocrel(0);
475 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
476 // LC_UUID
477 _uuidCmd = (macho_uuid_command<P>*)cmd;
478 _uuidCmd->set_cmd(LC_UUID);
479 _uuidCmd->set_cmdsize(sizeof(macho_uuid_command<P>));
480 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
481
482 // write stubs section content
483 _stubInstructions = (uint32_t*)((uint8_t*)mh + _firstStubOffset);
484 for (unsigned i=0; i < stubCount; ++i) {
485 E::set32(_stubInstructions[i], 0xD4200000);
486 }
487
488 // write linkedit content
489 uint8_t* linkeditBufferStart = (uint8_t*)poolLinkEditStartAddr + cacheSlide;
490 // write symbol table
491 _symbolTable = (macho_nlist<P>*)(linkeditBufferStart);
492 for (unsigned i=0; i < stubCount; ++i) {
493 _symbolTable[i].set_n_strx(1);
494 _symbolTable[i].set_n_type(N_UNDF | N_EXT);
495 _symbolTable[i].set_n_sect(0);
496 _symbolTable[i].set_n_desc(0);
497 _symbolTable[i].set_n_value(0);
498 }
499 // write indirect symbol table
500 uint32_t* indirectSymboTable = (uint32_t*)(linkeditBufferStart + linkeditOffsetIndirectSymbolTable);
501 for (unsigned i=0; i < stubCount; ++i) {
502 P::E::set32(indirectSymboTable[i], i);
503 }
504 // write string pool
505 _stringPoolStart = (char*)(linkeditBufferStart + linkeditOffsetSymbolPoolOffset);
506 _stringPoolEnd = _stringPoolStart + linkEditSegSize - linkeditOffsetSymbolPoolOffset;
507 _stringPoolStart[0] = '\0';
508 strcpy(&_stringPoolStart[1], "<unused>");
509 _nextString = &_stringPoolStart[10];
510 }
511
512
513 template <typename P>
514 void BranchPoolDylib<P>::finalizeLoadCommands()
515 {
516 _symbolTableCmd->set_nsyms(_nextIndex);
517 _symbolTableCmd->set_strsize((uint32_t)(_nextString - _stringPoolStart));
518 _dynamicSymbolTableCmd->set_nundefsym(_nextIndex);
519
520 uint8_t digest[CC_MD5_DIGEST_LENGTH];
521 CC_MD5(_stubInstructions, _maxStubs*sizeof(uint32_t), digest);
522 _uuidCmd->set_uuid(digest);
523
524 if ( verbose ) {
525 _diagnostics.verbose("branch islands in image at 0x%0llX:\n", _startAddr);
526 for (uint32_t i=0; i < _nextIndex; ++i) {
527 _diagnostics.verbose(" 0x%llX %s\n", indexToAddr(i), _islandIndexToName[i]);
528 }
529 }
530 }
531
532 template <typename P>
533 uint64_t BranchPoolDylib<P>::getForwardBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools)
534 {
535 // check if we can re-used existing branch island
536 const auto& pos = _targetToIslandIndex.find(finalTargetAddr);
537 if ( pos != _targetToIslandIndex.end() )
538 return indexToAddr(pos->second);
539
540 // skip if instruction pool is full
541 if ( _nextIndex >= _maxStubs )
542 return 0;
543
544 // skip if string pool is full
545 if ( (_nextString + strlen(name)+1) >= _stringPoolEnd )
546 return 0;
547
548 uint64_t branchIslandTargetAddr = finalTargetAddr;
549 // if final target is too far, we need to use branch island in next pool
550 if ( (finalTargetAddr - _startAddr) > b128MegLimit ) {
551 BranchPoolDylib<P>* nextPool = nullptr;
552 for (size_t i=0; i < branchIslandPools.size()-1; ++i) {
553 if ( branchIslandPools[i] == this ) {
554 nextPool = branchIslandPools[i+1];
555 break;
556 }
557 }
558
559 if (nextPool == nullptr) {
560 _diagnostics.warning("BranchPoolDylib<P>::getForwardBranch: nextPool unreachable");
561 return 0;
562 }
563
564 branchIslandTargetAddr = nextPool->getForwardBranch(finalTargetAddr, name, branchIslandPools);
565 if ( branchIslandTargetAddr == 0 )
566 return 0; // next pool is full
567 }
568
569 // write branch instruction in stubs section
570 uint32_t index = _nextIndex++;
571 int64_t branchDelta = branchIslandTargetAddr - indexToAddr(index);
572 uint32_t branchInstr = 0x14000000 + ((branchDelta/4) & 0x03FFFFFF);
573 E::set32(_stubInstructions[index], branchInstr);
574
575 // update symbol table
576 _symbolTable[index].set_n_strx((uint32_t)(_nextString - _stringPoolStart));
577 strcpy(_nextString, name);
578 _nextString += (strlen(name) +1);
579
580 // record island
581 _targetToIslandIndex[finalTargetAddr] = index;
582 _islandIndexToName[index] = name;
583 return indexToAddr(index);
584 }
585
586 template <typename P>
587 uint64_t BranchPoolDylib<P>::getBackBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools)
588 {
589 // check if we can re-used existing branch island
590 const auto& pos = _targetToIslandIndex.find(finalTargetAddr);
591 if ( pos != _targetToIslandIndex.end() )
592 return indexToAddr(pos->second);
593
594 // skip if instruction pool is full
595 if ( _nextIndex >= _maxStubs )
596 return 0;
597
598 // skip if string pool is full
599 if ( (_nextString + strlen(name)+1) >= _stringPoolEnd )
600 return 0;
601
602 uint64_t branchIslandTargetAddr = finalTargetAddr;
603 // if final target is too far, we need to use branch island in next pool
604 if ( (indexToAddr(_nextIndex) - finalTargetAddr) > b128MegLimit ) {
605 BranchPoolDylib<P>* nextPool = nullptr;
606 for (long i=branchIslandPools.size()-1; i > 0; --i) {
607 if ( branchIslandPools[i] == this ) {
608 nextPool = branchIslandPools[i-1];
609 break;
610 }
611 }
612
613 if (nextPool == nullptr) {
614 _diagnostics.warning("BranchPoolDylib<P>::getBackBranch: nextPool unreachable");
615 return 0;
616 }
617
618 branchIslandTargetAddr = nextPool->getBackBranch(finalTargetAddr, name, branchIslandPools);
619 if ( branchIslandTargetAddr == 0 )
620 return 0; // next pool is full
621 }
622
623 // write branch instruction in stubs section
624 uint32_t index = _nextIndex++;
625 int64_t branchDelta = branchIslandTargetAddr - indexToAddr(index);
626 uint32_t branchInstr = 0x14000000 + ((branchDelta/4) & 0x03FFFFFF);
627 E::set32(_stubInstructions[index], branchInstr);
628
629 // update symbol table
630 _symbolTable[index].set_n_strx((uint32_t)(_nextString - _stringPoolStart));
631 strcpy(_nextString, name);
632 _nextString += (strlen(name) +1);
633
634 // record island
635 _targetToIslandIndex[finalTargetAddr] = index;
636 _islandIndexToName[index] = name;
637 return indexToAddr(index);
638 }
639
640 template <typename P>
641 void BranchPoolDylib<P>::printStats()
642 {
643 _diagnostics.verbose(" island pool at 0x%0llX has %u stubs and stringPool size=%lu\n", _startAddr, _nextIndex, _nextString - _stringPoolStart);
644 }
645
646
647
648 template <typename P>
649 class StubOptimizer {
650 public:
651 StubOptimizer(const DyldSharedCache* cache, macho_header<P>* mh, Diagnostics& diags);
652 void buildStubMap(const std::unordered_set<std::string>& neverStubEliminate);
653 void optimizeStubs(std::unordered_map<uint64_t,std::vector<uint64_t>>& targetToBranchIslands);
654 void optimizeCallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools);
655 const char* installName() { return _installName; }
656 const uint8_t* exportsTrie() { return &_linkeditBias[_dyldInfo->export_off()]; }
657 uint32_t exportsTrieSize() { return _dyldInfo->export_size(); }
658
659 uint32_t _stubCount = 0;
660 uint32_t _stubOptimizedCount = 0;
661 uint32_t _branchesCount = 0;
662 uint32_t _branchesModifiedCount = 0;
663 uint32_t _branchesDirectCount = 0;
664 uint32_t _branchesIslandCount = 0;
665
666 private:
667 Diagnostics _diagnostics;
668 typedef std::function<bool(uint8_t callSiteKind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction)> CallSiteHandler;
669 typedef typename P::uint_t pint_t;
670 typedef typename P::E E;
671
672 void forEachCallSiteToAStub(CallSiteHandler);
673 void optimizeArm64CallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools);
674 void optimizeArmCallSites();
675 void optimizeArmStubs();
676 uint64_t lazyPointerAddrFromArm64Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr);
677 #if SUPPORT_ARCH_arm64e
678 uint64_t lazyPointerAddrFromArm64eStub(const uint8_t* stubInstructions, uint64_t stubVMAddr);
679 #endif
680 uint32_t lazyPointerAddrFromArmStub(const uint8_t* stubInstructions, uint32_t stubVMAddr);
681 int32_t getDisplacementFromThumbBranch(uint32_t instruction, uint32_t instrAddr);
682 uint32_t setDisplacementInThumbBranch(uint32_t instruction, uint32_t instrAddr,
683 int32_t displacement, bool targetIsThumb);
684
685
686 struct AddressAndName { pint_t targetVMAddr; const char* targetName; };
687 typedef std::unordered_map<pint_t, AddressAndName> StubVMAddrToTarget;
688
689 static const int64_t b128MegLimit = 0x07FFFFFF;
690 static const int64_t b16MegLimit = 0x00FFFFFF;
691
692
693 macho_header<P>* _mh;
694 int64_t _cacheSlide = 0;
695 uint64_t _cacheUnslideAddr = 0;
696 bool _chainedFixups = false;
697 uint32_t _linkeditSize = 0;
698 uint64_t _linkeditAddr = 0;
699 const uint8_t* _linkeditBias = nullptr;
700 const char* _installName = nullptr;
701 const macho_symtab_command<P>* _symTabCmd = nullptr;
702 const macho_dysymtab_command<P>* _dynSymTabCmd = nullptr;
703 const macho_dyld_info_command<P>* _dyldInfo = nullptr;
704 macho_linkedit_data_command<P>* _splitSegInfoCmd = nullptr;
705 const macho_section<P>* _textSection = nullptr;
706 const macho_section<P>* _stubSection = nullptr;
707 uint32_t _textSectionIndex = 0;
708 uint32_t _stubSectionIndex = 0;
709 pint_t _textSegStartAddr = 0;
710 std::vector<macho_segment_command<P>*> _segCmds;
711 std::unordered_map<pint_t, pint_t> _stubAddrToLPAddr;
712 std::unordered_map<pint_t, pint_t> _lpAddrToTargetAddr;
713 std::unordered_map<pint_t, const char*> _targetAddrToName;
714 };
715
716 template <typename P>
717 StubOptimizer<P>::StubOptimizer(const DyldSharedCache* cache, macho_header<P>* mh, Diagnostics& diags)
718 : _mh(mh), _diagnostics(diags)
719 {
720 _cacheSlide = (long)cache - cache->unslidLoadAddress();
721 _cacheUnslideAddr = cache->unslidLoadAddress();
722 #if SUPPORT_ARCH_arm64e
723 _chainedFixups = (strcmp(cache->archName(), "arm64e") == 0);
724 #else
725 _chainedFixups = false;
726 #endif
727 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
728 const uint32_t cmd_count = mh->ncmds();
729 macho_segment_command<P>* segCmd;
730 uint32_t sectionIndex = 0;
731 const macho_load_command<P>* cmd = cmds;
732 for (uint32_t i = 0; i < cmd_count; ++i) {
733 switch (cmd->cmd()) {
734 case LC_ID_DYLIB:
735 _installName = ((macho_dylib_command<P>*)cmd)->name();
736 break;
737 case LC_SYMTAB:
738 _symTabCmd = (macho_symtab_command<P>*)cmd;
739 break;
740 case LC_DYSYMTAB:
741 _dynSymTabCmd = (macho_dysymtab_command<P>*)cmd;
742 break;
743 case LC_SEGMENT_SPLIT_INFO:
744 _splitSegInfoCmd = (macho_linkedit_data_command<P>*)cmd;
745 break;
746 case LC_DYLD_INFO:
747 case LC_DYLD_INFO_ONLY:
748 _dyldInfo = (macho_dyld_info_command<P>*)cmd;
749 break;
750 case macho_segment_command<P>::CMD:
751 segCmd =( macho_segment_command<P>*)cmd;
752 _segCmds.push_back(segCmd);
753 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
754 _linkeditBias = (uint8_t*)(segCmd->vmaddr() + _cacheSlide - segCmd->fileoff());
755 _linkeditSize = (uint32_t)segCmd->vmsize();
756 _linkeditAddr = segCmd->vmaddr();
757 }
758 else if ( strcmp(segCmd->segname(), "__TEXT") == 0 ) {
759 _textSegStartAddr = (pint_t)segCmd->vmaddr();
760 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
761 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
762 for (const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
763 ++sectionIndex;
764 if ( strcmp(sect->sectname(), "__text") == 0 ) {
765 _textSection = sect;
766 _textSectionIndex = sectionIndex;
767 }
768 else if ( ((sect->flags() & SECTION_TYPE) == S_SYMBOL_STUBS) && (sect->size() != 0) ) {
769 _stubSection = sect;
770 _stubSectionIndex = sectionIndex;
771 }
772 }
773 }
774 break;
775 }
776 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
777 }
778 }
779
780
781
782 template <typename P>
783 uint32_t StubOptimizer<P>::lazyPointerAddrFromArmStub(const uint8_t* stubInstructions, uint32_t stubVMAddr)
784 {
785 uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
786 uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions+4));
787 uint32_t stubInstr3 = E::get32(*(uint32_t*)(stubInstructions+8));
788 int32_t stubData = E::get32(*(uint32_t*)(stubInstructions+12));
789 if ( stubInstr1 != 0xe59fc004 ) {
790 _diagnostics.warning("first instruction of stub (0x%08X) is not 'ldr ip, pc + 12' for stub at addr 0x%0llX in %s",
791 stubInstr1, (uint64_t)stubVMAddr, _installName);
792 return 0;
793 }
794 if ( stubInstr2 != 0xe08fc00c ) {
795 _diagnostics.warning("second instruction of stub (0x%08X) is not 'add ip, pc, ip' for stub at addr 0x%0llX in %s",
796 stubInstr1, (uint64_t)stubVMAddr, _installName);
797 return 0;
798 }
799 if ( stubInstr3 != 0xe59cf000 ) {
800 _diagnostics.warning("third instruction of stub (0x%08X) is not 'ldr pc, [ip]' for stub at addr 0x%0llX in %s",
801 stubInstr1, (uint64_t)stubVMAddr, _installName);
802 return 0;
803 }
804 return stubVMAddr + 12 + stubData;
805 }
806
807
808 template <typename P>
809 uint64_t StubOptimizer<P>::lazyPointerAddrFromArm64Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr)
810 {
811 uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
812 if ( (stubInstr1 & 0x9F00001F) != 0x90000010 ) {
813 _diagnostics.warning("first instruction of stub (0x%08X) is not ADRP for stub at addr 0x%0llX in %s",
814 stubInstr1, (uint64_t)stubVMAddr, _installName);
815 return 0;
816 }
817 int32_t adrpValue = ((stubInstr1 & 0x00FFFFE0) >> 3) | ((stubInstr1 & 0x60000000) >> 29);
818 if ( stubInstr1 & 0x00800000 )
819 adrpValue |= 0xFFF00000;
820 uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions + 4));
821 if ( (stubInstr2 & 0xFFC003FF) != 0xF9400210 ) {
822 _diagnostics.warning("second instruction of stub (0x%08X) is not LDR for stub at addr 0x%0llX in %s",
823 stubInstr2, (uint64_t)stubVMAddr, _installName);
824 return 0;
825 }
826 uint32_t ldrValue = ((stubInstr2 >> 10) & 0x00000FFF);
827 return (stubVMAddr & (-4096)) + adrpValue*4096 + ldrValue*8;
828 }
829
#if SUPPORT_ARCH_arm64e
//
// Decodes the first three instructions of an arm64e stub,
//      ADRP  X17, <lazy pointer>@page
//      ADD   X17, X17, <lazy pointer>@pageoff
//      LDR   X16, [X17]
// and returns the address of the lazy pointer:
//      page(stubVMAddr) + adrpImm*4096 + addImm
//
// Returns 0 after recording a warning if any instruction has another shape; the
// warning names and shows the instruction that mismatched.
//
template <typename P>
uint64_t StubOptimizer<P>::lazyPointerAddrFromArm64eStub(const uint8_t* stubInstructions, uint64_t stubVMAddr)
{
    // ADRP  X17, <lazy pointer>@page
    uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
    if ( (stubInstr1 & 0x9F00001F) != 0x90000011 ) {
        _diagnostics.warning("first instruction of stub (0x%08X) is not ADRP for stub at addr 0x%0llX in %s",
                             stubInstr1, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    // reassemble the 21-bit page immediate (immhi:immlo) and sign extend it
    int32_t adrpValue = ((stubInstr1 & 0x00FFFFE0) >> 3) | ((stubInstr1 & 0x60000000) >> 29);
    if ( stubInstr1 & 0x00800000 )
        adrpValue |= 0xFFF00000;

    // ADD   X17, X17, <lazy pointer>@pageoff
    uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions + 4));
    if ( (stubInstr2 & 0xFFC003FF) != 0x91000231 ) {
        _diagnostics.warning("second instruction of stub (0x%08X) is not ADD for stub at addr 0x%0llX in %s",
                             stubInstr2, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    uint32_t addValue = ((stubInstr2 & 0x003FFC00) >> 10);

    // LDR   X16, [X17]
    uint32_t stubInstr3 = E::get32(*(uint32_t*)(stubInstructions + 8));
    if ( stubInstr3 != 0xF9400230 ) {
        _diagnostics.warning("third instruction of stub (0x%08X) is not LDR for stub at addr 0x%0llX in %s",
                             stubInstr3, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    // widen before scaling: the page delta can reach +/-4GB, which does not fit in
    // the 32-bit signed multiplication "adrpValue*4096"
    const int64_t pageDelta = (int64_t)adrpValue * 4096;
    return (stubVMAddr & (-4096)) + pageDelta + addValue;
}
#endif
864
865
866
867 template <typename P>
868 void StubOptimizer<P>::buildStubMap(const std::unordered_set<std::string>& neverStubEliminate)
869 {
870 // find all stubs and lazy pointers
871 const macho_nlist<P>* symbolTable = (const macho_nlist<P>*)(&_linkeditBias[_symTabCmd->symoff()]);
872 const char* symbolStrings = (char*)(&_linkeditBias[_symTabCmd->stroff()]);
873 const uint32_t* const indirectTable = (uint32_t*)(&_linkeditBias[_dynSymTabCmd->indirectsymoff()]);
874 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)_mh + sizeof(macho_header<P>));
875 const uint32_t cmd_count = _mh->ncmds();
876 const macho_load_command<P>* cmd = cmds;
877 for (uint32_t i = 0; i < cmd_count; ++i) {
878 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
879 macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
880 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
881 macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
882 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
883 if ( sect->size() == 0 )
884 continue;
885 unsigned sectionType = (sect->flags() & SECTION_TYPE);
886 const uint32_t indirectTableOffset = sect->reserved1();
887 if ( sectionType == S_SYMBOL_STUBS ) {
888 const uint32_t stubSize = sect->reserved2();
889 _stubCount = (uint32_t)(sect->size() / stubSize);
890 pint_t stubVMAddr = (pint_t)sect->addr();
891 for (uint32_t j=0; j < _stubCount; ++j, stubVMAddr += stubSize) {
892 uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
893 switch ( symbolIndex ) {
894 case INDIRECT_SYMBOL_ABS:
895 case INDIRECT_SYMBOL_LOCAL:
896 case INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL:
897 break;
898 default:
899 if ( symbolIndex >= _symTabCmd->nsyms() ) {
900 _diagnostics.warning("symbol index out of range (%d of %d) for stub at addr 0x%0llX in %s",
901 symbolIndex, _symTabCmd->nsyms(), (uint64_t)stubVMAddr, _installName);
902 continue;
903 }
904 const macho_nlist<P>* sym = &symbolTable[symbolIndex];
905 uint32_t stringOffset = sym->n_strx();
906 if ( stringOffset > _symTabCmd->strsize() ) {
907 _diagnostics.warning("symbol string offset out of range (%u of %u) for stub at addr 0x%0llX in %s",
908 stringOffset, sym->n_strx(), (uint64_t)stubVMAddr, _installName);
909 continue;
910 }
911 const char* symName = &symbolStrings[stringOffset];
912 if ( neverStubEliminate.count(symName) ) {
913 //fprintf(stderr, "not bypassing stub to %s in %s because target is interposable\n", symName, _installName);
914 continue;
915 }
916 const uint8_t* stubInstrs = (uint8_t*)(long)stubVMAddr + _cacheSlide;
917 pint_t targetLPAddr = 0;
918 switch ( _mh->cputype() ) {
919 case CPU_TYPE_ARM64:
920 case CPU_TYPE_ARM64_32:
921 #if SUPPORT_ARCH_arm64e
922 if (_mh->cpusubtype() == CPU_SUBTYPE_ARM64_E)
923 targetLPAddr = (pint_t)lazyPointerAddrFromArm64eStub(stubInstrs, stubVMAddr);
924 else
925 #endif
926 targetLPAddr = (pint_t)lazyPointerAddrFromArm64Stub(stubInstrs, stubVMAddr);
927 break;
928 case CPU_TYPE_ARM:
929 targetLPAddr = (pint_t)lazyPointerAddrFromArmStub(stubInstrs, (uint32_t)stubVMAddr);
930 break;
931 }
932 if ( targetLPAddr != 0 )
933 _stubAddrToLPAddr[stubVMAddr] = targetLPAddr;
934 break;
935 }
936 }
937 }
938 else if ( (sectionType == S_LAZY_SYMBOL_POINTERS) || (sectionType == S_NON_LAZY_SYMBOL_POINTERS) ) {
939 pint_t lpVMAddr;
940 pint_t* lpContent = (pint_t*)(sect->addr() + _cacheSlide);
941 uint32_t elementCount = (uint32_t)(sect->size() / sizeof(pint_t));
942 uint64_t textSegStartAddr = _segCmds[0]->vmaddr();
943 uint64_t textSegEndAddr = _segCmds[0]->vmaddr() + _segCmds[0]->vmsize();
944 pint_t lpValue;
945 for (uint32_t j=0; j < elementCount; ++j) {
946 uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
947 switch ( symbolIndex ) {
948 case INDIRECT_SYMBOL_ABS:
949 case INDIRECT_SYMBOL_LOCAL:
950 case INDIRECT_SYMBOL_LOCAL|INDIRECT_SYMBOL_ABS:
951 break;
952 default:
953 lpValue = (pint_t)P::getP(lpContent[j]);
954
955 // Fixup threaded rebase/bind
956 if ( _chainedFixups ) {
957 dyld3::MachOLoaded::ChainedFixupPointerOnDisk ptr;
958 ptr.raw = lpValue;
959 assert(ptr.authRebase.bind == 0);
960 if ( ptr.authRebase.auth ) {
961 lpValue = (pint_t)(_cacheUnslideAddr + ptr.authRebase.target);
962 }
963 else {
964 lpValue = (pint_t)ptr.plainRebase.signExtendedTarget();
965 }
966 }
967
968 lpVMAddr = (pint_t)sect->addr() + j * sizeof(pint_t);
969 if ( symbolIndex >= _symTabCmd->nsyms() ) {
970 _diagnostics.warning("symbol index out of range (%d of %d) for lazy pointer at addr 0x%0llX in %s",
971 symbolIndex, _symTabCmd->nsyms(), (uint64_t)lpVMAddr, _installName);
972 continue;
973 }
974 const macho_nlist<P>* sym = &symbolTable[symbolIndex];
975 uint32_t stringOffset = sym->n_strx();
976 if ( stringOffset > _symTabCmd->strsize() ) {
977 _diagnostics.warning("symbol string offset out of range (%u of %u) for lazy pointer at addr 0x%0llX in %s",
978 stringOffset, sym->n_strx(), (uint64_t)lpVMAddr, _installName);
979 continue;
980 }
981 const char* symName = &symbolStrings[stringOffset];
982 if ( (lpValue > textSegStartAddr) && (lpValue< textSegEndAddr) ) {
983 //fprintf(stderr, "skipping lazy pointer at 0x%0lX to %s in %s because target is within dylib\n", (long)lpVMAddr, symName, _installName);
984 }
985 else if ( (sizeof(pint_t) == 8) && ((lpValue % 4) != 0) ) {
986 _diagnostics.warning("lazy pointer at 0x%0llX does not point to 4-byte aligned address(0x%0llX) in %s",
987 (uint64_t)lpVMAddr, (uint64_t)lpValue, _installName);
988 }
989 else {
990 _lpAddrToTargetAddr[lpVMAddr] = lpValue;
991 _targetAddrToName[lpValue] = symName;
992 }
993 break;
994 }
995 }
996 }
997 }
998 }
999 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
1000 }
1001 }
1002
1003
1004 template <typename P>
1005 void StubOptimizer<P>::forEachCallSiteToAStub(CallSiteHandler handler)
1006 {
1007 if (_diagnostics.hasError())
1008 return;
1009 const uint8_t* infoStart = &_linkeditBias[_splitSegInfoCmd->dataoff()];
1010 const uint8_t* infoEnd = &infoStart[_splitSegInfoCmd->datasize()];
1011 if ( *infoStart++ != DYLD_CACHE_ADJ_V2_FORMAT ) {
1012 _diagnostics.error("malformed split seg info in %s", _installName);
1013 return;
1014 }
1015
1016 uint8_t* textSectionContent = (uint8_t*)(_textSection->addr() + _cacheSlide);
1017
1018 // Whole :== <count> FromToSection+
1019 // FromToSection :== <from-sect-index> <to-sect-index> <count> ToOffset+
1020 // ToOffset :== <to-sect-offset-delta> <count> FromOffset+
1021 // FromOffset :== <kind> <count> <from-sect-offset-delta>
1022 const uint8_t* p = infoStart;
1023 uint64_t sectionCount = read_uleb128(p, infoEnd);
1024 for (uint64_t i=0; i < sectionCount; ++i) {
1025 uint64_t fromSectionIndex = read_uleb128(p, infoEnd);
1026 uint64_t toSectionIndex = read_uleb128(p, infoEnd);
1027 uint64_t toOffsetCount = read_uleb128(p, infoEnd);
1028 uint64_t toSectionOffset = 0;
1029 for (uint64_t j=0; j < toOffsetCount; ++j) {
1030 uint64_t toSectionDelta = read_uleb128(p, infoEnd);
1031 uint64_t fromOffsetCount = read_uleb128(p, infoEnd);
1032 toSectionOffset += toSectionDelta;
1033 for (uint64_t k=0; k < fromOffsetCount; ++k) {
1034 uint64_t kind = read_uleb128(p, infoEnd);
1035 if ( kind > 13 ) {
1036 _diagnostics.error("bad kind (%llu) value in %s\n", kind, _installName);
1037 }
1038 uint64_t fromSectDeltaCount = read_uleb128(p, infoEnd);
1039 uint64_t fromSectionOffset = 0;
1040 for (uint64_t l=0; l < fromSectDeltaCount; ++l) {
1041 uint64_t delta = read_uleb128(p, infoEnd);
1042 fromSectionOffset += delta;
1043 if ( (fromSectionIndex == _textSectionIndex) && (toSectionIndex == _stubSectionIndex) ) {
1044 uint32_t* instrPtr = (uint32_t*)(textSectionContent + fromSectionOffset);
1045 uint64_t instrAddr = _textSection->addr() + fromSectionOffset;
1046 uint64_t stubAddr = _stubSection->addr() + toSectionOffset;
1047 uint32_t instruction = E::get32(*instrPtr);
1048 _branchesCount++;
1049 if ( handler(kind, instrAddr, stubAddr, instruction) ) {
1050 _branchesModifiedCount++;
1051 E::set32(*instrPtr, instruction);
1052 }
1053 }
1054 }
1055 }
1056 }
1057 }
1058 }
1059
1060
1061 /// Extract displacement from a thumb b/bl/blx instruction.
1062 template <typename P>
1063 int32_t StubOptimizer<P>::getDisplacementFromThumbBranch(uint32_t instruction, uint32_t instrAddr)
1064 {
1065 bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
1066 uint32_t s = (instruction >> 10) & 0x1;
1067 uint32_t j1 = (instruction >> 29) & 0x1;
1068 uint32_t j2 = (instruction >> 27) & 0x1;
1069 uint32_t imm10 = instruction & 0x3FF;
1070 uint32_t imm11 = (instruction >> 16) & 0x7FF;
1071 uint32_t i1 = (j1 == s);
1072 uint32_t i2 = (j2 == s);
1073 uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
1074 int32_t sdis = dis;
1075 int32_t result = s ? (sdis | 0xFE000000) : sdis;
1076 if ( is_blx && (instrAddr & 0x2) ) {
1077 // The thumb blx instruction always has low bit of imm11 as zero. The way
1078 // a 2-byte aligned blx can branch to a 4-byte aligned ARM target is that
1079 // the blx instruction always 4-byte aligns the pc before adding the
1080 // displacement from the blx. We must emulate that when decoding this.
1081 result -= 2;
1082 }
1083 return result;
1084 }
1085
1086 /// Update a thumb b/bl/blx instruction, switching bl <-> blx as needed.
1087 template <typename P>
1088 uint32_t StubOptimizer<P>::setDisplacementInThumbBranch(uint32_t instruction, uint32_t instrAddr,
1089 int32_t displacement, bool targetIsThumb) {
1090 if ( (displacement > 16777214) || (displacement < (-16777216)) ) {
1091 _diagnostics.error("thumb branch out of range at 0x%0X in %s", instrAddr, _installName);
1092 return 0;
1093 }
1094 bool is_bl = ((instruction & 0xD000F800) == 0xD000F000);
1095 bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
1096 bool is_b = ((instruction & 0xD000F800) == 0x9000F000);
1097 uint32_t newInstruction = (instruction & 0xD000F800);
1098 if (is_bl || is_blx) {
1099 if (targetIsThumb) {
1100 newInstruction = 0xD000F000; // Use bl
1101 }
1102 else {
1103 newInstruction = 0xC000F000; // Use blx
1104 // See note in getDisplacementFromThumbBranch() about blx.
1105 if (instrAddr & 0x2)
1106 displacement += 2;
1107 }
1108 }
1109 else if (is_b) {
1110 if ( !targetIsThumb ) {
1111 _diagnostics.error("no pc-rel thumb branch instruction that switches to arm mode at 0x%0X in %s", instrAddr, _installName);
1112 return 0;
1113 }
1114 }
1115 else {
1116 _diagnostics.error("not b/bl/blx at 0x%0X in %s", instrAddr, _installName);
1117 return 0;
1118 }
1119 uint32_t s = (uint32_t)(displacement >> 24) & 0x1;
1120 uint32_t i1 = (uint32_t)(displacement >> 23) & 0x1;
1121 uint32_t i2 = (uint32_t)(displacement >> 22) & 0x1;
1122 uint32_t imm10 = (uint32_t)(displacement >> 12) & 0x3FF;
1123 uint32_t imm11 = (uint32_t)(displacement >> 1) & 0x7FF;
1124 uint32_t j1 = (i1 == s);
1125 uint32_t j2 = (i2 == s);
1126 uint32_t nextDisp = (j1 << 13) | (j2 << 11) | imm11;
1127 uint32_t firstDisp = (s << 10) | imm10;
1128 newInstruction |= (nextDisp << 16) | firstDisp;
1129 return newInstruction;
1130 }
1131
1132
1133 template <typename P>
1134 void StubOptimizer<P>::optimizeArmCallSites()
1135 {
1136 forEachCallSiteToAStub([&](uint8_t kind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction) -> bool {
1137 if ( kind == DYLD_CACHE_ADJ_V2_THUMB_BR22 ) {
1138 bool is_bl = ((instruction & 0xD000F800) == 0xD000F000);
1139 bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
1140 bool is_b = ((instruction & 0xD000F800) == 0x9000F000);
1141 if ( !is_bl && !is_blx && !is_b ){
1142 _diagnostics.warning("non-branch instruction at 0x%0llX in %s", callSiteAddr, _installName);
1143 return false;
1144 }
1145 int32_t brDelta = getDisplacementFromThumbBranch(instruction, (uint32_t)callSiteAddr);
1146 pint_t targetAddr = (pint_t)callSiteAddr + 4 + brDelta;
1147 if ( targetAddr != stubAddr ) {
1148 _diagnostics.warning("stub target mismatch at callsite 0x%0llX in %s", callSiteAddr, _installName);
1149 return false;
1150 }
1151 // ignore branch if not to a known stub
1152 const auto& pos = _stubAddrToLPAddr.find(targetAddr);
1153 if ( pos == _stubAddrToLPAddr.end() )
1154 return false;
1155 // ignore branch if lazy pointer is not known (could be resolver based)
1156 pint_t lpAddr = pos->second;
1157 const auto& pos2 = _lpAddrToTargetAddr.find(lpAddr);
1158 if ( pos2 == _lpAddrToTargetAddr.end() )
1159 return false;
1160 uint64_t finalTargetAddr = pos2->second;
1161 int64_t deltaToFinalTarget = finalTargetAddr - (callSiteAddr + 4);
1162 // if final target within range, change to branch there directly
1163 if ( (deltaToFinalTarget > -b16MegLimit) && (deltaToFinalTarget < b16MegLimit) ) {
1164 bool targetIsThumb = finalTargetAddr & 1;
1165 instruction = setDisplacementInThumbBranch(instruction, (uint32_t)callSiteAddr, (int32_t)deltaToFinalTarget, targetIsThumb);
1166 if (_diagnostics.hasError())
1167 return false;
1168 _branchesDirectCount++;
1169 return true;
1170 }
1171 }
1172 else if ( kind == DYLD_CACHE_ADJ_V2_ARM_BR24 ) {
1173 // too few of these to be worth trying to optimize
1174 }
1175
1176 return false;
1177 });
1178 if (_diagnostics.hasError())
1179 return;
1180 }
1181
1182
1183 template <typename P>
1184 void StubOptimizer<P>::optimizeArmStubs()
1185 {
1186 for (const auto& stubEntry : _stubAddrToLPAddr) {
1187 pint_t stubVMAddr = stubEntry.first;
1188 pint_t lpVMAddr = stubEntry.second;
1189 const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
1190 if ( pos == _lpAddrToTargetAddr.end() )
1191 return;
1192 pint_t targetVMAddr = pos->second;
1193
1194 int32_t delta = (int32_t)(targetVMAddr - (stubVMAddr + 12));
1195 uint32_t* stubInstructions = (uint32_t*)((uint8_t*)(long)stubVMAddr + _cacheSlide);
1196 assert(stubInstructions[0] == 0xe59fc004);
1197 stubInstructions[0] = 0xe59fc000; // ldr ip, L0
1198 stubInstructions[1] = 0xe08ff00c; // add pc, pc, ip
1199 stubInstructions[2] = delta; // L0: .long xxxx
1200 stubInstructions[3] = 0xe7ffdefe; // trap
1201 _stubOptimizedCount++;
1202 }
1203 }
1204
1205
1206
1207
1208 template <typename P>
1209 void StubOptimizer<P>::optimizeArm64CallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools)
1210 {
1211 forEachCallSiteToAStub([&](uint8_t kind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction) -> bool {
1212 if ( kind != DYLD_CACHE_ADJ_V2_ARM64_BR26 )
1213 return false;
1214 // skip all but BL or B
1215 if ( (instruction & 0x7C000000) != 0x14000000 )
1216 return false;
1217 // compute target of branch instruction
1218 int32_t brDelta = (instruction & 0x03FFFFFF) << 2;
1219 if ( brDelta & 0x08000000 )
1220 brDelta |= 0xF0000000;
1221 uint64_t targetAddr = callSiteAddr + (int64_t)brDelta;
1222 if ( targetAddr != stubAddr ) {
1223 _diagnostics.warning("stub target mismatch");
1224 return false;
1225 }
1226 // ignore branch if not to a known stub
1227 const auto& pos = _stubAddrToLPAddr.find((pint_t)targetAddr);
1228 if ( pos == _stubAddrToLPAddr.end() )
1229 return false;
1230 // ignore branch if lazy pointer is not known (could be resolver based)
1231 uint64_t lpAddr = pos->second;
1232 const auto& pos2 = _lpAddrToTargetAddr.find((pint_t)lpAddr);
1233 if ( pos2 == _lpAddrToTargetAddr.end() )
1234 return false;
1235 uint64_t finalTargetAddr = pos2->second;
1236 int64_t deltaToFinalTarget = finalTargetAddr - callSiteAddr;
1237 // if final target within range, change to branch there directly
1238 if ( (deltaToFinalTarget > -b128MegLimit) && (deltaToFinalTarget < b128MegLimit) ) {
1239 instruction= (instruction & 0xFC000000) | ((deltaToFinalTarget >> 2) & 0x03FFFFFF);
1240 _branchesDirectCount++;
1241 return true;
1242 }
1243 // find closest branch island pool between instruction and target and get island
1244 const auto& pos3 = _targetAddrToName.find((pint_t)finalTargetAddr);
1245 if ( pos3 == _targetAddrToName.end() )
1246 return false;
1247 const char* targetName = pos3->second;
1248 if ( finalTargetAddr > callSiteAddr ) {
1249 // target is after branch so find first pool after branch
1250 for ( BranchPoolDylib<P>* pool : branchIslandPools ) {
1251 if ( (pool->addr() > callSiteAddr) && (pool->addr() < finalTargetAddr) ) {
1252 uint64_t brIslandAddr = pool->getForwardBranch(finalTargetAddr, targetName, branchIslandPools);
1253 if ( brIslandAddr == 0 ) {
1254 // branch island pool full
1255 _diagnostics.warning("pool full. Can't optimizer branch to %s from 0x%llX in %s\n", targetName, callSiteAddr, _installName);
1256 break;
1257 }
1258 int64_t deltaToTarget = brIslandAddr - callSiteAddr;
1259 instruction = (instruction & 0xFC000000) | ((deltaToTarget >> 2) & 0x03FFFFFF);
1260 _branchesIslandCount++;
1261 return true;
1262 }
1263 }
1264 }
1265 else {
1266 // target is before branch so find closest pool before branch
1267 for (size_t j = branchIslandPools.size(); j > 0; --j) {
1268 BranchPoolDylib<P>* pool = branchIslandPools[j-1];
1269 if ( (pool->addr() < callSiteAddr) && (pool->addr() > finalTargetAddr) ) {
1270 uint64_t brIslandAddr = pool->getBackBranch(finalTargetAddr, targetName, branchIslandPools);
1271 if ( brIslandAddr == 0 ) {
1272 // branch island pool full
1273 _diagnostics.warning("pool full. Can't optimizer branch to %s from 0x%llX in %s\n", targetName, callSiteAddr, _installName);
1274 break;
1275 }
1276 int64_t deltaToTarget = brIslandAddr - callSiteAddr;
1277 instruction = (instruction & 0xFC000000) | ((deltaToTarget >> 2) & 0x03FFFFFF);
1278 _branchesIslandCount++;
1279 return true;
1280 }
1281 }
1282 }
1283 return false;
1284 });
1285 if (_diagnostics.hasError())
1286 return;
1287 }
1288
1289
1290 template <typename P>
1291 void StubOptimizer<P>::optimizeCallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools)
1292 {
1293 if ( _textSection == NULL )
1294 return;
1295 if ( _stubSection == NULL )
1296 return;
1297
1298
1299 switch ( _mh->cputype() ) {
1300 case CPU_TYPE_ARM64:
1301 case CPU_TYPE_ARM64_32:
1302 optimizeArm64CallSites(branchIslandPools);
1303 if ( verbose ) {
1304 _diagnostics.verbose("%5u branches in __text, %5u changed to direct branches, %5u changed to use islands for %s\n",
1305 _branchesCount, _branchesDirectCount, _branchesIslandCount, _installName);
1306 }
1307 break;
1308 case CPU_TYPE_ARM:
1309 optimizeArmCallSites();
1310 optimizeArmStubs();
1311 if ( verbose ) {
1312 _diagnostics.verbose("%3u of %3u stubs optimized. %5u branches in __text, %5u changed to direct branches for %s\n",
1313 _stubOptimizedCount, _stubCount, _branchesCount, _branchesDirectCount, _installName);
1314 }
1315 break;
1316 }
1317 }
1318
1319 template <typename P>
1320 void bypassStubs(DyldSharedCache* cache, const std::string& archName, const std::vector<uint64_t>& branchPoolStartAddrs,
1321 uint64_t branchPoolsLinkEditStartAddr, uint64_t branchPoolsLinkEditStartFileOffset,
1322 const char* const neverStubEliminateDylibs[], Diagnostics& diags)
1323 {
1324 diags.verbose("Stub elimination optimization:\n");
1325
1326 // construct a StubOptimizer for each image
1327 __block std::vector<StubOptimizer<P>*> optimizers;
1328 cache->forEachImage(^(const mach_header* mh, const char* installName) {
1329 optimizers.push_back(new StubOptimizer<P>(cache, (macho_header<P>*)mh, diags));
1330 });
1331
1332 // construct a BranchPoolDylib for each pool
1333 std::vector<BranchPoolDylib<P>*> pools;
1334
1335 if ( startsWith(archName, "arm64") ) {
1336 // Find hole at end of linkedit region for branch pool linkedits
1337 __block uint64_t textRegionStartAddr = 0;
1338 __block uint64_t linkEditRegionStartAddr = 0;
1339 __block uint64_t linkEditRegionEndAddr = 0;
1340 __block uint64_t linkEditRegionStartCacheOffset = 0;
1341 cache->forEachRegion(^(const void* content, uint64_t vmAddr, uint64_t size, uint32_t permissions) {
1342 if ( permissions == (PROT_READ|PROT_EXEC) ) {
1343 textRegionStartAddr = vmAddr;
1344 }
1345 else if ( permissions == PROT_READ ) {
1346 linkEditRegionStartAddr = vmAddr;
1347 linkEditRegionEndAddr = vmAddr + size;
1348 linkEditRegionStartCacheOffset = (char*)content - (char*)cache;
1349 }
1350 });
1351 __block uint64_t lastLinkEditRegionUsedOffset = 0;
1352 cache->forEachImage(^(const mach_header* mh, const char* installName) {
1353 ((dyld3::MachOFile*)mh)->forEachSegment(^(const dyld3::MachOFile::SegmentInfo& info, bool &stop) {
1354 if ( strcmp(info.segName, "__LINKEDIT") == 0 ) {
1355 if ( info.fileOffset >= lastLinkEditRegionUsedOffset )
1356 lastLinkEditRegionUsedOffset = info.fileOffset + info.vmSize;
1357 }
1358 });
1359 });
1360 uint64_t allPoolsLinkEditStartAddr = branchPoolsLinkEditStartAddr;
1361 if ( !branchPoolStartAddrs.empty() ) {
1362 uint64_t poolLinkEditStartAddr = allPoolsLinkEditStartAddr;
1363 uint64_t poolLinkEditFileOffset = branchPoolsLinkEditStartFileOffset;
1364 const uint64_t poolSize = branchPoolLinkEditSize("arm64");
1365 for (uint64_t poolAddr : branchPoolStartAddrs) {
1366 pools.push_back(new BranchPoolDylib<P>(cache, poolAddr, textRegionStartAddr, poolLinkEditStartAddr, poolLinkEditFileOffset, diags));
1367 poolLinkEditStartAddr += poolSize;
1368 poolLinkEditFileOffset += poolSize;
1369 }
1370 }
1371 }
1372
1373 // build set of functions to never stub-eliminate because tools may need to override them
1374 std::unordered_set<std::string> neverStubEliminate;
1375 for (const char** p=sNeverStubEliminateSymbols; *p != nullptr; ++p) {
1376 neverStubEliminate.insert(*p);
1377 }
1378 for (const char* const* d=neverStubEliminateDylibs; *d != nullptr; ++d) {
1379 for (StubOptimizer<P>* op : optimizers) {
1380 if ( strcmp(op->installName(), *d) == 0 ) {
1381 // add all exports
1382 const uint8_t* exportsStart = op->exportsTrie();
1383 const uint8_t* exportsEnd = exportsStart + op->exportsTrieSize();
1384 std::vector<ExportInfoTrie::Entry> exports;
1385 if ( !ExportInfoTrie::parseTrie(exportsStart, exportsEnd, exports) ) {
1386 diags.error("malformed exports trie in %s", *d);
1387 return;
1388 }
1389 for(const ExportInfoTrie::Entry& entry : exports) {
1390 neverStubEliminate.insert(entry.name);
1391 }
1392 }
1393 }
1394 }
1395
1396 // build maps of stubs-to-lp and lp-to-target
1397 for (StubOptimizer<P>* op : optimizers)
1398 op->buildStubMap(neverStubEliminate);
1399
1400 // optimize call sites to by-pass stubs or jump through island
1401 for (StubOptimizer<P>* op : optimizers)
1402 op->optimizeCallSites(pools);
1403
1404 // final fix ups in branch pools
1405 for (BranchPoolDylib<P>* pool : pools) {
1406 pool->finalizeLoadCommands();
1407 pool->printStats();
1408 }
1409
1410 // write total optimization info
1411 uint32_t callSiteCount = 0;
1412 uint32_t callSiteDirectOptCount = 0;
1413 uint32_t callSiteOneHopOptCount = 0;
1414 for (StubOptimizer<P>* op : optimizers) {
1415 callSiteCount += op->_branchesCount;
1416 callSiteDirectOptCount += op->_branchesDirectCount;
1417 callSiteOneHopOptCount += op->_branchesIslandCount;
1418 }
1419 diags.verbose(" cache contains %u call sites of which %u were direct bound and %u were bound through islands\n", callSiteCount, callSiteDirectOptCount, callSiteOneHopOptCount);
1420
1421 // clean up
1422 for (StubOptimizer<P>* op : optimizers)
1423 delete op;
1424 for (BranchPoolDylib<P>* p : pools)
1425 delete p;
1426
1427 }
1428
1429 void CacheBuilder::optimizeAwayStubs(const std::vector<uint64_t>& branchPoolStartAddrs, uint64_t branchPoolsLinkEditStartAddr)
1430 {
1431 DyldSharedCache* dyldCache = (DyldSharedCache*)_readExecuteRegion.buffer;
1432 uint64_t branchPoolsLinkEditStartFileOffset = _readOnlyRegion.cacheFileOffset + branchPoolsLinkEditStartAddr - _readOnlyRegion.unslidLoadAddress;
1433 std::string archName = dyldCache->archName();
1434 #if SUPPORT_ARCH_arm64_32
1435 if ( startsWith(archName, "arm64_32") )
1436 bypassStubs<Pointer32<LittleEndian> >(dyldCache, archName, branchPoolStartAddrs, branchPoolsLinkEditStartAddr, branchPoolsLinkEditStartFileOffset, _s_neverStubEliminate, _diagnostics);
1437 else
1438 #endif
1439 if ( startsWith(archName, "arm64") )
1440 bypassStubs<Pointer64<LittleEndian> >(dyldCache, archName, branchPoolStartAddrs, branchPoolsLinkEditStartAddr, branchPoolsLinkEditStartFileOffset, _s_neverStubEliminate, _diagnostics);
1441 else if ( archName == "armv7k" )
1442 bypassStubs<Pointer32<LittleEndian>>(dyldCache, archName, branchPoolStartAddrs, branchPoolsLinkEditStartAddr, branchPoolsLinkEditStartFileOffset, _s_neverStubEliminate, _diagnostics);
1443 // no stub optimization done for other arches
1444 }
1445
1446
1447 /*
1448 template <typename P>
1449 void StubOptimizer<P>::optimizeStubs(std::unordered_map<uint64_t,std::vector<uint64_t>>& targetToBranchIslands)
1450 {
1451 for (const auto& stubEntry : _stubAddrToLPAddr) {
1452 pint_t stubVMAddr = stubEntry.first;
1453 pint_t lpVMAddr = stubEntry.second;
1454 const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
1455 if ( pos == _lpAddrToTargetAddr.end() )
1456 continue;
1457 pint_t targetVMAddr = pos->second;
1458 int64_t delta = targetVMAddr - stubVMAddr;
1459 if ( (delta > -b128MegLimit) && (delta < b128MegLimit) ) {
1460 // target within reach, change stub to direct branch
1461 uint32_t* stubInstructions = (uint32_t*)((uint8_t*)_cacheBuffer + _textSegCacheOffset + stubVMAddr -_textSegStartAddr);
1462 uint32_t stubInstr1 = E::get32(stubInstructions[0]);
1463 if ( (stubInstr1 & 0x9F00001F) != 0x90000010 ) {
1464 warning("first instruction of stub (0x%08X) is no longer ADRP for stub at addr 0x%0X in %s\n",
1465 stubInstr1, stubVMAddr, _installName);
1466 continue;
1467 }
1468 uint32_t directBranchInstr = 0x14000000 + ((delta/4) & 0x03FFFFFF);
1469 E::set32(stubInstructions[0], directBranchInstr);
1470 uint32_t brkInstr = 0xD4200000;
1471 E::set32(stubInstructions[1], brkInstr);
1472 E::set32(stubInstructions[2], brkInstr);
1473 _stubOptimizedCount++;
1474 targetToBranchIslands[targetVMAddr].push_back(stubVMAddr);
1475 }
1476 }
1477 verboseLog("%3u of %3u stubs optimized for %s\n", _stubOptimizedCount, _stubCount, _installName);
1478 }
1479
1480
1481 template <typename P>
1482 void StubOptimizer<P>::bypassStubs(std::unordered_map<uint64_t,std::vector<uint64_t>>& targetToBranchIslands)
1483 {
1484 if ( _textSection == NULL )
1485 return;
1486
1487 // scan __text section looking for B(L) instructions that branch to a stub
1488 unsigned instructionCount = (unsigned)(_textSection->size() / 4);
1489 uint32_t* instructions = (uint32_t*)((uint8_t*)_cacheBuffer + _textSegCacheOffset + _textSection->addr() -_textSegStartAddr);
1490 for (unsigned i=0; i < instructionCount; ++i) {
1491 uint32_t instr = E::get32(instructions[i]);
1492 // skip all but BL or B
1493 if ( (instr & 0x7C000000) != 0x14000000 )
1494 continue;
1495 // compute target of branch instruction
1496 int32_t brDelta = (instr & 0x03FFFFFF) << 2;
1497 if ( brDelta & 0x08000000 )
1498 brDelta |= 0xF0000000;
1499 uint64_t branchAddr = _textSection->addr() + i*4;
1500 uint64_t targetAddr = branchAddr + (int64_t)brDelta;
1501 // ignore branch if not to a known stub
1502 const auto& pos = _stubAddrToLPAddr.find(targetAddr);
1503 if ( pos == _stubAddrToLPAddr.end() )
1504 continue;
1505 _branchesCount++;
1506 // ignore branch if lazy pointer is not known (could be resolver based)
1507 const auto& pos2 = _lpAddrToTargetAddr.find(pos->second);
1508 if ( pos2 == _lpAddrToTargetAddr.end() )
1509 continue;
1510 uint64_t finalTargetAddr = pos2->second;
1511 int64_t deltaToFinalTarget = finalTargetAddr - branchAddr;
1512 // if final target within range, change to branch there directly
1513 if ( (deltaToFinalTarget > -b128MegLimit) && (deltaToFinalTarget < b128MegLimit) ) {
1514 uint32_t newInstr = (instr & 0xFC000000) | ((deltaToFinalTarget >> 2) & 0x03FFFFFF);
1515 E::set32(instructions[i], newInstr);
1516 _branchesDirectCount++;
1517 continue;
1518 }
1519 // see if there is an existing branch island in range that can be used
1520 std::vector<uint64_t>& existingBranchIslands = targetToBranchIslands[finalTargetAddr];
1521 for (uint64_t branchIslandAddr : existingBranchIslands) {
1522 int64_t deltaToBranchIsland = branchIslandAddr - branchAddr;
1523 // if final target within range, change to branch deltaToBranchIsland directly
1524 if ( (deltaToBranchIsland > -b128MegLimit) && (deltaToFinalTarget < b128MegLimit) ) {
1525 uint32_t newInstr = (instr & 0xFC000000) | ((deltaToBranchIsland >> 2) & 0x03FFFFFF);
1526 E::set32(instructions[i], newInstr);
1527 _branchesIslandCount++;
1528 break;
1529 }
1530 }
1531 }
1532 if ( verbose ) {
1533 verboseLog("%5u branches in __text, %5u changed to direct branches, %5u changed to indirect for %s\n",
1534 _branchesCount, _branchesDirectCount, _branchesIslandCount, _installName);
1535 }
1536 }
1537 */
1538