]> git.saurik.com Git - apple/dyld.git/blob - interlinked-dylibs/OptimizerBranches.cpp
dyld-421.2.tar.gz
[apple/dyld.git] / interlinked-dylibs / OptimizerBranches.cpp
1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*-
2 *
3 * Copyright (c) 2015 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 #include "OptimizerBranches.h"
26 #include "Trie.hpp"
27 #include "MachOFileAbstraction.hpp"
28 #include "Logging.h"
29
30 #include "dyld_cache_config.h"
31
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/mman.h>
35 #include <limits.h>
36 #include <stdarg.h>
37 #include <stdio.h>
38 #include <unistd.h>
39
40 #include <string>
41 #include <unordered_map>
42 #include <unordered_set>
43
44 #include <CommonCrypto/CommonDigest.h>
45
46
47 static const bool verbose = false;
48
49 // These are functions that are interposed by Instruments.app or ASan
50 static const char* sNeverStubEliminateSymbols[] = {
51 "___bzero",
52 "___cxa_atexit",
53 "___cxa_throw",
54 "__longjmp",
55 "__objc_autoreleasePoolPop",
56 "_accept",
57 "_access",
58 "_asctime",
59 "_asctime_r",
60 "_asprintf",
61 "_atoi",
62 "_atol",
63 "_atoll",
64 "_calloc",
65 "_chmod",
66 "_chown",
67 "_close",
68 "_confstr",
69 "_ctime",
70 "_ctime_r",
71 "_dispatch_after",
72 "_dispatch_after_f",
73 "_dispatch_async",
74 "_dispatch_async_f",
75 "_dispatch_barrier_async_f",
76 "_dispatch_group_async",
77 "_dispatch_group_async_f",
78 "_dispatch_source_set_cancel_handler",
79 "_dispatch_source_set_event_handler",
80 "_dispatch_sync_f",
81 "_dlclose",
82 "_dlopen",
83 "_dup",
84 "_dup2",
85 "_endgrent",
86 "_endpwent",
87 "_ether_aton",
88 "_ether_hostton",
89 "_ether_line",
90 "_ether_ntoa",
91 "_ether_ntohost",
92 "_fchmod",
93 "_fchown",
94 "_fclose",
95 "_fdopen",
96 "_fflush",
97 "_fopen",
98 "_fork",
99 "_fprintf",
100 "_free",
101 "_freopen",
102 "_frexp",
103 "_frexpf",
104 "_frexpl",
105 "_fscanf",
106 "_fstat",
107 "_fstatfs",
108 "_fstatfs64",
109 "_fsync",
110 "_ftime",
111 "_getaddrinfo",
112 "_getattrlist",
113 "_getcwd",
114 "_getgrent",
115 "_getgrgid",
116 "_getgrgid_r",
117 "_getgrnam",
118 "_getgrnam_r",
119 "_getgroups",
120 "_gethostbyaddr",
121 "_gethostbyname",
122 "_gethostbyname2",
123 "_gethostent",
124 "_getifaddrs",
125 "_getitimer",
126 "_getnameinfo",
127 "_getpass",
128 "_getpeername",
129 "_getpwent",
130 "_getpwnam",
131 "_getpwnam_r",
132 "_getpwuid",
133 "_getpwuid_r",
134 "_getsockname",
135 "_getsockopt",
136 "_gmtime",
137 "_gmtime_r",
138 "_if_indextoname",
139 "_if_nametoindex",
140 "_index",
141 "_inet_aton",
142 "_inet_ntop",
143 "_inet_pton",
144 "_initgroups",
145 "_ioctl",
146 "_lchown",
147 "_lgamma",
148 "_lgammaf",
149 "_lgammal",
150 "_link",
151 "_listxattr",
152 "_localtime",
153 "_localtime_r",
154 "_longjmp",
155 "_lseek",
156 "_lstat",
157 "_malloc",
158 "_malloc_create_zone",
159 "_malloc_default_purgeable_zone",
160 "_malloc_default_zone",
161 "_malloc_good_size",
162 "_malloc_make_nonpurgeable",
163 "_malloc_make_purgeable",
164 "_malloc_set_zone_name",
165 "_mbsnrtowcs",
166 "_mbsrtowcs",
167 "_mbstowcs",
168 "_memchr",
169 "_memcmp",
170 "_memcpy",
171 "_memmove",
172 "_memset",
173 "_mktime",
174 "_mlock",
175 "_mlockall",
176 "_modf",
177 "_modff",
178 "_modfl",
179 "_munlock",
180 "_munlockall",
181 "_objc_autoreleasePoolPop",
182 "_objc_setProperty",
183 "_objc_setProperty_atomic",
184 "_objc_setProperty_atomic_copy",
185 "_objc_setProperty_nonatomic",
186 "_objc_setProperty_nonatomic_copy",
187 "_objc_storeStrong",
188 "_open",
189 "_opendir",
190 "_poll",
191 "_posix_memalign",
192 "_pread",
193 "_printf",
194 "_pthread_attr_getdetachstate",
195 "_pthread_attr_getguardsize",
196 "_pthread_attr_getinheritsched",
197 "_pthread_attr_getschedparam",
198 "_pthread_attr_getschedpolicy",
199 "_pthread_attr_getscope",
200 "_pthread_attr_getstack",
201 "_pthread_attr_getstacksize",
202 "_pthread_condattr_getpshared",
203 "_pthread_create",
204 "_pthread_getschedparam",
205 "_pthread_join",
206 "_pthread_mutex_lock",
207 "_pthread_mutex_unlock",
208 "_pthread_mutexattr_getprioceiling",
209 "_pthread_mutexattr_getprotocol",
210 "_pthread_mutexattr_getpshared",
211 "_pthread_mutexattr_gettype",
212 "_pthread_rwlockattr_getpshared",
213 "_pwrite",
214 "_rand_r",
215 "_read",
216 "_readdir",
217 "_readdir_r",
218 "_readv",
219 "_readv$UNIX2003",
220 "_realloc",
221 "_realpath",
222 "_recv",
223 "_recvfrom",
224 "_recvmsg",
225 "_remquo",
226 "_remquof",
227 "_remquol",
228 "_scanf",
229 "_send",
230 "_sendmsg",
231 "_sendto",
232 "_setattrlist",
233 "_setgrent",
234 "_setitimer",
235 "_setlocale",
236 "_setpwent",
237 "_shm_open",
238 "_shm_unlink",
239 "_sigaction",
240 "_sigemptyset",
241 "_sigfillset",
242 "_siglongjmp",
243 "_signal",
244 "_sigpending",
245 "_sigprocmask",
246 "_sigwait",
247 "_snprintf",
248 "_sprintf",
249 "_sscanf",
250 "_stat",
251 "_statfs",
252 "_statfs64",
253 "_strcasecmp",
254 "_strcat",
255 "_strchr",
256 "_strcmp",
257 "_strcpy",
258 "_strdup",
259 "_strerror",
260 "_strerror_r",
261 "_strlen",
262 "_strncasecmp",
263 "_strncat",
264 "_strncmp",
265 "_strncpy",
266 "_strptime",
267 "_strtoimax",
268 "_strtol",
269 "_strtoll",
270 "_strtoumax",
271 "_tempnam",
272 "_time",
273 "_times",
274 "_tmpnam",
275 "_tsearch",
276 "_unlink",
277 "_valloc",
278 "_vasprintf",
279 "_vfprintf",
280 "_vfscanf",
281 "_vprintf",
282 "_vscanf",
283 "_vsnprintf",
284 "_vsprintf",
285 "_vsscanf",
286 "_wait",
287 "_wait$UNIX2003",
288 "_wait3",
289 "_wait4",
290 "_waitid",
291 "_waitid$UNIX2003",
292 "_waitpid",
293 "_waitpid$UNIX2003",
294 "_wcslen",
295 "_wcsnrtombs",
296 "_wcsrtombs",
297 "_wcstombs",
298 "_wordexp",
299 "_write",
300 "_writev",
301 "_writev$UNIX2003",
302 // <rdar://problem/22050956> always use stubs for C++ symbols that can be overridden
303 "__ZdaPv",
304 "__ZdlPv",
305 "__Znam",
306 "__Znwm",
307
308 nullptr
309 };
310
311
312 // These are dylibs that are interposed and stubs calling into them should never be bypassed
313 static const char* sNeverStubEliminateDylibs[] = {
314 "/usr/lib/system/libdispatch.dylib",
315 nullptr
316 };
317
318
319
320 uint64_t branchPoolTextSize(ArchPair arch)
321 {
322 if ( arch.arch == CPU_TYPE_ARM64 )
323 return 0x0000C000; // 48KB
324 else
325 return 0;
326 }
327
328 uint64_t branchPoolLinkEditSize(ArchPair arch)
329 {
330 if ( arch.arch == CPU_TYPE_ARM64 )
331 return 0x00100000; // 1MB
332 else
333 return 0;
334 }
335
336
337 uint64_t branchReach(ArchPair arch)
338 {
339 if ( arch.arch == CPU_TYPE_ARM64 )
340 return 0x08000000 - (2*branchPoolTextSize(arch)); // 128MB
341 else
342 return 0;
343 }
344
345 template <typename P>
346 class BranchPoolDylib {
347 public:
348 BranchPoolDylib(ArchPair arch, void* cacheBuffer, uint64_t cacheSize, uint64_t startAddr,
349 uint64_t textRegionStartAddr, uint64_t poolLinkEditStartAddr, uint64_t poolLinkEditStartOffset);
350
351 uint64_t addr() { return _startAddr; }
352 uint64_t getForwardBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools);
353 uint64_t getBackBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools);
354 void finalizeLoadCommands();
355 void printStats();
356
357 private:
358 uint64_t indexToAddr(uint32_t index) { return _startAddr + _firstStubOffset + sizeof(uint32_t)*index; }
359
360 static const int64_t b128MegLimit = 0x07FFFFFF;
361
362 typedef typename P::uint_t pint_t;
363 typedef typename P::E E;
364
365 ArchPair _arch;
366 void* _cacheBuffer;
367 uint64_t _startAddr;
368 std::unordered_map<uint64_t, uint32_t> _targetToIslandIndex;
369 std::unordered_map<uint32_t, const char*> _islandIndexToName;
370 macho_symtab_command<P>* _symbolTableCmd;
371 macho_dysymtab_command<P>* _dynamicSymbolTableCmd;
372 macho_uuid_command<P>* _uuidCmd;
373 uint32_t _maxStubs;
374 uint32_t _nextIndex;
375 uint32_t _firstStubOffset;
376 uint32_t* _stubInstructions;
377 macho_nlist<P>* _symbolTable;
378 char* _nextString;
379 char* _stringPoolStart;
380 char* _stringPoolEnd;
381 };
382
383
384 template <typename P>
385 BranchPoolDylib<P>::BranchPoolDylib(ArchPair arch, void* cacheBuffer, uint64_t cacheSize, uint64_t poolStartAddr,
386 uint64_t textRegionStartAddr, uint64_t poolLinkEditStartAddr, uint64_t poolLinkEditStartOffset)
387 : _arch(arch), _cacheBuffer(cacheBuffer),_startAddr(poolStartAddr), _nextIndex(0), _firstStubOffset(0x280)
388 {
389 bool is64 = (sizeof(typename P::uint_t) == 8);
390
391 const uint64_t textSegSize = branchPoolTextSize(arch);
392 const uint64_t linkEditSegSize = branchPoolLinkEditSize(arch);
393 const unsigned stubCount = (unsigned)((textSegSize - _firstStubOffset)/4);
394 const uint32_t linkeditOffsetSymbolTable = 0;
395 const uint32_t linkeditOffsetIndirectSymbolTable = stubCount*sizeof(macho_nlist<P>);
396 const uint32_t linkeditOffsetSymbolPoolOffset = linkeditOffsetIndirectSymbolTable + stubCount*sizeof(uint32_t);
397 _maxStubs = stubCount;
398
399 // write mach_header and load commands for pseudo dylib
400 macho_header<P>* mh = (macho_header<P>*)((uint8_t*)cacheBuffer + poolStartAddr - textRegionStartAddr);
401 mh->set_magic(is64 ? MH_MAGIC_64 : MH_MAGIC);
402 mh->set_cputype(arch.arch);
403 mh->set_cpusubtype(arch.subtype);
404 mh->set_filetype(MH_DYLIB);
405 mh->set_ncmds(6);
406 mh->set_sizeofcmds(is64 ? 0x210 : 100); // FIXME: 32-bit size
407 mh->set_flags(0x80000000);
408 // LC_SEGMENT
409 macho_load_command<P>* cmd = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
410 macho_segment_command<P>* textSegCmd = (macho_segment_command<P>*)cmd;
411 textSegCmd->set_cmd(is64 ? LC_SEGMENT_64 : LC_SEGMENT);
412 textSegCmd->set_cmdsize(sizeof(macho_segment_command<P>)*2+sizeof(macho_section<P>));
413 textSegCmd->set_segname("__TEXT");
414 textSegCmd->set_vmaddr(poolStartAddr);
415 textSegCmd->set_vmsize(textSegSize);
416 textSegCmd->set_fileoff(poolStartAddr - textRegionStartAddr);
417 textSegCmd->set_filesize(branchPoolTextSize(arch));
418 textSegCmd->set_maxprot(PROT_READ|PROT_EXEC);
419 textSegCmd->set_initprot(PROT_READ|PROT_EXEC);
420 textSegCmd->set_nsects(1);
421 textSegCmd->set_flags(0);
422 macho_section<P>* stubSection = (macho_section<P>*)((uint8_t*)textSegCmd + sizeof(macho_segment_command<P>));
423 stubSection->set_sectname("__stubs");
424 stubSection->set_segname("__TEXT");
425 stubSection->set_addr(poolStartAddr + _firstStubOffset);
426 stubSection->set_size(textSegSize - _firstStubOffset);
427 stubSection->set_offset((uint32_t)(poolStartAddr + _firstStubOffset - textRegionStartAddr));
428 stubSection->set_align(2);
429 stubSection->set_reloff(0);
430 stubSection->set_nreloc(0);
431 stubSection->set_flags(S_SYMBOL_STUBS | S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS);
432 stubSection->set_reserved1(0); // start index in indirect table
433 stubSection->set_reserved2(4); // size of stubs
434 // LC_SEGMENT
435 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
436 macho_segment_command<P>* linkEditSegCmd = (macho_segment_command<P>*)cmd;
437 linkEditSegCmd->set_cmd(is64 ? LC_SEGMENT_64 : LC_SEGMENT);
438 linkEditSegCmd->set_cmdsize(sizeof(macho_segment_command<P>));
439 linkEditSegCmd->set_segname("__LINKEDIT");
440 linkEditSegCmd->set_vmaddr(poolLinkEditStartAddr);
441 linkEditSegCmd->set_vmsize(linkEditSegSize);
442 linkEditSegCmd->set_fileoff(poolLinkEditStartOffset);
443 linkEditSegCmd->set_filesize(linkEditSegSize);
444 linkEditSegCmd->set_maxprot(PROT_READ);
445 linkEditSegCmd->set_initprot(PROT_READ);
446 linkEditSegCmd->set_nsects(0);
447 linkEditSegCmd->set_flags(0);
448 // LC_ID_DYLIB
449 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
450 macho_dylib_command<P>* installNameCmd = (macho_dylib_command<P>*)cmd;
451 installNameCmd->set_cmd(LC_ID_DYLIB);
452 installNameCmd->set_cmdsize(sizeof(macho_dylib_command<P>) + 48);
453 installNameCmd->set_timestamp(2);
454 installNameCmd->set_current_version(0x10000);
455 installNameCmd->set_compatibility_version(0x10000);
456 installNameCmd->set_name_offset();
457 strcpy((char*)cmd + sizeof(macho_dylib_command<P>), "dyld_shared_cache_branch_islands");
458 // LC_SYMTAB
459 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
460 _symbolTableCmd = (macho_symtab_command<P>*)cmd;
461 _symbolTableCmd->set_cmd(LC_SYMTAB);
462 _symbolTableCmd->set_cmdsize(sizeof(macho_symtab_command<P>));
463 _symbolTableCmd->set_nsyms(stubCount);
464 _symbolTableCmd->set_symoff((uint32_t)(poolLinkEditStartOffset + linkeditOffsetSymbolTable));
465 _symbolTableCmd->set_stroff((uint32_t)(poolLinkEditStartOffset + linkeditOffsetSymbolPoolOffset));
466 _symbolTableCmd->set_strsize((uint32_t)(linkEditSegSize - linkeditOffsetSymbolPoolOffset));
467 // LC_DYSYMTAB
468 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
469 _dynamicSymbolTableCmd = (macho_dysymtab_command<P>*)cmd;
470 _dynamicSymbolTableCmd->set_cmd(LC_DYSYMTAB);
471 _dynamicSymbolTableCmd->set_cmdsize(sizeof(macho_dysymtab_command<P>));
472 _dynamicSymbolTableCmd->set_ilocalsym(0);
473 _dynamicSymbolTableCmd->set_nlocalsym(0);
474 _dynamicSymbolTableCmd->set_iextdefsym(0);
475 _dynamicSymbolTableCmd->set_nextdefsym(0);
476 _dynamicSymbolTableCmd->set_iundefsym(0);
477 _dynamicSymbolTableCmd->set_nundefsym(stubCount);
478 _dynamicSymbolTableCmd->set_tocoff(0);
479 _dynamicSymbolTableCmd->set_ntoc(0);
480 _dynamicSymbolTableCmd->set_modtaboff(0);
481 _dynamicSymbolTableCmd->set_nmodtab(0);
482 _dynamicSymbolTableCmd->set_extrefsymoff(0);
483 _dynamicSymbolTableCmd->set_nextrefsyms(0);
484 _dynamicSymbolTableCmd->set_indirectsymoff((uint32_t)(poolLinkEditStartOffset + linkeditOffsetIndirectSymbolTable));
485 _dynamicSymbolTableCmd->set_nindirectsyms(stubCount);
486 _dynamicSymbolTableCmd->set_extreloff(0);
487 _dynamicSymbolTableCmd->set_nextrel(0);
488 _dynamicSymbolTableCmd->set_locreloff(0);
489 _dynamicSymbolTableCmd->set_nlocrel(0);
490 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
491 // LC_UUID
492 _uuidCmd = (macho_uuid_command<P>*)cmd;
493 _uuidCmd->set_cmd(LC_UUID);
494 _uuidCmd->set_cmdsize(sizeof(macho_uuid_command<P>));
495 cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
496
497 // write stubs section content
498 _stubInstructions = (uint32_t*)((uint8_t*)mh + _firstStubOffset);
499 for (int i=0; i < stubCount; ++i) {
500 E::set32(_stubInstructions[i], 0xD4200000);
501 }
502
503 // write linkedit content
504 uint8_t* linkeditBufferStart = (uint8_t*)cacheBuffer + poolLinkEditStartOffset;
505 // write symbol table
506 _symbolTable = (macho_nlist<P>*)(linkeditBufferStart);
507 for (int i=0; i < stubCount; ++i) {
508 _symbolTable[i].set_n_strx(1);
509 _symbolTable[i].set_n_type(N_UNDF | N_EXT);
510 _symbolTable[i].set_n_sect(0);
511 _symbolTable[i].set_n_desc(0);
512 _symbolTable[i].set_n_value(0);
513 }
514 // write indirect symbol table
515 uint32_t* indirectSymboTable = (uint32_t*)(linkeditBufferStart + linkeditOffsetIndirectSymbolTable);
516 for (int i=0; i < stubCount; ++i) {
517 P::E::set32(indirectSymboTable[i], i);
518 }
519 // write string pool
520 _stringPoolStart = (char*)(linkeditBufferStart + linkeditOffsetSymbolPoolOffset);
521 _stringPoolEnd = _stringPoolStart + linkEditSegSize - linkeditOffsetSymbolPoolOffset;
522 _stringPoolStart[0] = '\0';
523 strcpy(&_stringPoolStart[1], "<unused>");
524 _nextString = &_stringPoolStart[10];
525 }
526
527
528 template <typename P>
529 void BranchPoolDylib<P>::finalizeLoadCommands()
530 {
531 _symbolTableCmd->set_nsyms(_nextIndex);
532 _symbolTableCmd->set_strsize((uint32_t)(_nextString - _stringPoolStart));
533 _dynamicSymbolTableCmd->set_nundefsym(_nextIndex);
534
535 uint8_t digest[CC_MD5_DIGEST_LENGTH];
536 CC_MD5(_stubInstructions, _maxStubs*sizeof(uint64_t), digest);
537 _uuidCmd->set_uuid(digest);
538
539 if ( verbose ) {
540 verboseLog("branch islands in image at 0x%0llX:", _startAddr);
541 for (uint32_t i=0; i < _nextIndex; ++i) {
542 verboseLog(" 0x%llX %s", indexToAddr(i), _islandIndexToName[i]);
543 }
544 }
545 }
546
547 template <typename P>
548 uint64_t BranchPoolDylib<P>::getForwardBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools)
549 {
550 // check if we can re-used existing branch island
551 const auto& pos = _targetToIslandIndex.find(finalTargetAddr);
552 if ( pos != _targetToIslandIndex.end() )
553 return indexToAddr(pos->second);
554
555 // skip if instruction pool is full
556 if ( _nextIndex >= _maxStubs )
557 return 0;
558
559 // skip if string pool is full
560 if ( (_nextString + strlen(name)+1) >= _stringPoolEnd )
561 return 0;
562
563 uint64_t branchIslandTargetAddr = finalTargetAddr;
564 // if final target is too far, we need to use branch island in next pool
565 if ( (finalTargetAddr - _startAddr) > b128MegLimit ) {
566 BranchPoolDylib<P>* nextPool = nullptr;
567 for (size_t i=0; i < branchIslandPools.size()-1; ++i) {
568 if ( branchIslandPools[i] == this ) {
569 nextPool = branchIslandPools[i+1];
570 break;
571 }
572 }
573
574 if (nextPool == nullptr) {
575 warning("BranchPoolDylib<P>::getForwardBranch: nextPool unreachable");
576 return 0;
577 }
578
579 branchIslandTargetAddr = nextPool->getForwardBranch(finalTargetAddr, name, branchIslandPools);
580 if ( branchIslandTargetAddr == 0 )
581 return 0; // next pool is full
582 }
583
584 // write branch instruction in stubs section
585 uint32_t index = _nextIndex++;
586 int64_t branchDelta = branchIslandTargetAddr - indexToAddr(index);
587 uint32_t branchInstr = 0x14000000 + ((branchDelta/4) & 0x03FFFFFF);
588 E::set32(_stubInstructions[index], branchInstr);
589
590 // update symbol table
591 _symbolTable[index].set_n_strx((uint32_t)(_nextString - _stringPoolStart));
592 strcpy(_nextString, name);
593 _nextString += (strlen(name) +1);
594
595 // record island
596 _targetToIslandIndex[finalTargetAddr] = index;
597 _islandIndexToName[index] = name;
598 return indexToAddr(index);
599 }
600
601 template <typename P>
602 uint64_t BranchPoolDylib<P>::getBackBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools)
603 {
604 // check if we can re-used existing branch island
605 const auto& pos = _targetToIslandIndex.find(finalTargetAddr);
606 if ( pos != _targetToIslandIndex.end() )
607 return indexToAddr(pos->second);
608
609 // skip if instruction pool is full
610 if ( _nextIndex >= _maxStubs )
611 return 0;
612
613 // skip if string pool is full
614 if ( (_nextString + strlen(name)+1) >= _stringPoolEnd )
615 return 0;
616
617 uint64_t branchIslandTargetAddr = finalTargetAddr;
618 // if final target is too far, we need to use branch island in next pool
619 if ( (indexToAddr(_nextIndex) - finalTargetAddr) > b128MegLimit ) {
620 BranchPoolDylib<P>* nextPool = nullptr;
621 for (long i=branchIslandPools.size()-1; i > 0; --i) {
622 if ( branchIslandPools[i] == this ) {
623 nextPool = branchIslandPools[i-1];
624 break;
625 }
626 }
627
628 if (nextPool == nullptr) {
629 warning("BranchPoolDylib<P>::getBackBranch: nextPool unreachable");
630 return 0;
631 }
632
633 branchIslandTargetAddr = nextPool->getBackBranch(finalTargetAddr, name, branchIslandPools);
634 if ( branchIslandTargetAddr == 0 )
635 return 0; // next pool is full
636 }
637
638 // write branch instruction in stubs section
639 uint32_t index = _nextIndex++;
640 int64_t branchDelta = branchIslandTargetAddr - indexToAddr(index);
641 uint32_t branchInstr = 0x14000000 + ((branchDelta/4) & 0x03FFFFFF);
642 E::set32(_stubInstructions[index], branchInstr);
643
644 // update symbol table
645 _symbolTable[index].set_n_strx((uint32_t)(_nextString - _stringPoolStart));
646 strcpy(_nextString, name);
647 _nextString += (strlen(name) +1);
648
649 // record island
650 _targetToIslandIndex[finalTargetAddr] = index;
651 _islandIndexToName[index] = name;
652 return indexToAddr(index);
653 }
654
655 template <typename P>
656 void BranchPoolDylib<P>::printStats()
657 {
658 verboseLog(" island pool at 0x%0llX has %u stubs and stringPool size=%lu", _startAddr, _nextIndex, _nextString-_stringPoolStart);
659 }
660
661
662
663 template <typename P>
664 class StubOptimizer {
665 public:
666 StubOptimizer(void* cacheBuffer, macho_header<P>* mh);
667 void buildStubMap(const std::unordered_set<std::string>& neverStubEliminate);
668 void optimizeStubs(std::unordered_map<uint64_t,std::vector<uint64_t>>& targetToBranchIslands);
669 void bypassStubs(std::unordered_map<uint64_t,std::vector<uint64_t>>& targetToBranchIslands);
670 void optimizeCallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools);
671 const char* installName() { return _installName; }
672 const uint8_t* exportsTrie() { return (uint8_t*)_cacheBuffer + _dyldInfo->export_off(); }
673 uint32_t exportsTrieSize() { return _dyldInfo->export_size(); }
674
675 uint32_t _stubCount = 0;
676 uint32_t _stubOptimizedCount = 0;
677 uint32_t _branchesCount = 0;
678 uint32_t _branchesModifiedCount = 0;
679 uint32_t _branchesDirectCount = 0;
680 uint32_t _branchesIslandCount = 0;
681
682 private:
683
684 typedef std::function<bool(uint8_t callSiteKind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction)> CallSiteHandler;
685 typedef typename P::uint_t pint_t;
686 typedef typename P::E E;
687
688 void forEachCallSiteToAStub(CallSiteHandler);
689 void optimizeArm64CallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools);
690 void optimizeArmCallSites();
691 void optimizeArmStubs();
692 uint64_t lazyPointerAddrFromArm64Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr);
693 uint32_t lazyPointerAddrFromArmStub(const uint8_t* stubInstructions, uint32_t stubVMAddr);
694 int32_t getDisplacementFromThumbBranch(uint32_t instruction, uint32_t instrAddr);
695 uint32_t setDisplacementInThumbBranch(uint32_t instruction, uint32_t instrAddr,
696 int32_t displacement, bool targetIsThumb);
697
698
699 struct AddressAndName { pint_t targetVMAddr; const char* targetName; };
700 typedef std::unordered_map<pint_t, AddressAndName> StubVMAddrToTarget;
701
702 static const int64_t b128MegLimit = 0x07FFFFFF;
703 static const int64_t b16MegLimit = 0x00FFFFFF;
704
705
706 macho_header<P>* _mh;
707 void* _cacheBuffer;
708 uint32_t _linkeditSize = 0;
709 uint32_t _linkeditCacheOffset = 0;
710 uint64_t _linkeditAddr = 0;
711 const uint8_t* _linkeditBias = nullptr;
712 const char* _installName = nullptr;
713 const macho_symtab_command<P>* _symTabCmd = nullptr;
714 const macho_dysymtab_command<P>* _dynSymTabCmd = nullptr;
715 const macho_dyld_info_command<P>* _dyldInfo = nullptr;
716 macho_linkedit_data_command<P>* _splitSegInfoCmd = nullptr;
717 const macho_section<P>* _textSection = nullptr;
718 const macho_section<P>* _stubSection = nullptr;
719 uint32_t _textSectionIndex = 0;
720 uint32_t _stubSectionIndex = 0;
721 pint_t _textSegStartAddr = 0;
722 uint32_t _textSegCacheOffset = 0;
723 std::vector<macho_segment_command<P>*> _segCmds;
724 std::unordered_map<pint_t, pint_t> _stubAddrToLPAddr;
725 std::unordered_map<pint_t, pint_t> _lpAddrToTargetAddr;
726 std::unordered_map<pint_t, const char*> _targetAddrToName;
727 };
728
729
730
731 template <typename P>
732 StubOptimizer<P>::StubOptimizer(void* cacheBuffer, macho_header<P>* mh)
733 : _mh(mh), _cacheBuffer(cacheBuffer)
734 {
735 _linkeditBias = (uint8_t*)cacheBuffer;
736 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
737 const uint32_t cmd_count = mh->ncmds();
738 macho_segment_command<P>* segCmd;
739 uint32_t sectionIndex = 0;
740 const macho_load_command<P>* cmd = cmds;
741 for (uint32_t i = 0; i < cmd_count; ++i) {
742 switch (cmd->cmd()) {
743 case LC_ID_DYLIB:
744 _installName = ((macho_dylib_command<P>*)cmd)->name();
745 break;
746 case LC_SYMTAB:
747 _symTabCmd = (macho_symtab_command<P>*)cmd;
748 break;
749 case LC_DYSYMTAB:
750 _dynSymTabCmd = (macho_dysymtab_command<P>*)cmd;
751 break;
752 case LC_SEGMENT_SPLIT_INFO:
753 _splitSegInfoCmd = (macho_linkedit_data_command<P>*)cmd;
754 break;
755 case LC_DYLD_INFO:
756 case LC_DYLD_INFO_ONLY:
757 _dyldInfo = (macho_dyld_info_command<P>*)cmd;
758 break;
759 case macho_segment_command<P>::CMD:
760 segCmd =( macho_segment_command<P>*)cmd;
761 _segCmds.push_back(segCmd);
762 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
763 _linkeditSize = (uint32_t)segCmd->vmsize();
764 _linkeditCacheOffset = (uint32_t)segCmd->fileoff();
765 _linkeditAddr = segCmd->vmaddr();
766 }
767 else if ( strcmp(segCmd->segname(), "__TEXT") == 0 ) {
768 _textSegStartAddr = (pint_t)segCmd->vmaddr();
769 _textSegCacheOffset = (uint32_t)((uint8_t*)mh - (uint8_t*)cacheBuffer);
770 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
771 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
772 for (const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
773 ++sectionIndex;
774 if ( strcmp(sect->sectname(), "__text") == 0 ) {
775 _textSection = sect;
776 _textSectionIndex = sectionIndex;
777 }
778 else if ( ((sect->flags() & SECTION_TYPE) == S_SYMBOL_STUBS) && (sect->size() != 0) ) {
779 _stubSection = sect;
780 _stubSectionIndex = sectionIndex;
781 }
782 }
783 }
784 break;
785 }
786 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
787 }
788 }
789
790
791
792 template <typename P>
793 uint32_t StubOptimizer<P>::lazyPointerAddrFromArmStub(const uint8_t* stubInstructions, uint32_t stubVMAddr)
794 {
795 uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
796 uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions+4));
797 uint32_t stubInstr3 = E::get32(*(uint32_t*)(stubInstructions+8));
798 int32_t stubData = E::get32(*(uint32_t*)(stubInstructions+12));
799 if ( stubInstr1 != 0xe59fc004 ) {
800 warning("first instruction of stub (0x%08X) is not 'ldr ip, pc + 12' for stub at addr 0x%0llX in %s",
801 stubInstr1, (uint64_t)stubVMAddr, _installName);
802 return 0;
803 }
804 if ( stubInstr2 != 0xe08fc00c ) {
805 warning("second instruction of stub (0x%08X) is not 'add ip, pc, ip' for stub at addr 0x%0llX in %s",
806 stubInstr1, (uint64_t)stubVMAddr, _installName);
807 return 0;
808 }
809 if ( stubInstr3 != 0xe59cf000 ) {
810 warning("third instruction of stub (0x%08X) is not 'ldr pc, [ip]' for stub at addr 0x%0llX in %s",
811 stubInstr1, (uint64_t)stubVMAddr, _installName);
812 return 0;
813 }
814 return stubVMAddr + 12 + stubData;
815 }
816
817
818 template <typename P>
819 uint64_t StubOptimizer<P>::lazyPointerAddrFromArm64Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr)
820 {
821 uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
822 if ( (stubInstr1 & 0x9F00001F) != 0x90000010 ) {
823 warning("first instruction of stub (0x%08X) is not ADRP for stub at addr 0x%0llX in %s",
824 stubInstr1, (uint64_t)stubVMAddr, _installName);
825 return 0;
826 }
827 int32_t adrpValue = ((stubInstr1 & 0x00FFFFE0) >> 3) | ((stubInstr1 & 0x60000000) >> 29);
828 if ( stubInstr1 & 0x00800000 )
829 adrpValue |= 0xFFF00000;
830 uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions + 4));
831 if ( (stubInstr2 & 0xFFC003FF) != 0xF9400210 ) {
832 warning("second instruction of stub (0x%08X) is not LDR for stub at addr 0x%0llX in %s",
833 stubInstr2, (uint64_t)stubVMAddr, _installName);
834 return 0;
835 }
836 uint32_t ldrValue = ((stubInstr2 >> 10) & 0x00000FFF);
837 return (stubVMAddr & (-4096)) + adrpValue*4096 + ldrValue*8;
838 }
839
840
841
842 template <typename P>
843 void StubOptimizer<P>::buildStubMap(const std::unordered_set<std::string>& neverStubEliminate)
844 {
845 // find all stubs and lazy pointers
846 const macho_nlist<P>* symbolTable = (const macho_nlist<P>*)(((uint8_t*)_cacheBuffer) + _symTabCmd->symoff());
847 const char* symbolStrings = (char*)_cacheBuffer + _symTabCmd->stroff();
848 const uint32_t* const indirectTable = (uint32_t*)(((uint8_t*)_cacheBuffer) + _dynSymTabCmd->indirectsymoff());
849 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)_mh + sizeof(macho_header<P>));
850 const uint32_t cmd_count = _mh->ncmds();
851 const macho_load_command<P>* cmd = cmds;
852 for (uint32_t i = 0; i < cmd_count; ++i) {
853 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
854 macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
855 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
856 macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
857 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
858 if ( sect->size() == 0 )
859 continue;
860 unsigned sectionType = (sect->flags() & SECTION_TYPE);
861 const uint32_t indirectTableOffset = sect->reserved1();
862 if ( sectionType == S_SYMBOL_STUBS ) {
863 const uint32_t stubSize = sect->reserved2();
864 _stubCount = (uint32_t)(sect->size() / stubSize);
865 pint_t stubVMAddr = (pint_t)sect->addr();
866 for (uint32_t j=0; j < _stubCount; ++j, stubVMAddr += stubSize) {
867 uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
868 switch ( symbolIndex ) {
869 case INDIRECT_SYMBOL_ABS:
870 case INDIRECT_SYMBOL_LOCAL:
871 break;
872 default:
873 if ( symbolIndex >= _symTabCmd->nsyms() ) {
874 warning("symbol index out of range (%d of %d) for stub at addr 0x%0llX in %s",
875 symbolIndex, _symTabCmd->nsyms(), (uint64_t)stubVMAddr, _installName);
876 continue;
877 }
878 const macho_nlist<P>* sym = &symbolTable[symbolIndex];
879 uint32_t stringOffset = sym->n_strx();
880 if ( stringOffset > _symTabCmd->strsize() ) {
881 warning("symbol string offset out of range (%u of %u) for stub at addr 0x%0llX in %s",
882 stringOffset, sym->n_strx(), (uint64_t)stubVMAddr, _installName);
883 continue;
884 }
885 const char* symName = &symbolStrings[stringOffset];
886 if ( neverStubEliminate.count(symName) ) {
887 //verboseLog("not bypassing stub to %s in %s because target is interposable\n", symName, _installName);
888 continue;
889 }
890 const uint8_t* stubInstrs = (uint8_t*)_cacheBuffer + sect->offset() + stubVMAddr - sect->addr();
891 pint_t targetLPAddr = 0;
892 switch ( _mh->cputype() ) {
893 case CPU_TYPE_ARM64:
894 targetLPAddr = (pint_t)lazyPointerAddrFromArm64Stub(stubInstrs, stubVMAddr);
895 break;
896 case CPU_TYPE_ARM:
897 targetLPAddr = (pint_t)lazyPointerAddrFromArmStub(stubInstrs, (uint32_t)stubVMAddr);
898 break;
899 }
900 if ( targetLPAddr != 0 )
901 _stubAddrToLPAddr[stubVMAddr] = targetLPAddr;
902 break;
903 }
904 }
905 }
906 else if ( sectionType == S_LAZY_SYMBOL_POINTERS ) {
907 pint_t lpVMAddr = (pint_t)sect->addr();
908 pint_t* lpContent = (pint_t*)(((uint8_t*)_cacheBuffer) + sect->offset());
909 uint32_t elementCount = (uint32_t)(sect->size() / sizeof(pint_t));
910 uint64_t textSegStartAddr = _segCmds[0]->vmaddr();
911 uint64_t textSegEndAddr = _segCmds[0]->vmaddr() + _segCmds[0]->vmsize();
912 pint_t lpValue;
913 for (uint32_t j=0; j < elementCount; ++j) {
914 uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
915 switch ( symbolIndex ) {
916 case INDIRECT_SYMBOL_ABS:
917 case INDIRECT_SYMBOL_LOCAL:
918 break;
919 default:
920 lpValue = (pint_t)P::getP(lpContent[j]);
921 if ( symbolIndex >= _symTabCmd->nsyms() ) {
922 warning("symbol index out of range (%d of %d) for lazy pointer at addr 0x%0llX in %s",
923 symbolIndex, _symTabCmd->nsyms(), (uint64_t)lpVMAddr, _installName);
924 continue;
925 }
926 const macho_nlist<P>* sym = &symbolTable[symbolIndex];
927 uint32_t stringOffset = sym->n_strx();
928 if ( stringOffset > _symTabCmd->strsize() ) {
929 warning("symbol string offset out of range (%u of %u) for lazy pointer at addr 0x%0llX in %s",
930 stringOffset, sym->n_strx(), (uint64_t)lpVMAddr, _installName);
931 continue;
932 }
933 const char* symName = &symbolStrings[stringOffset];
934 if ( (lpValue > textSegStartAddr) && (lpValue< textSegEndAddr) ) {
935 //verboseLog("skipping lazy pointer at 0x%0lX to %s in %s because target is within dylib\n", lpVMAddr, symName, _installName);
936 }
937 else if ( (sizeof(pint_t) == 8) && ((lpValue % 4) != 0) ) {
938 warning("lazy pointer at 0x%0llX does not point to 4-byte aligned address(0x%0llX) in %s",
939 (uint64_t)lpVMAddr, (uint64_t)lpValue, _installName);
940 }
941 else {
942 _lpAddrToTargetAddr[lpVMAddr] = lpValue;
943 _targetAddrToName[lpValue] = symName;
944 }
945 break;
946 }
947 lpVMAddr += sizeof(pint_t);
948 }
949 }
950 }
951 }
952 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
953 }
954 }
955
956
957 template <typename P>
958 void StubOptimizer<P>::forEachCallSiteToAStub(CallSiteHandler handler)
959 {
960 const uint8_t* infoStart = &_linkeditBias[_splitSegInfoCmd->dataoff()];
961 const uint8_t* infoEnd = &infoStart[_splitSegInfoCmd->datasize()];
962 if ( *infoStart++ != DYLD_CACHE_ADJ_V2_FORMAT )
963 terminate("malformed split seg info in %s", _installName);
964
965 uint8_t* textSectionContent = (uint8_t*)_cacheBuffer + _textSegCacheOffset + _textSection->addr() -_textSegStartAddr;
966
967 // Whole :== <count> FromToSection+
968 // FromToSection :== <from-sect-index> <to-sect-index> <count> ToOffset+
969 // ToOffset :== <to-sect-offset-delta> <count> FromOffset+
970 // FromOffset :== <kind> <count> <from-sect-offset-delta>
971 const uint8_t* p = infoStart;
972 uint64_t sectionCount = read_uleb128(p, infoEnd);
973 for (uint64_t i=0; i < sectionCount; ++i) {
974 uint64_t fromSectionIndex = read_uleb128(p, infoEnd);
975 uint64_t toSectionIndex = read_uleb128(p, infoEnd);
976 uint64_t toOffsetCount = read_uleb128(p, infoEnd);
977 uint64_t toSectionOffset = 0;
978 for (uint64_t j=0; j < toOffsetCount; ++j) {
979 uint64_t toSectionDelta = read_uleb128(p, infoEnd);
980 uint64_t fromOffsetCount = read_uleb128(p, infoEnd);
981 toSectionOffset += toSectionDelta;
982 for (uint64_t k=0; k < fromOffsetCount; ++k) {
983 uint64_t kind = read_uleb128(p, infoEnd);
984 if ( kind > 12 )
985 terminate("bad kind (%llu) value in %s", kind, _installName);
986 uint64_t fromSectDeltaCount = read_uleb128(p, infoEnd);
987 uint64_t fromSectionOffset = 0;
988 for (uint64_t l=0; l < fromSectDeltaCount; ++l) {
989 uint64_t delta = read_uleb128(p, infoEnd);
990 fromSectionOffset += delta;
991 if ( (fromSectionIndex == _textSectionIndex) && (toSectionIndex == _stubSectionIndex) ) {
992 uint32_t* instrPtr = (uint32_t*)(textSectionContent + fromSectionOffset);
993 uint64_t instrAddr = _textSection->addr() + fromSectionOffset;
994 uint64_t stubAddr = _stubSection->addr() + toSectionOffset;
995 uint32_t instruction = E::get32(*instrPtr);
996 _branchesCount++;
997 if ( handler(kind, instrAddr, stubAddr, instruction) ) {
998 _branchesModifiedCount++;
999 E::set32(*instrPtr, instruction);
1000 }
1001 }
1002 }
1003 }
1004 }
1005 }
1006 }
1007
1008
1009 /// Extract displacement from a thumb b/bl/blx instruction.
1010 template <typename P>
1011 int32_t StubOptimizer<P>::getDisplacementFromThumbBranch(uint32_t instruction, uint32_t instrAddr)
1012 {
1013 bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
1014 uint32_t s = (instruction >> 10) & 0x1;
1015 uint32_t j1 = (instruction >> 29) & 0x1;
1016 uint32_t j2 = (instruction >> 27) & 0x1;
1017 uint32_t imm10 = instruction & 0x3FF;
1018 uint32_t imm11 = (instruction >> 16) & 0x7FF;
1019 uint32_t i1 = (j1 == s);
1020 uint32_t i2 = (j2 == s);
1021 uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
1022 int32_t sdis = dis;
1023 int32_t result = s ? (sdis | 0xFE000000) : sdis;
1024 if ( is_blx && (instrAddr & 0x2) ) {
1025 // The thumb blx instruction always has low bit of imm11 as zero. The way
1026 // a 2-byte aligned blx can branch to a 4-byte aligned ARM target is that
1027 // the blx instruction always 4-byte aligns the pc before adding the
1028 // displacement from the blx. We must emulate that when decoding this.
1029 result -= 2;
1030 }
1031 return result;
1032 }
1033
1034 /// Update a thumb b/bl/blx instruction, switching bl <-> blx as needed.
1035 template <typename P>
1036 uint32_t StubOptimizer<P>::setDisplacementInThumbBranch(uint32_t instruction, uint32_t instrAddr,
1037 int32_t displacement, bool targetIsThumb) {
1038 if ( (displacement > 16777214) || (displacement < (-16777216)) )
1039 terminate("thumb branch out of range at 0x%0X in %s", instrAddr, _installName);
1040 bool is_bl = ((instruction & 0xD000F800) == 0xD000F000);
1041 bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
1042 bool is_b = ((instruction & 0xD000F800) == 0x9000F000);
1043 uint32_t newInstruction = (instruction & 0xD000F800);
1044 if (is_bl || is_blx) {
1045 if (targetIsThumb) {
1046 newInstruction = 0xD000F000; // Use bl
1047 }
1048 else {
1049 newInstruction = 0xC000F000; // Use blx
1050 // See note in getDisplacementFromThumbBranch() about blx.
1051 if (instrAddr & 0x2)
1052 displacement += 2;
1053 }
1054 }
1055 else if (is_b) {
1056 if ( !targetIsThumb )
1057 terminate("no pc-rel thumb branch instruction that switches to arm mode at 0x%0X in %s", instrAddr, _installName);
1058 }
1059 else {
1060 terminate("not b/bl/blx at 0x%0X in %s", instrAddr, _installName);
1061 }
1062 uint32_t s = (uint32_t)(displacement >> 24) & 0x1;
1063 uint32_t i1 = (uint32_t)(displacement >> 23) & 0x1;
1064 uint32_t i2 = (uint32_t)(displacement >> 22) & 0x1;
1065 uint32_t imm10 = (uint32_t)(displacement >> 12) & 0x3FF;
1066 uint32_t imm11 = (uint32_t)(displacement >> 1) & 0x7FF;
1067 uint32_t j1 = (i1 == s);
1068 uint32_t j2 = (i2 == s);
1069 uint32_t nextDisp = (j1 << 13) | (j2 << 11) | imm11;
1070 uint32_t firstDisp = (s << 10) | imm10;
1071 newInstruction |= (nextDisp << 16) | firstDisp;
1072 return newInstruction;
1073 }
1074
1075
1076 template <typename P>
1077 void StubOptimizer<P>::optimizeArmCallSites()
1078 {
1079 forEachCallSiteToAStub([&](uint8_t kind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction) -> bool {
1080 if ( kind == DYLD_CACHE_ADJ_V2_THUMB_BR22 ) {
1081 bool is_bl = ((instruction & 0xD000F800) == 0xD000F000);
1082 bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
1083 bool is_b = ((instruction & 0xD000F800) == 0x9000F000);
1084 if ( !is_bl && !is_blx && !is_b ){
1085 warning("non-branch instruction at 0x%0llX in %s", callSiteAddr, _installName);
1086 return false;
1087 }
1088 int32_t brDelta = getDisplacementFromThumbBranch(instruction, (uint32_t)callSiteAddr);
1089 pint_t targetAddr = (pint_t)callSiteAddr + 4 + brDelta;
1090 if ( targetAddr != stubAddr ) {
1091 warning("stub target mismatch at callsite 0x%0llX in %s", callSiteAddr, _installName);
1092 return false;
1093 }
1094 // ignore branch if not to a known stub
1095 const auto& pos = _stubAddrToLPAddr.find(targetAddr);
1096 if ( pos == _stubAddrToLPAddr.end() )
1097 return false;
1098 // ignore branch if lazy pointer is not known (could be resolver based)
1099 pint_t lpAddr = pos->second;
1100 const auto& pos2 = _lpAddrToTargetAddr.find(lpAddr);
1101 if ( pos2 == _lpAddrToTargetAddr.end() )
1102 return false;
1103 uint64_t finalTargetAddr = pos2->second;
1104 int64_t deltaToFinalTarget = finalTargetAddr - (callSiteAddr + 4);
1105 // if final target within range, change to branch there directly
1106 if ( (deltaToFinalTarget > -b16MegLimit) && (deltaToFinalTarget < b16MegLimit) ) {
1107 bool targetIsThumb = finalTargetAddr & 1;
1108 instruction = setDisplacementInThumbBranch(instruction, (uint32_t)callSiteAddr, (int32_t)deltaToFinalTarget, targetIsThumb);
1109 _branchesDirectCount++;
1110 return true;
1111 }
1112 }
1113 else if ( kind == DYLD_CACHE_ADJ_V2_ARM_BR24 ) {
1114 // too few of these to be worth trying to optimize
1115 }
1116
1117 return false;
1118 });
1119 }
1120
1121
1122 template <typename P>
1123 void StubOptimizer<P>::optimizeArmStubs()
1124 {
1125 for (const auto& stubEntry : _stubAddrToLPAddr) {
1126 pint_t stubVMAddr = stubEntry.first;
1127 pint_t lpVMAddr = stubEntry.second;
1128 const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
1129 if ( pos == _lpAddrToTargetAddr.end() )
1130 return;
1131 pint_t targetVMAddr = pos->second;
1132
1133 int32_t delta = (int32_t)(targetVMAddr - (stubVMAddr + 12));
1134 const uint32_t* stubInstructions = (uint32_t*)((uint8_t*)_cacheBuffer + _stubSection->offset() + stubVMAddr - _stubSection->addr());
1135 E::set32(*(uint32_t*)&stubInstructions[0], 0xe59fc000); // ldr ip, L0
1136 E::set32(*(uint32_t*)&stubInstructions[1], 0xe08ff00c); // add pc, pc, ip
1137 E::set32(*(uint32_t*)&stubInstructions[2], delta); // L0: .long xxxx
1138 E::set32(*(uint32_t*)&stubInstructions[3], 0xe7ffdefe); // trap
1139 _stubOptimizedCount++;
1140 }
1141 }
1142
1143
1144
1145
1146 template <typename P>
1147 void StubOptimizer<P>::optimizeArm64CallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools)
1148 {
1149 forEachCallSiteToAStub([&](uint8_t kind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction) -> bool {
1150 if ( kind != DYLD_CACHE_ADJ_V2_ARM64_BR26 )
1151 return false;
1152 // skip all but BL or B
1153 if ( (instruction & 0x7C000000) != 0x14000000 )
1154 return false;
1155 // compute target of branch instruction
1156 int32_t brDelta = (instruction & 0x03FFFFFF) << 2;
1157 if ( brDelta & 0x08000000 )
1158 brDelta |= 0xF0000000;
1159 uint64_t targetAddr = callSiteAddr + (int64_t)brDelta;
1160 if ( targetAddr != stubAddr ) {
1161 warning("stub target mismatch");
1162 return false;
1163 }
1164 // ignore branch if not to a known stub
1165 const auto& pos = _stubAddrToLPAddr.find((pint_t)targetAddr);
1166 if ( pos == _stubAddrToLPAddr.end() )
1167 return false;
1168 // ignore branch if lazy pointer is not known (could be resolver based)
1169 uint64_t lpAddr = pos->second;
1170 const auto& pos2 = _lpAddrToTargetAddr.find((pint_t)lpAddr);
1171 if ( pos2 == _lpAddrToTargetAddr.end() )
1172 return false;
1173 uint64_t finalTargetAddr = pos2->second;
1174 int64_t deltaToFinalTarget = finalTargetAddr - callSiteAddr;
1175 // if final target within range, change to branch there directly
1176 if ( (deltaToFinalTarget > -b128MegLimit) && (deltaToFinalTarget < b128MegLimit) ) {
1177 instruction= (instruction & 0xFC000000) | ((deltaToFinalTarget >> 2) & 0x03FFFFFF);
1178 _branchesDirectCount++;
1179 return true;
1180 }
1181 // find closest branch island pool between instruction and target and get island
1182 const auto& pos3 = _targetAddrToName.find((pint_t)finalTargetAddr);
1183 if ( pos3 == _targetAddrToName.end() )
1184 return false;
1185 const char* targetName = pos3->second;
1186 if ( finalTargetAddr > callSiteAddr ) {
1187 // target is after branch so find first pool after branch
1188 for ( BranchPoolDylib<P>* pool : branchIslandPools ) {
1189 if ( (pool->addr() > callSiteAddr) && (pool->addr() < finalTargetAddr) ) {
1190 uint64_t brIslandAddr = pool->getForwardBranch(finalTargetAddr, targetName, branchIslandPools);
1191 if ( brIslandAddr == 0 ) {
1192 // branch island pool full
1193 warning("pool full. Can't optimizer branch to %s from 0x%llX in %s\n", targetName, callSiteAddr, _installName);
1194 break;
1195 }
1196 int64_t deltaToTarget = brIslandAddr - callSiteAddr;
1197 instruction = (instruction & 0xFC000000) | ((deltaToTarget >> 2) & 0x03FFFFFF);
1198 _branchesIslandCount++;
1199 return true;
1200 }
1201 }
1202 }
1203 else {
1204 // target is before branch so find closest pool before branch
1205 for (size_t j = branchIslandPools.size(); j > 0; --j) {
1206 BranchPoolDylib<P>* pool = branchIslandPools[j-1];
1207 if ( (pool->addr() < callSiteAddr) && (pool->addr() > finalTargetAddr) ) {
1208 uint64_t brIslandAddr = pool->getBackBranch(finalTargetAddr, targetName, branchIslandPools);
1209 if ( brIslandAddr == 0 ) {
1210 // branch island pool full
1211 warning("pool full. Can't optimizer branch to %s from 0x%llX in %s\n", targetName, callSiteAddr, _installName);
1212 break;
1213 }
1214 int64_t deltaToTarget = brIslandAddr - callSiteAddr;
1215 instruction = (instruction & 0xFC000000) | ((deltaToTarget >> 2) & 0x03FFFFFF);
1216 _branchesIslandCount++;
1217 return true;
1218 }
1219 }
1220 }
1221 return false;
1222 });
1223 }
1224
1225
1226 template <typename P>
1227 void StubOptimizer<P>::optimizeCallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools)
1228 {
1229 if ( _textSection == NULL )
1230 return;
1231 if ( _stubSection == NULL )
1232 return;
1233
1234
1235 switch ( _mh->cputype() ) {
1236 case CPU_TYPE_ARM64:
1237 optimizeArm64CallSites(branchIslandPools);
1238 if ( verbose ) {
1239 verboseLog("%5u branches in __text, %5u changed to direct branches, %5u changed to use islands for %s",
1240 _branchesCount, _branchesDirectCount, _branchesIslandCount, _installName);
1241 }
1242 break;
1243 case CPU_TYPE_ARM:
1244 optimizeArmCallSites();
1245 optimizeArmStubs();
1246 if ( verbose ) {
1247 verboseLog("%3u of %3u stubs optimized. %5u branches in __text, %5u changed to direct branches for %s",
1248 _stubOptimizedCount, _stubCount, _branchesCount, _branchesDirectCount, _installName);
1249 }
1250 break;
1251 }
1252 }
1253
1254
1255 template <typename P>
1256 void SharedCache::bypassStubs(const std::vector<uint64_t>& branchPoolStartAddrs)
1257 {
1258 verboseLog("Stub elimination optimization:");
1259
1260 // construct a StubOptimizer for each image
1261 std::vector<StubOptimizer<P>*> optimizers;
1262 forEachImage([&](const void* mh, const char*, time_t, ino_t, const std::vector<MachOProxy::Segment>&) {
1263 optimizers.push_back(new StubOptimizer<P>(_buffer.get(), (macho_header<P>*)mh));
1264 });
1265
1266 // construct a BranchPoolDylib for each pool
1267 std::vector<BranchPoolDylib<P>*> pools;
1268
1269 if ( _arch.arch == CPU_TYPE_ARM64 ) {
1270 // Find hole at end of linkedit region for branch pool linkedits
1271 uint64_t textRegionStartAddr = 0;
1272 uint64_t linkEditRegionStartAddr = 0;
1273 uint64_t linkEditRegionEndAddr = 0;
1274 uint64_t linkEditRegionStartCacheOffset = 0;
1275 forEachRegion([&] (void* content, uint64_t vmAddr, uint64_t size, uint32_t permissions) {
1276 if ( permissions == (PROT_READ|PROT_EXEC) ) {
1277 textRegionStartAddr = vmAddr;
1278 }
1279 else if ( permissions == PROT_READ ) {
1280 linkEditRegionStartAddr = vmAddr;
1281 linkEditRegionEndAddr = vmAddr + size;
1282 linkEditRegionStartCacheOffset = (char*)content - (char*)_buffer.get();
1283 }
1284 });
1285 uint64_t lastLinkEditRegionUsedOffset = 0;
1286 forEachImage([&](const void* mh, const char*, time_t, ino_t, const std::vector<MachOProxy::Segment>& segs) {
1287 for (MachOProxy::Segment seg : segs) {
1288 if ( seg.name != "__LINKEDIT" )
1289 continue;
1290 if ( seg.fileOffset >= lastLinkEditRegionUsedOffset )
1291 lastLinkEditRegionUsedOffset = seg.fileOffset + seg.size;
1292 }
1293 });
1294 uint64_t allPoolsLinkEditStartOffset = lastLinkEditRegionUsedOffset;
1295 uint64_t allPoolsLinkEditStartAddr = linkEditRegionStartAddr + allPoolsLinkEditStartOffset - linkEditRegionStartCacheOffset;
1296 uint64_t allPoolsLinkEditSize = linkEditRegionEndAddr - allPoolsLinkEditStartAddr;
1297 if ( !branchPoolStartAddrs.empty() ) {
1298 uint64_t poolLinkEditStartAddr = allPoolsLinkEditStartAddr;
1299 uint64_t poolLinkEditStartOffset = allPoolsLinkEditStartOffset;
1300 const uint64_t poolSize = (allPoolsLinkEditSize/branchPoolStartAddrs.size()) & (-4096);
1301 for (uint64_t poolAddr : branchPoolStartAddrs) {
1302 pools.push_back(new BranchPoolDylib<P>(_arch, _buffer.get(), _fileSize, poolAddr,
1303 textRegionStartAddr, poolLinkEditStartAddr, poolLinkEditStartOffset));
1304 poolLinkEditStartAddr += poolSize;
1305 poolLinkEditStartOffset += poolSize;
1306 }
1307 }
1308 }
1309
1310 // build set of functions to never stub-eliminate because tools may need to override them
1311 std::unordered_set<std::string> neverStubEliminate;
1312 for (const char** p=sNeverStubEliminateSymbols; *p != nullptr; ++p) {
1313 neverStubEliminate.insert(*p);
1314 }
1315 for (const char** d=sNeverStubEliminateDylibs; *d != nullptr; ++d) {
1316 for (StubOptimizer<P>* op : optimizers) {
1317 if ( strcmp(op->installName(), *d) == 0 ) {
1318 // add all exports
1319 const uint8_t* exportsStart = op->exportsTrie();
1320 const uint8_t* exportsEnd = exportsStart + op->exportsTrieSize();
1321 std::vector<ExportInfoTrie::Entry> exports;
1322 if ( !ExportInfoTrie::parseTrie(exportsStart, exportsEnd, exports) ) {
1323 terminate("malformed exports trie in %s", *d);
1324 }
1325 for(const ExportInfoTrie::Entry& entry : exports) {
1326 neverStubEliminate.insert(entry.name);
1327 }
1328 }
1329 }
1330 }
1331
1332 // build maps of stubs-to-lp and lp-to-target
1333 for (StubOptimizer<P>* op : optimizers)
1334 op->buildStubMap(neverStubEliminate);
1335
1336 // optimize call sites to by-pass stubs or jump through island
1337 for (StubOptimizer<P>* op : optimizers)
1338 op->optimizeCallSites(pools);
1339
1340 // final fix ups in branch pools
1341 for (BranchPoolDylib<P>* pool : pools) {
1342 pool->finalizeLoadCommands();
1343 pool->printStats();
1344 }
1345
1346 // write total optimization info
1347 uint32_t callSiteCount = 0;
1348 uint32_t callSiteDirectOptCount = 0;
1349 uint32_t callSiteOneHopOptCount = 0;
1350 for (StubOptimizer<P>* op : optimizers) {
1351 callSiteCount += op->_branchesCount;
1352 callSiteDirectOptCount += op->_branchesDirectCount;
1353 callSiteOneHopOptCount += op->_branchesIslandCount;
1354 }
1355 verboseLog(" cache contains %u call sites of which %u were direct bound and %u were bound through islands", callSiteCount, callSiteDirectOptCount, callSiteOneHopOptCount);
1356
1357 // clean up
1358 for (StubOptimizer<P>* op : optimizers)
1359 delete op;
1360 for (BranchPoolDylib<P>* p : pools)
1361 delete p;
1362
1363 }
1364
1365
1366 void SharedCache::bypassStubs(const std::vector<uint64_t>& branchPoolStartAddrs) {
1367 switch( _arch.arch ) {
1368 case CPU_TYPE_ARM:
1369 bypassStubs<Pointer32<LittleEndian>>(branchPoolStartAddrs);
1370 break;
1371 case CPU_TYPE_ARM64:
1372 bypassStubs<Pointer64<LittleEndian>>(branchPoolStartAddrs);
1373 break;
1374 default:
1375 // no stub optimization done for other arches
1376 break;
1377 }
1378 }
1379
1380
1381 /*
1382 template <typename P>
1383 void StubOptimizer<P>::optimizeStubs(std::unordered_map<uint64_t,std::vector<uint64_t>>& targetToBranchIslands)
1384 {
1385 for (const auto& stubEntry : _stubAddrToLPAddr) {
1386 pint_t stubVMAddr = stubEntry.first;
1387 pint_t lpVMAddr = stubEntry.second;
1388 const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
1389 if ( pos == _lpAddrToTargetAddr.end() )
1390 continue;
1391 pint_t targetVMAddr = pos->second;
1392 int64_t delta = targetVMAddr - stubVMAddr;
1393 if ( (delta > -b128MegLimit) && (delta < b128MegLimit) ) {
1394 // target within reach, change stub to direct branch
1395 uint32_t* stubInstructions = (uint32_t*)((uint8_t*)_cacheBuffer + _textSegCacheOffset + stubVMAddr -_textSegStartAddr);
1396 uint32_t stubInstr1 = E::get32(stubInstructions[0]);
1397 if ( (stubInstr1 & 0x9F00001F) != 0x90000010 ) {
1398 warning("first instruction of stub (0x%08X) is no longer ADRP for stub at addr 0x%0X in %s\n",
1399 stubInstr1, stubVMAddr, _installName);
1400 continue;
1401 }
1402 uint32_t directBranchInstr = 0x14000000 + ((delta/4) & 0x03FFFFFF);
1403 E::set32(stubInstructions[0], directBranchInstr);
1404 uint32_t brkInstr = 0xD4200000;
1405 E::set32(stubInstructions[1], brkInstr);
1406 E::set32(stubInstructions[2], brkInstr);
1407 _stubOptimizedCount++;
1408 targetToBranchIslands[targetVMAddr].push_back(stubVMAddr);
1409 }
1410 }
1411 verboseLog("%3u of %3u stubs optimized for %s\n", _stubOptimizedCount, _stubCount, _installName);
1412 }
1413
1414
1415 template <typename P>
1416 void StubOptimizer<P>::bypassStubs(std::unordered_map<uint64_t,std::vector<uint64_t>>& targetToBranchIslands)
1417 {
1418 if ( _textSection == NULL )
1419 return;
1420
1421 // scan __text section looking for B(L) instructions that branch to a stub
1422 unsigned instructionCount = (unsigned)(_textSection->size() / 4);
1423 uint32_t* instructions = (uint32_t*)((uint8_t*)_cacheBuffer + _textSegCacheOffset + _textSection->addr() -_textSegStartAddr);
1424 for (unsigned i=0; i < instructionCount; ++i) {
1425 uint32_t instr = E::get32(instructions[i]);
1426 // skip all but BL or B
1427 if ( (instr & 0x7C000000) != 0x14000000 )
1428 continue;
1429 // compute target of branch instruction
1430 int32_t brDelta = (instr & 0x03FFFFFF) << 2;
1431 if ( brDelta & 0x08000000 )
1432 brDelta |= 0xF0000000;
1433 uint64_t branchAddr = _textSection->addr() + i*4;
1434 uint64_t targetAddr = branchAddr + (int64_t)brDelta;
1435 // ignore branch if not to a known stub
1436 const auto& pos = _stubAddrToLPAddr.find(targetAddr);
1437 if ( pos == _stubAddrToLPAddr.end() )
1438 continue;
1439 _branchesCount++;
1440 // ignore branch if lazy pointer is not known (could be resolver based)
1441 const auto& pos2 = _lpAddrToTargetAddr.find(pos->second);
1442 if ( pos2 == _lpAddrToTargetAddr.end() )
1443 continue;
1444 uint64_t finalTargetAddr = pos2->second;
1445 int64_t deltaToFinalTarget = finalTargetAddr - branchAddr;
1446 // if final target within range, change to branch there directly
1447 if ( (deltaToFinalTarget > -b128MegLimit) && (deltaToFinalTarget < b128MegLimit) ) {
1448 uint32_t newInstr = (instr & 0xFC000000) | ((deltaToFinalTarget >> 2) & 0x03FFFFFF);
1449 E::set32(instructions[i], newInstr);
1450 _branchesDirectCount++;
1451 continue;
1452 }
1453 // see if there is an existing branch island in range that can be used
1454 std::vector<uint64_t>& existingBranchIslands = targetToBranchIslands[finalTargetAddr];
1455 for (uint64_t branchIslandAddr : existingBranchIslands) {
1456 int64_t deltaToBranchIsland = branchIslandAddr - branchAddr;
1457 // if final target within range, change to branch deltaToBranchIsland directly
1458 if ( (deltaToBranchIsland > -b128MegLimit) && (deltaToFinalTarget < b128MegLimit) ) {
1459 uint32_t newInstr = (instr & 0xFC000000) | ((deltaToBranchIsland >> 2) & 0x03FFFFFF);
1460 E::set32(instructions[i], newInstr);
1461 _branchesIslandCount++;
1462 break;
1463 }
1464 }
1465 }
1466 if ( verbose ) {
1467 verboseLog("%5u branches in __text, %5u changed to direct branches, %5u changed to indirect for %s\n",
1468 _branchesCount, _branchesDirectCount, _branchesIslandCount, _installName);
1469 }
1470 }
1471 */
1472