/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*-
 *
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
#include <sys/types.h>
#include <string.h>
#include <assert.h>

#include <CommonCrypto/CommonDigest.h>

#include <string>
#include <vector>
#include <functional>
#include <unordered_map>
#include <unordered_set>

#include "StringUtils.h"
#include "Trie.hpp"
#include "MachOFileAbstraction.hpp"
#include "MachOAnalyzer.h"
#include "Diagnostics.h"
#include "DyldSharedCache.h"
#include "CacheBuilder.h"

static const bool verbose = false;

// These are functions that are interposed by Instruments.app or ASan
static const char* sNeverStubEliminateSymbols[] = {
    "__objc_autoreleasePoolPop",
    "_dispatch_barrier_async_f",
    "_dispatch_group_async",
    "_dispatch_group_async_f",
    "_dispatch_source_set_cancel_handler",
    "_dispatch_source_set_event_handler",
    "_malloc_create_zone",
    "_malloc_default_purgeable_zone",
    "_malloc_default_zone",
    "_malloc_make_nonpurgeable",
    "_malloc_make_purgeable",
    "_malloc_set_zone_name",
    "_objc_autoreleasePoolPop",
    "_objc_setProperty_atomic",
    "_objc_setProperty_atomic_copy",
    "_objc_setProperty_nonatomic",
    "_objc_setProperty_nonatomic_copy",
    "_pthread_attr_getdetachstate",
    "_pthread_attr_getguardsize",
    "_pthread_attr_getinheritsched",
    "_pthread_attr_getschedparam",
    "_pthread_attr_getschedpolicy",
    "_pthread_attr_getscope",
    "_pthread_attr_getstack",
    "_pthread_attr_getstacksize",
    "_pthread_condattr_getpshared",
    "_pthread_getschedparam",
    "_pthread_mutex_lock",
    "_pthread_mutex_unlock",
    "_pthread_mutexattr_getprioceiling",
    "_pthread_mutexattr_getprotocol",
    "_pthread_mutexattr_getpshared",
    "_pthread_mutexattr_gettype",
    "_pthread_rwlockattr_getpshared",
    // <rdar://problem/22050956> always use stubs for C++ symbols that can be overridden
    nullptr
};

static uint64_t branchPoolTextSize(const std::string& archName)
{
    if ( startsWith(archName, "arm64") )
        return 0x0000C000; // 48KB
    return 0;
}

static uint64_t branchPoolLinkEditSize(const std::string& archName)
{
    if ( startsWith(archName, "arm64") )
        return 0x00100000; // 1MB
    return 0;
}
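
// Sizing note (illustrative arithmetic, not from the original source): each
// branch-island stub is a single 4-byte B instruction, and the stubs start at
// _firstStubOffset (0x280) past the pool's mach_header. So a 48KB (0xC000)
// __TEXT pool holds roughly (0xC000 - 0x280) / 4 = 12,128 islands, e.g.:
//
//     const unsigned stubCount = (0xC000 - 0x280) / sizeof(uint32_t); // 12128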

template <typename P>
class BranchPoolDylib
{
public:
                BranchPoolDylib(DyldSharedCache* cache, uint64_t startAddr,
                                uint64_t textRegionStartAddr, uint64_t poolLinkEditStartAddr, uint64_t poolLinkEditFileOffset,
                                Diagnostics& diags);

    uint64_t    addr() { return _startAddr; }
    uint64_t    getForwardBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools);
    uint64_t    getBackBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools);
    void        finalizeLoadCommands();
    void        printStats();

private:
    Diagnostics&    _diagnostics;
    uint64_t        indexToAddr(uint32_t index) { return _startAddr + _firstStubOffset + sizeof(uint32_t)*index; }

    static const int64_t b128MegLimit = 0x07FFFFFF;
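    // Why 0x07FFFFFF (descriptive comment, added for clarity): an arm64 B/BL
    // instruction encodes a signed 26-bit word offset, so it can reach
    // +/- 2^25 * 4 bytes = +/- 128MB from the branch. The limit here is in
    // bytes (0x07FFFFFF ~= 128MB - 1), which is why call sites farther away
    // must hop through a branch-island pool.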

    typedef typename P::uint_t pint_t;
    typedef typename P::E E;

    DyldSharedCache*                            _cacheBuffer;
    uint64_t                                    _startAddr;
    std::unordered_map<uint64_t, uint32_t>      _targetToIslandIndex;
    std::unordered_map<uint32_t, const char*>   _islandIndexToName;
    macho_symtab_command<P>*                    _symbolTableCmd;
    macho_dysymtab_command<P>*                  _dynamicSymbolTableCmd;
    macho_uuid_command<P>*                      _uuidCmd;
    uint32_t                                    _maxStubs;
    uint32_t                                    _nextIndex;
    uint32_t                                    _firstStubOffset;
    uint32_t*                                   _stubInstructions;
    macho_nlist<P>*                             _symbolTable;
    char*                                       _nextString;
    char*                                       _stringPoolStart;
    char*                                       _stringPoolEnd;
};

template <typename P>
BranchPoolDylib<P>::BranchPoolDylib(DyldSharedCache* cache, uint64_t poolStartAddr,
                                    uint64_t textRegionStartAddr, uint64_t poolLinkEditStartAddr, uint64_t poolLinkEditFileOffset,
                                    Diagnostics& diags)
    : _cacheBuffer(cache), _startAddr(poolStartAddr), _nextIndex(0), _firstStubOffset(0x280), _diagnostics(diags)
{
    std::string archName = cache->archName();
    bool is64 = (sizeof(typename P::uint_t) == 8);

    const int64_t  cacheSlide      = (long)cache - cache->unslidLoadAddress();
    const uint64_t textSegSize     = branchPoolTextSize(archName);
    const uint64_t linkEditSegSize = branchPoolLinkEditSize(archName);
    const unsigned stubCount       = (unsigned)((textSegSize - _firstStubOffset)/sizeof(uint32_t));
    const uint32_t linkeditOffsetSymbolTable         = 0;
    const uint32_t linkeditOffsetIndirectSymbolTable = stubCount*sizeof(macho_nlist<P>);
    const uint32_t linkeditOffsetSymbolPoolOffset    = linkeditOffsetIndirectSymbolTable + stubCount*sizeof(uint32_t);
    _maxStubs = stubCount;

    // write mach_header and load commands for pseudo dylib
    macho_header<P>* mh = (macho_header<P>*)((uint8_t*)cache + poolStartAddr - textRegionStartAddr);
    mh->set_magic(is64 ? MH_MAGIC_64 : MH_MAGIC);
    mh->set_cputype(dyld3::MachOFile::cpuTypeFromArchName(archName.c_str()));
    mh->set_cpusubtype(dyld3::MachOFile::cpuSubtypeFromArchName(archName.c_str()));
    mh->set_filetype(MH_DYLIB);
    mh->set_ncmds(6);
    mh->set_sizeofcmds(is64 ? 0x210 : 100); // FIXME: 32-bit size
    mh->set_flags(0x80000000);

    macho_load_command<P>* cmd = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
    macho_segment_command<P>* textSegCmd = (macho_segment_command<P>*)cmd;
    textSegCmd->set_cmd(is64 ? LC_SEGMENT_64 : LC_SEGMENT);
    textSegCmd->set_cmdsize(sizeof(macho_segment_command<P>)*2+sizeof(macho_section<P>));
    textSegCmd->set_segname("__TEXT");
    textSegCmd->set_vmaddr(poolStartAddr);
    textSegCmd->set_vmsize(textSegSize);
    textSegCmd->set_fileoff(poolStartAddr - textRegionStartAddr);
    textSegCmd->set_filesize(branchPoolTextSize(archName));
    textSegCmd->set_maxprot(PROT_READ|PROT_EXEC);
    textSegCmd->set_initprot(PROT_READ|PROT_EXEC);
    textSegCmd->set_nsects(1);
    textSegCmd->set_flags(0);
    macho_section<P>* stubSection = (macho_section<P>*)((uint8_t*)textSegCmd + sizeof(macho_segment_command<P>));
    stubSection->set_sectname("__stubs");
    stubSection->set_segname("__TEXT");
    stubSection->set_addr(poolStartAddr + _firstStubOffset);
    stubSection->set_size(textSegSize - _firstStubOffset);
    stubSection->set_offset((uint32_t)(poolStartAddr + _firstStubOffset - textRegionStartAddr));
    stubSection->set_align(2);
    stubSection->set_reloff(0);
    stubSection->set_nreloc(0);
    stubSection->set_flags(S_SYMBOL_STUBS | S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS);
    stubSection->set_reserved1(0); // start index in indirect table
    stubSection->set_reserved2(4); // size of stubs

    cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
    macho_segment_command<P>* linkEditSegCmd = (macho_segment_command<P>*)cmd;
    linkEditSegCmd->set_cmd(is64 ? LC_SEGMENT_64 : LC_SEGMENT);
    linkEditSegCmd->set_cmdsize(sizeof(macho_segment_command<P>));
    linkEditSegCmd->set_segname("__LINKEDIT");
    linkEditSegCmd->set_vmaddr(poolLinkEditStartAddr);
    linkEditSegCmd->set_vmsize(linkEditSegSize);
    linkEditSegCmd->set_fileoff(poolLinkEditFileOffset);
    linkEditSegCmd->set_filesize(linkEditSegSize);
    linkEditSegCmd->set_maxprot(PROT_READ);
    linkEditSegCmd->set_initprot(PROT_READ);
    linkEditSegCmd->set_nsects(0);
    linkEditSegCmd->set_flags(0);

    cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
    macho_dylib_command<P>* installNameCmd = (macho_dylib_command<P>*)cmd;
    installNameCmd->set_cmd(LC_ID_DYLIB);
    installNameCmd->set_cmdsize(sizeof(macho_dylib_command<P>) + 48);
    installNameCmd->set_timestamp(2);
    installNameCmd->set_current_version(0x10000);
    installNameCmd->set_compatibility_version(0x10000);
    installNameCmd->set_name_offset(sizeof(macho_dylib_command<P>));
    strcpy((char*)cmd + sizeof(macho_dylib_command<P>), "dyld_shared_cache_branch_islands");

    cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
    _symbolTableCmd = (macho_symtab_command<P>*)cmd;
    _symbolTableCmd->set_cmd(LC_SYMTAB);
    _symbolTableCmd->set_cmdsize(sizeof(macho_symtab_command<P>));
    _symbolTableCmd->set_nsyms(stubCount);
    _symbolTableCmd->set_symoff((uint32_t)(poolLinkEditFileOffset + linkeditOffsetSymbolTable));
    _symbolTableCmd->set_stroff((uint32_t)(poolLinkEditFileOffset + linkeditOffsetSymbolPoolOffset));
    _symbolTableCmd->set_strsize((uint32_t)(linkEditSegSize - linkeditOffsetSymbolPoolOffset));

    cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
    _dynamicSymbolTableCmd = (macho_dysymtab_command<P>*)cmd;
    _dynamicSymbolTableCmd->set_cmd(LC_DYSYMTAB);
    _dynamicSymbolTableCmd->set_cmdsize(sizeof(macho_dysymtab_command<P>));
    _dynamicSymbolTableCmd->set_ilocalsym(0);
    _dynamicSymbolTableCmd->set_nlocalsym(0);
    _dynamicSymbolTableCmd->set_iextdefsym(0);
    _dynamicSymbolTableCmd->set_nextdefsym(0);
    _dynamicSymbolTableCmd->set_iundefsym(0);
    _dynamicSymbolTableCmd->set_nundefsym(stubCount);
    _dynamicSymbolTableCmd->set_tocoff(0);
    _dynamicSymbolTableCmd->set_ntoc(0);
    _dynamicSymbolTableCmd->set_modtaboff(0);
    _dynamicSymbolTableCmd->set_nmodtab(0);
    _dynamicSymbolTableCmd->set_extrefsymoff(0);
    _dynamicSymbolTableCmd->set_nextrefsyms(0);
    _dynamicSymbolTableCmd->set_indirectsymoff((uint32_t)(poolLinkEditFileOffset + linkeditOffsetIndirectSymbolTable));
    _dynamicSymbolTableCmd->set_nindirectsyms(stubCount);
    _dynamicSymbolTableCmd->set_extreloff(0);
    _dynamicSymbolTableCmd->set_nextrel(0);
    _dynamicSymbolTableCmd->set_locreloff(0);
    _dynamicSymbolTableCmd->set_nlocrel(0);
    cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());

    _uuidCmd = (macho_uuid_command<P>*)cmd;
    _uuidCmd->set_cmd(LC_UUID);
    _uuidCmd->set_cmdsize(sizeof(macho_uuid_command<P>));
    cmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());

    // write stubs section content
    _stubInstructions = (uint32_t*)((uint8_t*)mh + _firstStubOffset);
    for (unsigned i=0; i < stubCount; ++i) {
        E::set32(_stubInstructions[i], 0xD4200000);
    }

    // write linkedit content
    uint8_t* linkeditBufferStart = (uint8_t*)poolLinkEditStartAddr + cacheSlide;
    // write symbol table
    _symbolTable = (macho_nlist<P>*)(linkeditBufferStart);
    for (unsigned i=0; i < stubCount; ++i) {
        _symbolTable[i].set_n_strx(1);
        _symbolTable[i].set_n_type(N_UNDF | N_EXT);
        _symbolTable[i].set_n_sect(0);
        _symbolTable[i].set_n_desc(0);
        _symbolTable[i].set_n_value(0);
    }
    // write indirect symbol table
    uint32_t* indirectSymbolTable = (uint32_t*)(linkeditBufferStart + linkeditOffsetIndirectSymbolTable);
    for (unsigned i=0; i < stubCount; ++i) {
        P::E::set32(indirectSymbolTable[i], i);
    }
    // write string pool
    _stringPoolStart = (char*)(linkeditBufferStart + linkeditOffsetSymbolPoolOffset);
    _stringPoolEnd = _stringPoolStart + linkEditSegSize - linkeditOffsetSymbolPoolOffset;
    _stringPoolStart[0] = '\0';
    strcpy(&_stringPoolStart[1], "<unused>");
    _nextString = &_stringPoolStart[10];
}

template <typename P>
void BranchPoolDylib<P>::finalizeLoadCommands()
{
    _symbolTableCmd->set_nsyms(_nextIndex);
    _symbolTableCmd->set_strsize((uint32_t)(_nextString - _stringPoolStart));
    _dynamicSymbolTableCmd->set_nundefsym(_nextIndex);

    uint8_t digest[CC_MD5_DIGEST_LENGTH];
    CC_MD5(_stubInstructions, _maxStubs*sizeof(uint32_t), digest);
    _uuidCmd->set_uuid(digest);

    if ( verbose ) {
        _diagnostics.verbose("branch islands in image at 0x%0llX:\n", _startAddr);
        for (uint32_t i=0; i < _nextIndex; ++i) {
            _diagnostics.verbose("   0x%llX  %s\n", indexToAddr(i), _islandIndexToName[i]);
        }
    }
}

template <typename P>
uint64_t BranchPoolDylib<P>::getForwardBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools)
{
    // check if we can re-use an existing branch island
    const auto& pos = _targetToIslandIndex.find(finalTargetAddr);
    if ( pos != _targetToIslandIndex.end() )
        return indexToAddr(pos->second);

    // skip if instruction pool is full
    if ( _nextIndex >= _maxStubs )
        return 0;

    // skip if string pool is full
    if ( (_nextString + strlen(name)+1) >= _stringPoolEnd )
        return 0;

    uint64_t branchIslandTargetAddr = finalTargetAddr;
    // if final target is too far, we need to use a branch island in the next pool
    if ( (finalTargetAddr - _startAddr) > b128MegLimit ) {
        BranchPoolDylib<P>* nextPool = nullptr;
        for (size_t i=0; i < branchIslandPools.size()-1; ++i) {
            if ( branchIslandPools[i] == this ) {
                nextPool = branchIslandPools[i+1];
                break;
            }
        }

        if (nextPool == nullptr) {
            _diagnostics.warning("BranchPoolDylib<P>::getForwardBranch: nextPool unreachable");
            return 0;
        }

        branchIslandTargetAddr = nextPool->getForwardBranch(finalTargetAddr, name, branchIslandPools);
        if ( branchIslandTargetAddr == 0 )
            return 0; // next pool is full
    }

    // write branch instruction in stubs section
    uint32_t index = _nextIndex++;
    int64_t branchDelta = branchIslandTargetAddr - indexToAddr(index);
    uint32_t branchInstr = 0x14000000 + ((branchDelta/4) & 0x03FFFFFF);
    E::set32(_stubInstructions[index], branchInstr);

    // update symbol table
    _symbolTable[index].set_n_strx((uint32_t)(_nextString - _stringPoolStart));
    strcpy(_nextString, name);
    _nextString += (strlen(name) +1);

    _targetToIslandIndex[finalTargetAddr] = index;
    _islandIndexToName[index] = name;
    return indexToAddr(index);
}

template <typename P>
uint64_t BranchPoolDylib<P>::getBackBranch(uint64_t finalTargetAddr, const char* name, std::vector<BranchPoolDylib<P>*>& branchIslandPools)
{
    // check if we can re-use an existing branch island
    const auto& pos = _targetToIslandIndex.find(finalTargetAddr);
    if ( pos != _targetToIslandIndex.end() )
        return indexToAddr(pos->second);

    // skip if instruction pool is full
    if ( _nextIndex >= _maxStubs )
        return 0;

    // skip if string pool is full
    if ( (_nextString + strlen(name)+1) >= _stringPoolEnd )
        return 0;

    uint64_t branchIslandTargetAddr = finalTargetAddr;
    // if final target is too far, we need to use a branch island in the previous pool
    if ( (indexToAddr(_nextIndex) - finalTargetAddr) > b128MegLimit ) {
        BranchPoolDylib<P>* nextPool = nullptr;
        for (long i=branchIslandPools.size()-1; i > 0; --i) {
            if ( branchIslandPools[i] == this ) {
                nextPool = branchIslandPools[i-1];
                break;
            }
        }

        if (nextPool == nullptr) {
            _diagnostics.warning("BranchPoolDylib<P>::getBackBranch: nextPool unreachable");
            return 0;
        }

        branchIslandTargetAddr = nextPool->getBackBranch(finalTargetAddr, name, branchIslandPools);
        if ( branchIslandTargetAddr == 0 )
            return 0; // previous pool is full
    }

    // write branch instruction in stubs section
    uint32_t index = _nextIndex++;
    int64_t branchDelta = branchIslandTargetAddr - indexToAddr(index);
    uint32_t branchInstr = 0x14000000 + ((branchDelta/4) & 0x03FFFFFF);
    E::set32(_stubInstructions[index], branchInstr);

    // update symbol table
    _symbolTable[index].set_n_strx((uint32_t)(_nextString - _stringPoolStart));
    strcpy(_nextString, name);
    _nextString += (strlen(name) +1);

    _targetToIslandIndex[finalTargetAddr] = index;
    _islandIndexToName[index] = name;
    return indexToAddr(index);
}

template <typename P>
void BranchPoolDylib<P>::printStats()
{
    _diagnostics.verbose("  island pool at 0x%0llX has %u stubs and stringPool size=%lu\n", _startAddr, _nextIndex, (unsigned long)(_nextString - _stringPoolStart));
}


template <typename P>
class StubOptimizer
{
public:
                   StubOptimizer(const DyldSharedCache* cache, macho_header<P>* mh, Diagnostics& diags);
    void           buildStubMap(const std::unordered_set<std::string>& neverStubEliminate);
    void           optimizeStubs(std::unordered_map<uint64_t,std::vector<uint64_t>>& targetToBranchIslands);
    void           optimizeCallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools);
    const char*    installName() { return _installName; }
    const uint8_t* exportsTrie() { return &_linkeditBias[_dyldInfo->export_off()]; }
    uint32_t       exportsTrieSize() { return _dyldInfo->export_size(); }

    uint32_t    _stubCount = 0;
    uint32_t    _stubOptimizedCount = 0;
    uint32_t    _branchesCount = 0;
    uint32_t    _branchesModifiedCount = 0;
    uint32_t    _branchesDirectCount = 0;
    uint32_t    _branchesIslandCount = 0;

private:
    Diagnostics _diagnostics;
    typedef std::function<bool(uint8_t callSiteKind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction)> CallSiteHandler;
    typedef typename P::uint_t pint_t;
    typedef typename P::E E;

    void        forEachCallSiteToAStub(CallSiteHandler);
    void        optimizeArm64CallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools);
    void        optimizeArmCallSites();
    void        optimizeArmStubs();
    uint64_t    lazyPointerAddrFromArm64Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr);
#if SUPPORT_ARCH_arm64e
    uint64_t    lazyPointerAddrFromArm64eStub(const uint8_t* stubInstructions, uint64_t stubVMAddr);
#endif
    uint32_t    lazyPointerAddrFromArmStub(const uint8_t* stubInstructions, uint32_t stubVMAddr);
    int32_t     getDisplacementFromThumbBranch(uint32_t instruction, uint32_t instrAddr);
    uint32_t    setDisplacementInThumbBranch(uint32_t instruction, uint32_t instrAddr,
                                             int32_t displacement, bool targetIsThumb);
    void        bypassStubs(std::unordered_map<uint64_t,std::vector<uint64_t>>& targetToBranchIslands);

    struct AddressAndName { pint_t targetVMAddr; const char* targetName; };
    typedef std::unordered_map<pint_t, AddressAndName> StubVMAddrToTarget;

    static const int64_t b128MegLimit = 0x07FFFFFF;
    static const int64_t b16MegLimit  = 0x00FFFFFF;

    macho_header<P>*                            _mh;
    int64_t                                     _cacheSlide = 0;
    uint64_t                                    _cacheUnslideAddr = 0;
    bool                                        _chainedFixups = false;
    uint32_t                                    _linkeditSize = 0;
    uint64_t                                    _linkeditAddr = 0;
    const uint8_t*                              _linkeditBias = nullptr;
    const char*                                 _installName = nullptr;
    const macho_symtab_command<P>*              _symTabCmd = nullptr;
    const macho_dysymtab_command<P>*            _dynSymTabCmd = nullptr;
    const macho_dyld_info_command<P>*           _dyldInfo = nullptr;
    macho_linkedit_data_command<P>*             _splitSegInfoCmd = nullptr;
    const macho_section<P>*                     _textSection = nullptr;
    const macho_section<P>*                     _stubSection = nullptr;
    uint32_t                                    _textSectionIndex = 0;
    uint32_t                                    _stubSectionIndex = 0;
    pint_t                                      _textSegStartAddr = 0;
    void*                                       _cacheBuffer = nullptr;
    uint64_t                                    _textSegCacheOffset = 0;
    std::vector<macho_segment_command<P>*>      _segCmds;
    std::unordered_map<pint_t, pint_t>          _stubAddrToLPAddr;
    std::unordered_map<pint_t, pint_t>          _lpAddrToTargetAddr;
    std::unordered_map<pint_t, const char*>     _targetAddrToName;
};

template <typename P>
StubOptimizer<P>::StubOptimizer(const DyldSharedCache* cache, macho_header<P>* mh, Diagnostics& diags)
    : _mh(mh), _diagnostics(diags)
{
    _cacheSlide = (long)cache - cache->unslidLoadAddress();
    _cacheUnslideAddr = cache->unslidLoadAddress();
#if SUPPORT_ARCH_arm64e
    _chainedFixups = (strcmp(cache->archName(), "arm64e") == 0);
#else
    _chainedFixups = false;
#endif
    const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
    const uint32_t cmd_count = mh->ncmds();
    macho_segment_command<P>* segCmd;
    uint32_t sectionIndex = 0;
    const macho_load_command<P>* cmd = cmds;
    for (uint32_t i = 0; i < cmd_count; ++i) {
        switch (cmd->cmd()) {
            case LC_ID_DYLIB:
                _installName = ((macho_dylib_command<P>*)cmd)->name();
                break;
            case LC_SYMTAB:
                _symTabCmd = (macho_symtab_command<P>*)cmd;
                break;
            case LC_DYSYMTAB:
                _dynSymTabCmd = (macho_dysymtab_command<P>*)cmd;
                break;
            case LC_SEGMENT_SPLIT_INFO:
                _splitSegInfoCmd = (macho_linkedit_data_command<P>*)cmd;
                break;
            case LC_DYLD_INFO:
            case LC_DYLD_INFO_ONLY:
                _dyldInfo = (macho_dyld_info_command<P>*)cmd;
                break;
            case macho_segment_command<P>::CMD:
                segCmd = (macho_segment_command<P>*)cmd;
                _segCmds.push_back(segCmd);
                if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
                    _linkeditBias = (uint8_t*)(segCmd->vmaddr() + _cacheSlide - segCmd->fileoff());
                    _linkeditSize = (uint32_t)segCmd->vmsize();
                    _linkeditAddr = segCmd->vmaddr();
                }
                else if ( strcmp(segCmd->segname(), "__TEXT") == 0 ) {
                    _textSegStartAddr = (pint_t)segCmd->vmaddr();
                    const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
                    const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
                    for (const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
                        ++sectionIndex;
                        if ( strcmp(sect->sectname(), "__text") == 0 ) {
                            _textSection = sect;
                            _textSectionIndex = sectionIndex;
                        }
                        else if ( ((sect->flags() & SECTION_TYPE) == S_SYMBOL_STUBS) && (sect->size() != 0) ) {
                            _stubSection = sect;
                            _stubSectionIndex = sectionIndex;
                        }
                    }
                }
                break;
        }
        cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
    }
}

template <typename P>
uint32_t StubOptimizer<P>::lazyPointerAddrFromArmStub(const uint8_t* stubInstructions, uint32_t stubVMAddr)
{
    uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
    uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions+4));
    uint32_t stubInstr3 = E::get32(*(uint32_t*)(stubInstructions+8));
    int32_t  stubData   = E::get32(*(uint32_t*)(stubInstructions+12));
    if ( stubInstr1 != 0xe59fc004 ) {
        _diagnostics.warning("first instruction of stub (0x%08X) is not 'ldr ip, pc + 12' for stub at addr 0x%0llX in %s",
                stubInstr1, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    if ( stubInstr2 != 0xe08fc00c ) {
        _diagnostics.warning("second instruction of stub (0x%08X) is not 'add ip, pc, ip' for stub at addr 0x%0llX in %s",
                stubInstr2, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    if ( stubInstr3 != 0xe59cf000 ) {
        _diagnostics.warning("third instruction of stub (0x%08X) is not 'ldr pc, [ip]' for stub at addr 0x%0llX in %s",
                stubInstr3, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    return stubVMAddr + 12 + stubData;
}

template <typename P>
uint64_t StubOptimizer<P>::lazyPointerAddrFromArm64Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr)
{
    uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
    if ( (stubInstr1 & 0x9F00001F) != 0x90000010 ) {
        _diagnostics.warning("first instruction of stub (0x%08X) is not ADRP for stub at addr 0x%0llX in %s",
                stubInstr1, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    int32_t adrpValue = ((stubInstr1 & 0x00FFFFE0) >> 3) | ((stubInstr1 & 0x60000000) >> 29);
    if ( stubInstr1 & 0x00800000 )
        adrpValue |= 0xFFF00000;
    uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions + 4));
    if ( (stubInstr2 & 0xFFC003FF) != 0xF9400210 ) {
        _diagnostics.warning("second instruction of stub (0x%08X) is not LDR for stub at addr 0x%0llX in %s",
                stubInstr2, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    uint32_t ldrValue = ((stubInstr2 >> 10) & 0x00000FFF);
    return (stubVMAddr & (-4096)) + adrpValue*4096 + ldrValue*8;
}

#if SUPPORT_ARCH_arm64e
template <typename P>
uint64_t StubOptimizer<P>::lazyPointerAddrFromArm64eStub(const uint8_t* stubInstructions, uint64_t stubVMAddr)
{
    uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
    // ADRP X17, lazy_pointer@page
    if ( (stubInstr1 & 0x9F00001F) != 0x90000011 ) {
        _diagnostics.warning("first instruction of stub (0x%08X) is not ADRP for stub at addr 0x%0llX in %s",
                stubInstr1, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    int32_t adrpValue = ((stubInstr1 & 0x00FFFFE0) >> 3) | ((stubInstr1 & 0x60000000) >> 29);
    if ( stubInstr1 & 0x00800000 )
        adrpValue |= 0xFFF00000;

    // ADD X17, X17, lazy_pointer@pageoff
    uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions + 4));
    if ( (stubInstr2 & 0xFFC003FF) != 0x91000231 ) {
        _diagnostics.warning("second instruction of stub (0x%08X) is not ADD for stub at addr 0x%0llX in %s",
                stubInstr2, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    uint32_t addValue = ((stubInstr2 & 0x003FFC00) >> 10);

    // LDR X16, [X17]
    uint32_t stubInstr3 = E::get32(*(uint32_t*)(stubInstructions + 8));
    if ( stubInstr3 != 0xF9400230 ) {
        _diagnostics.warning("third instruction of stub (0x%08X) is not LDR for stub at addr 0x%0llX in %s",
                stubInstr3, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    return (stubVMAddr & (-4096)) + adrpValue*4096 + addValue;
}
#endif


template <typename P>
void StubOptimizer<P>::buildStubMap(const std::unordered_set<std::string>& neverStubEliminate)
{
    // find all stubs and lazy pointers
    const macho_nlist<P>* symbolTable = (const macho_nlist<P>*)(&_linkeditBias[_symTabCmd->symoff()]);
    const char* symbolStrings = (char*)(&_linkeditBias[_symTabCmd->stroff()]);
    const uint32_t* const indirectTable = (uint32_t*)(&_linkeditBias[_dynSymTabCmd->indirectsymoff()]);
    const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)_mh + sizeof(macho_header<P>));
    const uint32_t cmd_count = _mh->ncmds();
    const macho_load_command<P>* cmd = cmds;
    for (uint32_t i = 0; i < cmd_count; ++i) {
        if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
            macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
            macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
            macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
            for (macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
                if ( sect->size() == 0 )
                    continue;
                unsigned sectionType = (sect->flags() & SECTION_TYPE);
                const uint32_t indirectTableOffset = sect->reserved1();
                if ( sectionType == S_SYMBOL_STUBS ) {
                    const uint32_t stubSize = sect->reserved2();
                    _stubCount = (uint32_t)(sect->size() / stubSize);
                    pint_t stubVMAddr = (pint_t)sect->addr();
                    for (uint32_t j=0; j < _stubCount; ++j, stubVMAddr += stubSize) {
                        uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
                        switch ( symbolIndex ) {
                            case INDIRECT_SYMBOL_ABS:
                            case INDIRECT_SYMBOL_LOCAL:
                            case INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL:
                                continue;
                        }
                        if ( symbolIndex >= _symTabCmd->nsyms() ) {
                            _diagnostics.warning("symbol index out of range (%d of %d) for stub at addr 0x%0llX in %s",
                                    symbolIndex, _symTabCmd->nsyms(), (uint64_t)stubVMAddr, _installName);
                            continue;
                        }
                        const macho_nlist<P>* sym = &symbolTable[symbolIndex];
                        uint32_t stringOffset = sym->n_strx();
                        if ( stringOffset > _symTabCmd->strsize() ) {
                            _diagnostics.warning("symbol string offset out of range (%u of %u) for stub at addr 0x%0llX in %s",
                                    stringOffset, _symTabCmd->strsize(), (uint64_t)stubVMAddr, _installName);
                            continue;
                        }
                        const char* symName = &symbolStrings[stringOffset];
                        if ( neverStubEliminate.count(symName) ) {
                            //fprintf(stderr, "not bypassing stub to %s in %s because target is interposable\n", symName, _installName);
                            continue;
                        }
                        const uint8_t* stubInstrs = (uint8_t*)(long)stubVMAddr + _cacheSlide;
                        pint_t targetLPAddr = 0;
                        switch ( _mh->cputype() ) {
                            case CPU_TYPE_ARM64:
                            case CPU_TYPE_ARM64_32:
#if SUPPORT_ARCH_arm64e
                                if (_mh->cpusubtype() == CPU_SUBTYPE_ARM64_E)
                                    targetLPAddr = (pint_t)lazyPointerAddrFromArm64eStub(stubInstrs, stubVMAddr);
                                else
#endif
                                    targetLPAddr = (pint_t)lazyPointerAddrFromArm64Stub(stubInstrs, stubVMAddr);
                                break;
                            case CPU_TYPE_ARM:
                                targetLPAddr = (pint_t)lazyPointerAddrFromArmStub(stubInstrs, (uint32_t)stubVMAddr);
                                break;
                        }
                        if ( targetLPAddr != 0 )
                            _stubAddrToLPAddr[stubVMAddr] = targetLPAddr;
                    }
                }
                else if ( (sectionType == S_LAZY_SYMBOL_POINTERS) || (sectionType == S_NON_LAZY_SYMBOL_POINTERS) ) {
                    pint_t* lpContent = (pint_t*)(sect->addr() + _cacheSlide);
                    uint32_t elementCount = (uint32_t)(sect->size() / sizeof(pint_t));
                    uint64_t textSegStartAddr = _segCmds[0]->vmaddr();
                    uint64_t textSegEndAddr   = _segCmds[0]->vmaddr() + _segCmds[0]->vmsize();
                    pint_t lpValue;
                    pint_t lpVMAddr;
                    for (uint32_t j=0; j < elementCount; ++j) {
                        uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
                        switch ( symbolIndex ) {
                            case INDIRECT_SYMBOL_ABS:
                            case INDIRECT_SYMBOL_LOCAL:
                            case INDIRECT_SYMBOL_LOCAL|INDIRECT_SYMBOL_ABS:
                                continue;
                        }
                        lpValue = (pint_t)P::getP(lpContent[j]);

                        // Fixup threaded rebase/bind
                        if ( _chainedFixups ) {
                            dyld3::MachOLoaded::ChainedFixupPointerOnDisk ptr;
                            ptr.raw = lpValue;
                            assert(ptr.authRebase.bind == 0);
                            if ( ptr.authRebase.auth ) {
                                lpValue = (pint_t)(_cacheUnslideAddr + ptr.authRebase.target);
                            }
                            else {
                                lpValue = (pint_t)ptr.plainRebase.signExtendedTarget();
                            }
                        }

                        lpVMAddr = (pint_t)sect->addr() + j * sizeof(pint_t);
                        if ( symbolIndex >= _symTabCmd->nsyms() ) {
                            _diagnostics.warning("symbol index out of range (%d of %d) for lazy pointer at addr 0x%0llX in %s",
                                    symbolIndex, _symTabCmd->nsyms(), (uint64_t)lpVMAddr, _installName);
                            continue;
                        }
                        const macho_nlist<P>* sym = &symbolTable[symbolIndex];
                        uint32_t stringOffset = sym->n_strx();
                        if ( stringOffset > _symTabCmd->strsize() ) {
                            _diagnostics.warning("symbol string offset out of range (%u of %u) for lazy pointer at addr 0x%0llX in %s",
                                    stringOffset, _symTabCmd->strsize(), (uint64_t)lpVMAddr, _installName);
                            continue;
                        }
                        const char* symName = &symbolStrings[stringOffset];
                        if ( (lpValue > textSegStartAddr) && (lpValue < textSegEndAddr) ) {
                            //fprintf(stderr, "skipping lazy pointer at 0x%0lX to %s in %s because target is within dylib\n", (long)lpVMAddr, symName, _installName);
                        }
                        else if ( (sizeof(pint_t) == 8) && ((lpValue % 4) != 0) ) {
                            _diagnostics.warning("lazy pointer at 0x%0llX does not point to 4-byte aligned address(0x%0llX) in %s",
                                    (uint64_t)lpVMAddr, (uint64_t)lpValue, _installName);
                        }
                        else {
                            _lpAddrToTargetAddr[lpVMAddr] = lpValue;
                            _targetAddrToName[lpValue]    = symName;
                        }
                    }
                }
            }
        }
        cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
    }
}


template <typename P>
void StubOptimizer<P>::forEachCallSiteToAStub(CallSiteHandler handler)
{
    if (_diagnostics.hasError())
        return;
    const uint8_t* infoStart = &_linkeditBias[_splitSegInfoCmd->dataoff()];
    const uint8_t* infoEnd = &infoStart[_splitSegInfoCmd->datasize()];
    if ( *infoStart++ != DYLD_CACHE_ADJ_V2_FORMAT ) {
        _diagnostics.error("malformed split seg info in %s", _installName);
        return;
    }

    uint8_t* textSectionContent = (uint8_t*)(_textSection->addr() + _cacheSlide);

    // Whole          :== <count> FromToSection+
    // FromToSection  :== <from-sect-index> <to-sect-index> <count> ToOffset+
    // ToOffset       :== <to-sect-offset-delta> <count> FromOffset+
    // FromOffset     :== <kind> <count> <from-sect-offset-delta>
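    //
    // Example of this uleb128 stream (hypothetical bytes, added to illustrate
    // the grammar above): one from/to section pair, one target offset with two
    // call sites 8 bytes apart, would decode roughly as
    //
    //     01            Whole.count = 1
    //     01 03 01      from=1 (__text), to=3 (__stubs), ToOffset.count=1
    //     00 01         to-sect-offset-delta=0, FromOffset.count=1
    //     08 02         kind=8, count=2
    //     10 08         from-offset deltas: +0x10, then +0x08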
    const uint8_t* p = infoStart;
    uint64_t sectionCount = read_uleb128(p, infoEnd);
    for (uint64_t i=0; i < sectionCount; ++i) {
        uint64_t fromSectionIndex = read_uleb128(p, infoEnd);
        uint64_t toSectionIndex = read_uleb128(p, infoEnd);
        uint64_t toOffsetCount = read_uleb128(p, infoEnd);
        uint64_t toSectionOffset = 0;
        for (uint64_t j=0; j < toOffsetCount; ++j) {
            uint64_t toSectionDelta = read_uleb128(p, infoEnd);
            uint64_t fromOffsetCount = read_uleb128(p, infoEnd);
            toSectionOffset += toSectionDelta;
            for (uint64_t k=0; k < fromOffsetCount; ++k) {
                uint64_t kind = read_uleb128(p, infoEnd);
                if ( kind > 13 ) {
                    _diagnostics.error("bad kind (%llu) value in %s\n", kind, _installName);
                }
                uint64_t fromSectDeltaCount = read_uleb128(p, infoEnd);
                uint64_t fromSectionOffset = 0;
                for (uint64_t l=0; l < fromSectDeltaCount; ++l) {
                    uint64_t delta = read_uleb128(p, infoEnd);
                    fromSectionOffset += delta;
                    if ( (fromSectionIndex == _textSectionIndex) && (toSectionIndex == _stubSectionIndex) ) {
                        uint32_t* instrPtr = (uint32_t*)(textSectionContent + fromSectionOffset);
                        uint64_t instrAddr = _textSection->addr() + fromSectionOffset;
                        uint64_t stubAddr = _stubSection->addr() + toSectionOffset;
                        uint32_t instruction = E::get32(*instrPtr);
                        _branchesCount++;
                        if ( handler(kind, instrAddr, stubAddr, instruction) ) {
                            _branchesModifiedCount++;
                            E::set32(*instrPtr, instruction);
                        }
                    }
                }
            }
        }
    }
}


/// Extract displacement from a thumb b/bl/blx instruction.
template <typename P>
int32_t StubOptimizer<P>::getDisplacementFromThumbBranch(uint32_t instruction, uint32_t instrAddr)
{
    bool is_blx    = ((instruction & 0xD000F800) == 0xC000F000);
    uint32_t s     = (instruction >> 10) & 0x1;
    uint32_t j1    = (instruction >> 29) & 0x1;
    uint32_t j2    = (instruction >> 27) & 0x1;
    uint32_t imm10 = instruction & 0x3FF;
    uint32_t imm11 = (instruction >> 16) & 0x7FF;
    uint32_t i1    = (j1 == s);
    uint32_t i2    = (j2 == s);
    uint32_t dis   = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
    int32_t sdis   = dis;
    int32_t result = s ? (sdis | 0xFE000000) : sdis;
    if ( is_blx && (instrAddr & 0x2) ) {
        // The thumb blx instruction always has low bit of imm11 as zero. The way
        // a 2-byte aligned blx can branch to a 4-byte aligned ARM target is that
        // the blx instruction always 4-byte aligns the pc before adding the
        // displacement from the blx. We must emulate that when decoding this.
        result -= 2;
    }
    return result;
}
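
// Bit-layout sketch for the Thumb-2 BL/BLX encoding decoded above (added for
// clarity; the instruction word here stores the two 16-bit halves swapped, so
// S/imm10 live in the low half and J1/J2/imm11 in the high half):
//
//     low half:   1111 0 S imm10
//     high half:  11 J1 1 J2 imm11        (BL; BLX has imm11's low bit zero)
//     I1 = !(J1 ^ S),  I2 = !(J2 ^ S)
//     displacement = sign_extend(S:I1:I2:imm10:imm11:'0')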

/// Update a thumb b/bl/blx instruction, switching bl <-> blx as needed.
template <typename P>
uint32_t StubOptimizer<P>::setDisplacementInThumbBranch(uint32_t instruction, uint32_t instrAddr,
                                                        int32_t displacement, bool targetIsThumb) {
    if ( (displacement > 16777214) || (displacement < (-16777216)) ) {
        _diagnostics.error("thumb branch out of range at 0x%0X in %s", instrAddr, _installName);
        return 0;
    }
    bool is_bl  = ((instruction & 0xD000F800) == 0xD000F000);
    bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
    bool is_b   = ((instruction & 0xD000F800) == 0x9000F000);
    uint32_t newInstruction = (instruction & 0xD000F800);
    if (is_bl || is_blx) {
        if (targetIsThumb) {
            newInstruction = 0xD000F000; // Use bl
        }
        else {
            newInstruction = 0xC000F000; // Use blx
            // See note in getDisplacementFromThumbBranch() about blx.
            if (instrAddr & 0x2)
                displacement += 2;
        }
    }
    else if (is_b) {
        if ( !targetIsThumb ) {
            _diagnostics.error("no pc-rel thumb branch instruction that switches to arm mode at 0x%0X in %s", instrAddr, _installName);
            return 0;
        }
    }
    else {
        _diagnostics.error("not b/bl/blx at 0x%0X in %s", instrAddr, _installName);
        return 0;
    }
    uint32_t s     = (uint32_t)(displacement >> 24) & 0x1;
    uint32_t i1    = (uint32_t)(displacement >> 23) & 0x1;
    uint32_t i2    = (uint32_t)(displacement >> 22) & 0x1;
    uint32_t imm10 = (uint32_t)(displacement >> 12) & 0x3FF;
    uint32_t imm11 = (uint32_t)(displacement >> 1) & 0x7FF;
    uint32_t j1    = (i1 == s);
    uint32_t j2    = (i2 == s);
    uint32_t nextDisp  = (j1 << 13) | (j2 << 11) | imm11;
    uint32_t firstDisp = (s << 10) | imm10;
    newInstruction |= (nextDisp << 16) | firstDisp;
    return newInstruction;
}


template <typename P>
void StubOptimizer<P>::optimizeArmCallSites()
{
    forEachCallSiteToAStub([&](uint8_t kind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction) -> bool {
        if ( kind == DYLD_CACHE_ADJ_V2_THUMB_BR22 ) {
            bool is_bl  = ((instruction & 0xD000F800) == 0xD000F000);
            bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
            bool is_b   = ((instruction & 0xD000F800) == 0x9000F000);
            if ( !is_bl && !is_blx && !is_b ){
                _diagnostics.warning("non-branch instruction at 0x%0llX in %s", callSiteAddr, _installName);
                return false;
            }
            int32_t brDelta = getDisplacementFromThumbBranch(instruction, (uint32_t)callSiteAddr);
            pint_t targetAddr = (pint_t)callSiteAddr + 4 + brDelta;
            if ( targetAddr != stubAddr ) {
                _diagnostics.warning("stub target mismatch at callsite 0x%0llX in %s", callSiteAddr, _installName);
                return false;
            }
            // ignore branch if not to a known stub
            const auto& pos = _stubAddrToLPAddr.find(targetAddr);
            if ( pos == _stubAddrToLPAddr.end() )
                return false;
            // ignore branch if lazy pointer is not known (could be resolver based)
            pint_t lpAddr = pos->second;
            const auto& pos2 = _lpAddrToTargetAddr.find(lpAddr);
            if ( pos2 == _lpAddrToTargetAddr.end() )
                return false;
            uint64_t finalTargetAddr = pos2->second;
            int64_t deltaToFinalTarget = finalTargetAddr - (callSiteAddr + 4);
            // if final target within range, change to branch there directly
            if ( (deltaToFinalTarget > -b16MegLimit) && (deltaToFinalTarget < b16MegLimit) ) {
                bool targetIsThumb = finalTargetAddr & 1;
                instruction = setDisplacementInThumbBranch(instruction, (uint32_t)callSiteAddr, (int32_t)deltaToFinalTarget, targetIsThumb);
                if (_diagnostics.hasError())
                    return false;
                _branchesDirectCount++;
                return true;
            }
        }
        else if ( kind == DYLD_CACHE_ADJ_V2_ARM_BR24 ) {
            // too few of these to be worth trying to optimize
        }
        return false;
    });
    if (_diagnostics.hasError())
        return;
}

template <typename P>
void StubOptimizer<P>::optimizeArmStubs()
{
    for (const auto& stubEntry : _stubAddrToLPAddr) {
        pint_t stubVMAddr = stubEntry.first;
        pint_t lpVMAddr   = stubEntry.second;
        const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
        if ( pos == _lpAddrToTargetAddr.end() )
            continue;
        pint_t targetVMAddr = pos->second;

        int32_t delta = (int32_t)(targetVMAddr - (stubVMAddr + 12));
        uint32_t* stubInstructions = (uint32_t*)((uint8_t*)(long)stubVMAddr + _cacheSlide);
        assert(stubInstructions[0] == 0xe59fc004);
        stubInstructions[0] = 0xe59fc000;  // ldr  ip, L0
        stubInstructions[1] = 0xe08ff00c;  // add  pc, pc, ip
        stubInstructions[2] = delta;       // L0: .long xxxx
        stubInstructions[3] = 0xe7ffdefe;  // trap
        _stubOptimizedCount++;
    }
}


template <typename P>
void StubOptimizer<P>::optimizeArm64CallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools)
{
    forEachCallSiteToAStub([&](uint8_t kind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction) -> bool {
        if ( kind != DYLD_CACHE_ADJ_V2_ARM64_BR26 )
            return false;
        // skip all but BL or B
        if ( (instruction & 0x7C000000) != 0x14000000 )
            return false;
        // compute target of branch instruction
        int32_t brDelta = (instruction & 0x03FFFFFF) << 2;
        if ( brDelta & 0x08000000 )
            brDelta |= 0xF0000000;
        uint64_t targetAddr = callSiteAddr + (int64_t)brDelta;
        if ( targetAddr != stubAddr ) {
            _diagnostics.warning("stub target mismatch");
            return false;
        }
        // ignore branch if not to a known stub
        const auto& pos = _stubAddrToLPAddr.find((pint_t)targetAddr);
        if ( pos == _stubAddrToLPAddr.end() )
            return false;
        // ignore branch if lazy pointer is not known (could be resolver based)
        uint64_t lpAddr = pos->second;
        const auto& pos2 = _lpAddrToTargetAddr.find((pint_t)lpAddr);
        if ( pos2 == _lpAddrToTargetAddr.end() )
            return false;
        uint64_t finalTargetAddr = pos2->second;
        int64_t deltaToFinalTarget = finalTargetAddr - callSiteAddr;
        // if final target within range, change to branch there directly
        if ( (deltaToFinalTarget > -b128MegLimit) && (deltaToFinalTarget < b128MegLimit) ) {
            instruction = (instruction & 0xFC000000) | ((deltaToFinalTarget >> 2) & 0x03FFFFFF);
            _branchesDirectCount++;
            return true;
        }
        // find closest branch island pool between instruction and target and get island
        const auto& pos3 = _targetAddrToName.find((pint_t)finalTargetAddr);
        if ( pos3 == _targetAddrToName.end() )
            return false;
        const char* targetName = pos3->second;
        if ( finalTargetAddr > callSiteAddr ) {
            // target is after branch so find first pool after branch
            for ( BranchPoolDylib<P>* pool : branchIslandPools ) {
                if ( (pool->addr() > callSiteAddr) && (pool->addr() < finalTargetAddr) ) {
                    uint64_t brIslandAddr = pool->getForwardBranch(finalTargetAddr, targetName, branchIslandPools);
                    if ( brIslandAddr == 0 ) {
                        // branch island pool full
                        _diagnostics.warning("pool full. Can't optimize branch to %s from 0x%llX in %s\n", targetName, callSiteAddr, _installName);
                        return false;
                    }
                    int64_t deltaToTarget = brIslandAddr - callSiteAddr;
                    instruction = (instruction & 0xFC000000) | ((deltaToTarget >> 2) & 0x03FFFFFF);
                    _branchesIslandCount++;
                    return true;
                }
            }
        }
        else {
            // target is before branch so find closest pool before branch
            for (size_t j = branchIslandPools.size(); j > 0; --j) {
                BranchPoolDylib<P>* pool = branchIslandPools[j-1];
                if ( (pool->addr() < callSiteAddr) && (pool->addr() > finalTargetAddr) ) {
                    uint64_t brIslandAddr = pool->getBackBranch(finalTargetAddr, targetName, branchIslandPools);
                    if ( brIslandAddr == 0 ) {
                        // branch island pool full
                        _diagnostics.warning("pool full. Can't optimize branch to %s from 0x%llX in %s\n", targetName, callSiteAddr, _installName);
                        return false;
                    }
                    int64_t deltaToTarget = brIslandAddr - callSiteAddr;
                    instruction = (instruction & 0xFC000000) | ((deltaToTarget >> 2) & 0x03FFFFFF);
                    _branchesIslandCount++;
                    return true;
                }
            }
        }
        return false;
    });
    if (_diagnostics.hasError())
        return;
}

template <typename P>
void StubOptimizer<P>::optimizeCallSites(std::vector<BranchPoolDylib<P>*>& branchIslandPools)
{
    if ( _textSection == NULL )
        return;
    if ( _stubSection == NULL )
        return;

    switch ( _mh->cputype() ) {
        case CPU_TYPE_ARM64:
        case CPU_TYPE_ARM64_32:
            optimizeArm64CallSites(branchIslandPools);
            if ( verbose ) {
                _diagnostics.verbose("%5u branches in __text, %5u changed to direct branches, %5u changed to use islands for %s\n",
                        _branchesCount, _branchesDirectCount, _branchesIslandCount, _installName);
            }
            break;
        case CPU_TYPE_ARM:
            optimizeArmCallSites();
            optimizeArmStubs();
            if ( verbose ) {
                _diagnostics.verbose("%3u of %3u stubs optimized. %5u branches in __text, %5u changed to direct branches for %s\n",
                        _stubOptimizedCount, _stubCount, _branchesCount, _branchesDirectCount, _installName);
            }
            break;
    }
}


template <typename P>
void bypassStubs(DyldSharedCache* cache, const std::string& archName, const std::vector<uint64_t>& branchPoolStartAddrs,
                 uint64_t branchPoolsLinkEditStartAddr, uint64_t branchPoolsLinkEditStartFileOffset,
                 const char* const neverStubEliminateDylibs[], Diagnostics& diags)
{
    diags.verbose("Stub elimination optimization:\n");

    // construct a StubOptimizer for each image
    __block std::vector<StubOptimizer<P>*> optimizers;
    cache->forEachImage(^(const mach_header* mh, const char* installName) {
        optimizers.push_back(new StubOptimizer<P>(cache, (macho_header<P>*)mh, diags));
    });

    // construct a BranchPoolDylib for each pool
    std::vector<BranchPoolDylib<P>*> pools;

    if ( startsWith(archName, "arm64") ) {
        // Find hole at end of linkedit region for branch pool linkedits
        __block uint64_t textRegionStartAddr = 0;
        __block uint64_t linkEditRegionStartAddr = 0;
        __block uint64_t linkEditRegionEndAddr = 0;
        __block uint64_t linkEditRegionStartCacheOffset = 0;
        cache->forEachRegion(^(const void* content, uint64_t vmAddr, uint64_t size, uint32_t permissions) {
            if ( permissions == (PROT_READ|PROT_EXEC) ) {
                textRegionStartAddr = vmAddr;
            }
            else if ( permissions == PROT_READ ) {
                linkEditRegionStartAddr = vmAddr;
                linkEditRegionEndAddr = vmAddr + size;
                linkEditRegionStartCacheOffset = (char*)content - (char*)cache;
            }
        });
        __block uint64_t lastLinkEditRegionUsedOffset = 0;
        cache->forEachImage(^(const mach_header* mh, const char* installName) {
            ((dyld3::MachOFile*)mh)->forEachSegment(^(const dyld3::MachOFile::SegmentInfo& info, bool& stop) {
                if ( strcmp(info.segName, "__LINKEDIT") == 0 ) {
                    if ( info.fileOffset >= lastLinkEditRegionUsedOffset )
                        lastLinkEditRegionUsedOffset = info.fileOffset + info.vmSize;
                }
            });
        });
        uint64_t allPoolsLinkEditStartAddr = branchPoolsLinkEditStartAddr;
        if ( !branchPoolStartAddrs.empty() ) {
            uint64_t poolLinkEditStartAddr = allPoolsLinkEditStartAddr;
            uint64_t poolLinkEditFileOffset = branchPoolsLinkEditStartFileOffset;
            const uint64_t poolSize = branchPoolLinkEditSize("arm64");
            for (uint64_t poolAddr : branchPoolStartAddrs) {
                pools.push_back(new BranchPoolDylib<P>(cache, poolAddr, textRegionStartAddr, poolLinkEditStartAddr, poolLinkEditFileOffset, diags));
                poolLinkEditStartAddr += poolSize;
                poolLinkEditFileOffset += poolSize;
            }
        }
    }

    // build set of functions to never stub-eliminate because tools may need to override them
    std::unordered_set<std::string> neverStubEliminate;
    for (const char** p=sNeverStubEliminateSymbols; *p != nullptr; ++p) {
        neverStubEliminate.insert(*p);
    }
    for (const char* const* d=neverStubEliminateDylibs; *d != nullptr; ++d) {
        for (StubOptimizer<P>* op : optimizers) {
            if ( strcmp(op->installName(), *d) == 0 ) {
                const uint8_t* exportsStart = op->exportsTrie();
                const uint8_t* exportsEnd = exportsStart + op->exportsTrieSize();
                std::vector<ExportInfoTrie::Entry> exports;
                if ( !ExportInfoTrie::parseTrie(exportsStart, exportsEnd, exports) ) {
                    diags.error("malformed exports trie in %s", *d);
                    return;
                }
                for (const ExportInfoTrie::Entry& entry : exports) {
                    neverStubEliminate.insert(entry.name);
                }
            }
        }
    }

    // build maps of stubs-to-lp and lp-to-target
    for (StubOptimizer<P>* op : optimizers)
        op->buildStubMap(neverStubEliminate);

    // optimize call sites to by-pass stubs or jump through island
    for (StubOptimizer<P>* op : optimizers)
        op->optimizeCallSites(pools);

    // final fix ups in branch pools
    for (BranchPoolDylib<P>* pool : pools) {
        pool->finalizeLoadCommands();
        pool->printStats();
    }

    // write total optimization info
    uint32_t callSiteCount = 0;
    uint32_t callSiteDirectOptCount = 0;
    uint32_t callSiteOneHopOptCount = 0;
    for (StubOptimizer<P>* op : optimizers) {
        callSiteCount          += op->_branchesCount;
        callSiteDirectOptCount += op->_branchesDirectCount;
        callSiteOneHopOptCount += op->_branchesIslandCount;
    }
    diags.verbose("  cache contains %u call sites of which %u were direct bound and %u were bound through islands\n", callSiteCount, callSiteDirectOptCount, callSiteOneHopOptCount);

    // clean up
    for (StubOptimizer<P>* op : optimizers)
        delete op;
    for (BranchPoolDylib<P>* p : pools)
        delete p;
}


void CacheBuilder::optimizeAwayStubs(const std::vector<uint64_t>& branchPoolStartAddrs, uint64_t branchPoolsLinkEditStartAddr)
{
    DyldSharedCache* dyldCache = (DyldSharedCache*)_readExecuteRegion.buffer;
    uint64_t branchPoolsLinkEditStartFileOffset = _readOnlyRegion.cacheFileOffset + branchPoolsLinkEditStartAddr - _readOnlyRegion.unslidLoadAddress;
    std::string archName = dyldCache->archName();
#if SUPPORT_ARCH_arm64_32
    if ( startsWith(archName, "arm64_32") )
        bypassStubs<Pointer32<LittleEndian>>(dyldCache, archName, branchPoolStartAddrs, branchPoolsLinkEditStartAddr, branchPoolsLinkEditStartFileOffset, _s_neverStubEliminate, _diagnostics);
    else
#endif
    if ( startsWith(archName, "arm64") )
        bypassStubs<Pointer64<LittleEndian>>(dyldCache, archName, branchPoolStartAddrs, branchPoolsLinkEditStartAddr, branchPoolsLinkEditStartFileOffset, _s_neverStubEliminate, _diagnostics);
    else if ( archName == "armv7k" )
        bypassStubs<Pointer32<LittleEndian>>(dyldCache, archName, branchPoolStartAddrs, branchPoolsLinkEditStartAddr, branchPoolsLinkEditStartFileOffset, _s_neverStubEliminate, _diagnostics);
    // no stub optimization done for other arches
}


template <typename P>
void StubOptimizer<P>::optimizeStubs(std::unordered_map<uint64_t,std::vector<uint64_t>>& targetToBranchIslands)
{
    for (const auto& stubEntry : _stubAddrToLPAddr) {
        pint_t stubVMAddr = stubEntry.first;
        pint_t lpVMAddr   = stubEntry.second;
        const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
        if ( pos == _lpAddrToTargetAddr.end() )
            continue;
        pint_t targetVMAddr = pos->second;
        int64_t delta = targetVMAddr - stubVMAddr;
        if ( (delta > -b128MegLimit) && (delta < b128MegLimit) ) {
            // target within reach, change stub to direct branch
            uint32_t* stubInstructions = (uint32_t*)((uint8_t*)_cacheBuffer + _textSegCacheOffset + stubVMAddr - _textSegStartAddr);
            uint32_t stubInstr1 = E::get32(stubInstructions[0]);
            if ( (stubInstr1 & 0x9F00001F) != 0x90000010 ) {
                _diagnostics.warning("first instruction of stub (0x%08X) is no longer ADRP for stub at addr 0x%0llX in %s\n",
                        stubInstr1, (uint64_t)stubVMAddr, _installName);
                continue;
            }
            uint32_t directBranchInstr = 0x14000000 + ((delta/4) & 0x03FFFFFF);
            E::set32(stubInstructions[0], directBranchInstr);
            uint32_t brkInstr = 0xD4200000;
            E::set32(stubInstructions[1], brkInstr);
            E::set32(stubInstructions[2], brkInstr);
            _stubOptimizedCount++;
            targetToBranchIslands[targetVMAddr].push_back(stubVMAddr);
        }
    }
    if ( verbose )
        _diagnostics.verbose("%3u of %3u stubs optimized for %s\n", _stubOptimizedCount, _stubCount, _installName);
}

template <typename P>
void StubOptimizer<P>::bypassStubs(std::unordered_map<uint64_t,std::vector<uint64_t>>& targetToBranchIslands)
{
    if ( _textSection == NULL )
        return;

    // scan __text section looking for B(L) instructions that branch to a stub
    unsigned instructionCount = (unsigned)(_textSection->size() / 4);
    uint32_t* instructions = (uint32_t*)((uint8_t*)_cacheBuffer + _textSegCacheOffset + _textSection->addr() - _textSegStartAddr);
    for (unsigned i=0; i < instructionCount; ++i) {
        uint32_t instr = E::get32(instructions[i]);
        // skip all but BL or B
        if ( (instr & 0x7C000000) != 0x14000000 )
            continue;
        // compute target of branch instruction
        int32_t brDelta = (instr & 0x03FFFFFF) << 2;
        if ( brDelta & 0x08000000 )
            brDelta |= 0xF0000000;
        uint64_t branchAddr = _textSection->addr() + i*4;
        uint64_t targetAddr = branchAddr + (int64_t)brDelta;
        // ignore branch if not to a known stub
        const auto& pos = _stubAddrToLPAddr.find(targetAddr);
        if ( pos == _stubAddrToLPAddr.end() )
            continue;
        _branchesCount++;
        // ignore branch if lazy pointer is not known (could be resolver based)
        const auto& pos2 = _lpAddrToTargetAddr.find(pos->second);
        if ( pos2 == _lpAddrToTargetAddr.end() )
            continue;
        uint64_t finalTargetAddr = pos2->second;
        int64_t deltaToFinalTarget = finalTargetAddr - branchAddr;
        // if final target within range, change to branch there directly
        if ( (deltaToFinalTarget > -b128MegLimit) && (deltaToFinalTarget < b128MegLimit) ) {
            uint32_t newInstr = (instr & 0xFC000000) | ((deltaToFinalTarget >> 2) & 0x03FFFFFF);
            E::set32(instructions[i], newInstr);
            _branchesDirectCount++;
            continue;
        }
        // see if there is an existing branch island in range that can be used
        std::vector<uint64_t>& existingBranchIslands = targetToBranchIslands[finalTargetAddr];
        for (uint64_t branchIslandAddr : existingBranchIslands) {
            int64_t deltaToBranchIsland = branchIslandAddr - branchAddr;
            // if island is within range, change to branch to the island directly
            if ( (deltaToBranchIsland > -b128MegLimit) && (deltaToBranchIsland < b128MegLimit) ) {
                uint32_t newInstr = (instr & 0xFC000000) | ((deltaToBranchIsland >> 2) & 0x03FFFFFF);
                E::set32(instructions[i], newInstr);
                _branchesIslandCount++;
                break;
            }
        }
    }
    if ( verbose )
        _diagnostics.verbose("%5u branches in __text, %5u changed to direct branches, %5u changed to indirect for %s\n",
                _branchesCount, _branchesDirectCount, _branchesIslandCount, _installName);
}