/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*-
 *
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
#include <sys/types.h>
#include <string.h>
#include <assert.h>

#include <CommonCrypto/CommonDigest.h>

#include <string>
#include <vector>
#include <functional>
#include <unordered_map>
#include <unordered_set>

#include "StringUtils.h"
#include "MachOFileAbstraction.hpp"
#include "MachOAnalyzer.h"
#include "Diagnostics.h"
#include "DyldSharedCache.h"
#include "CacheBuilder.h"
static const bool verbose = false;
template <typename P>
class StubOptimizer {
public:
                    StubOptimizer(const DyldSharedCache* cache, macho_header<P>* mh, Diagnostics& diags);
    void            buildStubMap(const std::unordered_set<std::string>& neverStubEliminate);
    void            optimizeCallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr);
    const char*     installName() { return _installName; }
    const uint8_t*  exportsTrie() {
        if ( _dyldInfo != nullptr )
            return &_linkeditBias[_dyldInfo->export_off()];
        else
            return &_linkeditBias[_exportTrie->dataoff()];
    }
    uint32_t        exportsTrieSize() {
        if ( _dyldInfo != nullptr )
            return _dyldInfo->export_size();
        else
            return _exportTrie->datasize();
    }

    uint32_t        _stubCount                          = 0;
    uint32_t        _stubOptimizedCount                 = 0;
    uint32_t        _stubsLeftInterposable              = 0;
    uint32_t        _branchToStubCount                  = 0;
    uint32_t        _branchOptimizedToDirectCount       = 0;
    uint32_t        _branchToOptimizedStubCount         = 0;
    uint32_t        _branchToReUsedOptimizedStubCount   = 0;

private:
    Diagnostics     _diagnostics;

    typedef std::function<bool(uint8_t callSiteKind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction)> CallSiteHandler;
    typedef typename P::uint_t pint_t;
    typedef typename P::E E;

    void            forEachCallSiteToAStub(CallSiteHandler);
    void            optimizeArm64CallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr);
    void            optimizeArm64Stubs();
#if SUPPORT_ARCH_arm64e
    void            optimizeArm64eStubs();
#endif
#if SUPPORT_ARCH_arm64_32
    void            optimizeArm64_32Stubs();
#endif
    void            optimizeArmCallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr);
    void            optimizeArmStubs();
    uint64_t        lazyPointerAddrFromArm64Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr);
#if SUPPORT_ARCH_arm64e
    uint64_t        lazyPointerAddrFromArm64eStub(const uint8_t* stubInstructions, uint64_t stubVMAddr);
#endif
#if SUPPORT_ARCH_arm64_32
    uint64_t        lazyPointerAddrFromArm64_32Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr);
#endif
    uint32_t        lazyPointerAddrFromArmStub(const uint8_t* stubInstructions, uint32_t stubVMAddr);
    int32_t         getDisplacementFromThumbBranch(uint32_t instruction, uint32_t instrAddr);
    uint32_t        setDisplacementInThumbBranch(uint32_t instruction, uint32_t instrAddr,
                                                 int32_t displacement, bool targetIsThumb);

    struct AddressAndName { pint_t targetVMAddr; const char* targetName; };
    typedef std::unordered_map<pint_t, AddressAndName> StubVMAddrToTarget;

    static const int64_t b128MegLimit = 0x07FFFFFF;
    static const int64_t b16MegLimit  = 0x00FFFFFF;
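
    // Range notes: 0x07FFFFFF (~128MB) is the byte reach of an arm64 B/BL
    // (26-bit signed immediate, scaled by 4), and 0x00FFFFFF (~16MB) is the
    // reach of a Thumb-2 BL/BLX (24-bit signed byte displacement).  A branch
    // is only retargeted directly when the final target is inside these windows.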

    macho_header<P>*                            _mh;
    int64_t                                     _cacheSlide         = 0;
    uint64_t                                    _cacheUnslideAddr   = 0;
    bool                                        _chainedFixups      = false;
    uint32_t                                    _linkeditSize       = 0;
    uint64_t                                    _linkeditAddr       = 0;
    const uint8_t*                              _linkeditBias       = nullptr;
    const char*                                 _installName        = nullptr;
    const macho_symtab_command<P>*              _symTabCmd          = nullptr;
    const macho_dysymtab_command<P>*            _dynSymTabCmd       = nullptr;
    const macho_dyld_info_command<P>*           _dyldInfo           = nullptr;
    const macho_linkedit_data_command<P>*       _exportTrie         = nullptr;
    macho_linkedit_data_command<P>*             _splitSegInfoCmd    = nullptr;
    const macho_section<P>*                     _textSection        = nullptr;
    const macho_section<P>*                     _stubSection        = nullptr;
    uint32_t                                    _textSectionIndex   = 0;
    uint32_t                                    _stubSectionIndex   = 0;
    pint_t                                      _textSegStartAddr   = 0;
    std::vector<macho_segment_command<P>*>      _segCmds;
    std::unordered_map<pint_t, pint_t>          _stubAddrToLPAddr;
    std::unordered_map<pint_t, pint_t>          _lpAddrToTargetAddr;
    std::unordered_map<pint_t, const char*>     _targetAddrToName;
    std::unordered_set<uint64_t>                _stubsToOptimize;
};
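
// Overview of how a StubOptimizer is used (see bypassStubs() near the bottom of this
// file): one instance is created per dylib in the shared cache, buildStubMap() records
// stub -> lazy-pointer and lazy-pointer -> target mappings, and optimizeCallSites()
// rewrites BL/B call sites to bypass the stub where possible, then rewrites any stubs
// that are still needed into direct branches for the current architecture.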

template <typename P>
StubOptimizer<P>::StubOptimizer(const DyldSharedCache* cache, macho_header<P>* mh, Diagnostics& diags)
    : _mh(mh), _diagnostics(diags)
{
    _cacheSlide       = (long)cache - cache->unslidLoadAddress();
    _cacheUnslideAddr = cache->unslidLoadAddress();
#if SUPPORT_ARCH_arm64e
    _chainedFixups = (strcmp(cache->archName(), "arm64e") == 0);
#else
    _chainedFixups = false;
#endif
    const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
    const uint32_t cmd_count = mh->ncmds();
    macho_segment_command<P>* segCmd;
    uint32_t sectionIndex = 0;
    const macho_load_command<P>* cmd = cmds;
    for (uint32_t i = 0; i < cmd_count; ++i) {
        switch (cmd->cmd()) {
            case LC_ID_DYLIB:
                _installName = ((macho_dylib_command<P>*)cmd)->name();
                break;
            case LC_SYMTAB:
                _symTabCmd = (macho_symtab_command<P>*)cmd;
                break;
            case LC_DYSYMTAB:
                _dynSymTabCmd = (macho_dysymtab_command<P>*)cmd;
                break;
            case LC_SEGMENT_SPLIT_INFO:
                _splitSegInfoCmd = (macho_linkedit_data_command<P>*)cmd;
                break;
            case LC_DYLD_INFO:
            case LC_DYLD_INFO_ONLY:
                _dyldInfo = (macho_dyld_info_command<P>*)cmd;
                break;
            case LC_DYLD_EXPORTS_TRIE:
                _exportTrie = (macho_linkedit_data_command<P>*)cmd;
                break;
            case macho_segment_command<P>::CMD:
                segCmd = (macho_segment_command<P>*)cmd;
                _segCmds.push_back(segCmd);
                if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
                    _linkeditBias = (uint8_t*)(segCmd->vmaddr() + _cacheSlide - segCmd->fileoff());
                    _linkeditSize = (uint32_t)segCmd->vmsize();
                    _linkeditAddr = segCmd->vmaddr();
                }
                else if ( strcmp(segCmd->segname(), "__TEXT") == 0 ) {
                    _textSegStartAddr = (pint_t)segCmd->vmaddr();
                    const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
                    const macho_section<P>* const sectionsEnd   = &sectionsStart[segCmd->nsects()];
                    for (const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
                        ++sectionIndex;
                        if ( strcmp(sect->sectname(), "__text") == 0 ) {
                            _textSection      = sect;
                            _textSectionIndex = sectionIndex;
                        }
                        else if ( ((sect->flags() & SECTION_TYPE) == S_SYMBOL_STUBS) && (sect->size() != 0) ) {
                            _stubSection      = sect;
                            _stubSectionIndex = sectionIndex;
                        }
                    }
                }
                break;
        }
        cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
    }
}

template <typename P>
uint32_t StubOptimizer<P>::lazyPointerAddrFromArmStub(const uint8_t* stubInstructions, uint32_t stubVMAddr)
{
    uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
    uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions+4));
    uint32_t stubInstr3 = E::get32(*(uint32_t*)(stubInstructions+8));
    int32_t  stubData   = E::get32(*(uint32_t*)(stubInstructions+12));
    if ( stubInstr1 != 0xe59fc004 ) {
        _diagnostics.warning("first instruction of stub (0x%08X) is not 'ldr ip, pc + 12' for stub at addr 0x%0llX in %s",
                stubInstr1, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    if ( stubInstr2 != 0xe08fc00c ) {
        _diagnostics.warning("second instruction of stub (0x%08X) is not 'add ip, pc, ip' for stub at addr 0x%0llX in %s",
                stubInstr2, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    if ( stubInstr3 != 0xe59cf000 ) {
        _diagnostics.warning("third instruction of stub (0x%08X) is not 'ldr pc, [ip]' for stub at addr 0x%0llX in %s",
                stubInstr3, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    return stubVMAddr + 12 + stubData;
}

template <typename P>
uint64_t StubOptimizer<P>::lazyPointerAddrFromArm64Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr)
{
    uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
    if ( (stubInstr1 & 0x9F00001F) != 0x90000010 ) {
        _diagnostics.warning("first instruction of stub (0x%08X) is not ADRP for stub at addr 0x%0llX in %s",
                stubInstr1, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    int32_t adrpValue = ((stubInstr1 & 0x00FFFFE0) >> 3) | ((stubInstr1 & 0x60000000) >> 29);
    if ( stubInstr1 & 0x00800000 )
        adrpValue |= 0xFFF00000;
    uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions + 4));
    if ( (stubInstr2 & 0xFFC003FF) != 0xF9400210 ) {
        _diagnostics.warning("second instruction of stub (0x%08X) is not LDR for stub at addr 0x%0llX in %s",
                stubInstr2, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    uint32_t ldrValue = ((stubInstr2 >> 10) & 0x00000FFF);
    return (stubVMAddr & (-4096)) + adrpValue*4096 + ldrValue*8;
}
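
// ADRP encoding refresher for the decoders above and below: the 21-bit page delta is
// split into immlo (instruction bits 30:29) and immhi (bits 23:5).  The decoders
// reassemble it into a signed page count, multiply by 4096, and add the page offset
// taken from the following LDR/ADD to recover the lazy pointer address.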

#if SUPPORT_ARCH_arm64_32
template <typename P>
uint64_t StubOptimizer<P>::lazyPointerAddrFromArm64_32Stub(const uint8_t* stubInstructions, uint64_t stubVMAddr)
{
    uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
    if ( (stubInstr1 & 0x9F00001F) != 0x90000010 ) {
        _diagnostics.warning("first instruction of stub (0x%08X) is not ADRP for stub at addr 0x%0llX in %s",
                stubInstr1, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    int32_t adrpValue = ((stubInstr1 & 0x00FFFFE0) >> 3) | ((stubInstr1 & 0x60000000) >> 29);
    if ( stubInstr1 & 0x00800000 )
        adrpValue |= 0xFFF00000;
    uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions + 4));
    if ( (stubInstr2 & 0xFFC003FF) != 0xB9400210 ) {
        _diagnostics.warning("second instruction of stub (0x%08X) is not LDR for stub at addr 0x%0llX in %s",
                stubInstr2, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    uint32_t ldrValue = ((stubInstr2 >> 10) & 0x00000FFF);
    return (stubVMAddr & (-4096)) + adrpValue*4096 + ldrValue*4; // LDR Wn has a scale factor of 4
}
#endif

#if SUPPORT_ARCH_arm64e
template <typename P>
uint64_t StubOptimizer<P>::lazyPointerAddrFromArm64eStub(const uint8_t* stubInstructions, uint64_t stubVMAddr)
{
    uint32_t stubInstr1 = E::get32(*(uint32_t*)stubInstructions);
    // ADRP X17, lp@page
    if ( (stubInstr1 & 0x9F00001F) != 0x90000011 ) {
        _diagnostics.warning("first instruction of stub (0x%08X) is not ADRP for stub at addr 0x%0llX in %s",
                stubInstr1, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    int32_t adrpValue = ((stubInstr1 & 0x00FFFFE0) >> 3) | ((stubInstr1 & 0x60000000) >> 29);
    if ( stubInstr1 & 0x00800000 )
        adrpValue |= 0xFFF00000;

    // ADD X17, X17, lp@pageoff
    uint32_t stubInstr2 = E::get32(*(uint32_t*)(stubInstructions + 4));
    if ( (stubInstr2 & 0xFFC003FF) != 0x91000231 ) {
        _diagnostics.warning("second instruction of stub (0x%08X) is not ADD for stub at addr 0x%0llX in %s",
                stubInstr2, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    uint32_t addValue = ((stubInstr2 & 0x003FFC00) >> 10);

    // LDR X16, [X17]
    uint32_t stubInstr3 = E::get32(*(uint32_t*)(stubInstructions + 8));
    if ( stubInstr3 != 0xF9400230 ) {
        _diagnostics.warning("third instruction of stub (0x%08X) is not LDR for stub at addr 0x%0llX in %s",
                stubInstr3, (uint64_t)stubVMAddr, _installName);
        return 0;
    }
    return (stubVMAddr & (-4096)) + adrpValue*4096 + addValue;
}
#endif

template <typename P>
void StubOptimizer<P>::buildStubMap(const std::unordered_set<std::string>& neverStubEliminate)
{
    // find all stubs and lazy pointers
    const macho_nlist<P>* symbolTable = (const macho_nlist<P>*)(&_linkeditBias[_symTabCmd->symoff()]);
    const char* symbolStrings = (char*)(&_linkeditBias[_symTabCmd->stroff()]);
    const uint32_t* const indirectTable = (uint32_t*)(&_linkeditBias[_dynSymTabCmd->indirectsymoff()]);
    const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)_mh + sizeof(macho_header<P>));
    const uint32_t cmd_count = _mh->ncmds();
    const macho_load_command<P>* cmd = cmds;
    for (uint32_t i = 0; i < cmd_count; ++i) {
        if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
            macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
            macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
            macho_section<P>* const sectionsEnd   = &sectionsStart[seg->nsects()];
            for (macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
                if ( sect->size() == 0 )
                    continue;
                unsigned sectionType = (sect->flags() & SECTION_TYPE);
                const uint32_t indirectTableOffset = sect->reserved1();
                if ( sectionType == S_SYMBOL_STUBS ) {
                    const uint32_t stubSize = sect->reserved2();
                    _stubCount = (uint32_t)(sect->size() / stubSize);
                    pint_t stubVMAddr = (pint_t)sect->addr();
                    for (uint32_t j=0; j < _stubCount; ++j, stubVMAddr += stubSize) {
                        uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
                        switch ( symbolIndex ) {
                            case INDIRECT_SYMBOL_ABS:
                            case INDIRECT_SYMBOL_LOCAL:
                            case INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL:
                                break;
                            default:
                                if ( symbolIndex >= _symTabCmd->nsyms() ) {
                                    _diagnostics.warning("symbol index out of range (%d of %d) for stub at addr 0x%0llX in %s",
                                        symbolIndex, _symTabCmd->nsyms(), (uint64_t)stubVMAddr, _installName);
                                    continue;
                                }
                                const macho_nlist<P>* sym = &symbolTable[symbolIndex];
                                uint32_t stringOffset = sym->n_strx();
                                if ( stringOffset > _symTabCmd->strsize() ) {
                                    _diagnostics.warning("symbol string offset out of range (%u of %u) for stub at addr 0x%0llX in %s",
                                        stringOffset, sym->n_strx(), (uint64_t)stubVMAddr, _installName);
                                    continue;
                                }
                                const char* symName = &symbolStrings[stringOffset];
                                if ( neverStubEliminate.count(symName) ) {
                                    //fprintf(stderr, "stubVMAddr=0x%llX, not bypassing stub to %s in %s because target is interposable\n", (uint64_t)stubVMAddr, symName, _installName);
                                    _stubsLeftInterposable++;
                                    continue;
                                }
                                const uint8_t* stubInstrs = (uint8_t*)(long)stubVMAddr + _cacheSlide;
                                pint_t targetLPAddr = 0;
                                switch ( _mh->cputype() ) {
                                    case CPU_TYPE_ARM64:
                                    case CPU_TYPE_ARM64_32:
#if SUPPORT_ARCH_arm64e
                                        if (_mh->cpusubtype() == CPU_SUBTYPE_ARM64E)
                                            targetLPAddr = (pint_t)lazyPointerAddrFromArm64eStub(stubInstrs, stubVMAddr);
                                        else
#endif
#if SUPPORT_ARCH_arm64_32
                                        if (_mh->cputype() == CPU_TYPE_ARM64_32)
                                            targetLPAddr = (pint_t)lazyPointerAddrFromArm64_32Stub(stubInstrs, stubVMAddr);
                                        else
#endif
                                            targetLPAddr = (pint_t)lazyPointerAddrFromArm64Stub(stubInstrs, stubVMAddr);
                                        break;
                                    case CPU_TYPE_ARM:
                                        targetLPAddr = (pint_t)lazyPointerAddrFromArmStub(stubInstrs, (uint32_t)stubVMAddr);
                                        break;
                                }
                                if ( targetLPAddr != 0 )
                                    _stubAddrToLPAddr[stubVMAddr] = targetLPAddr;
                                break;
                        }
                    }
                }
                else if ( (sectionType == S_LAZY_SYMBOL_POINTERS) || (sectionType == S_NON_LAZY_SYMBOL_POINTERS) ) {
                    pint_t*  lpContent        = (pint_t*)(sect->addr() + _cacheSlide);
                    uint32_t elementCount     = (uint32_t)(sect->size() / sizeof(pint_t));
                    uint64_t textSegStartAddr = _segCmds[0]->vmaddr();
                    uint64_t textSegEndAddr   = _segCmds[0]->vmaddr() + _segCmds[0]->vmsize();
                    pint_t   lpValue;
                    pint_t   lpVMAddr;
                    for (uint32_t j=0; j < elementCount; ++j) {
                        uint32_t symbolIndex = E::get32(indirectTable[indirectTableOffset + j]);
                        switch ( symbolIndex ) {
                            case INDIRECT_SYMBOL_ABS:
                            case INDIRECT_SYMBOL_LOCAL:
                            case INDIRECT_SYMBOL_LOCAL|INDIRECT_SYMBOL_ABS:
                                break;
                            default:
                                lpValue = (pint_t)P::getP(lpContent[j]);

                                // Fixup threaded rebase/bind
                                if ( _chainedFixups ) {
                                    dyld3::MachOLoaded::ChainedFixupPointerOnDisk ptr;
                                    ptr.raw64 = lpValue;
                                    assert(ptr.arm64e.authRebase.bind == 0);
                                    if ( ptr.arm64e.authRebase.auth ) {
                                        lpValue = (pint_t)(_cacheUnslideAddr + ptr.arm64e.authRebase.target);
                                    }
                                    else {
                                        lpValue = (pint_t)ptr.arm64e.unpackTarget();
                                    }
                                }

                                lpVMAddr = (pint_t)sect->addr() + j * sizeof(pint_t);
                                if ( symbolIndex >= _symTabCmd->nsyms() ) {
                                    _diagnostics.warning("symbol index out of range (%d of %d) for lazy pointer at addr 0x%0llX in %s",
                                        symbolIndex, _symTabCmd->nsyms(), (uint64_t)lpVMAddr, _installName);
                                    continue;
                                }
                                const macho_nlist<P>* sym = &symbolTable[symbolIndex];
                                uint32_t stringOffset = sym->n_strx();
                                if ( stringOffset > _symTabCmd->strsize() ) {
                                    _diagnostics.warning("symbol string offset out of range (%u of %u) for lazy pointer at addr 0x%0llX in %s",
                                        stringOffset, sym->n_strx(), (uint64_t)lpVMAddr, _installName);
                                    continue;
                                }
                                const char* symName = &symbolStrings[stringOffset];
                                if ( (lpValue > textSegStartAddr) && (lpValue < textSegEndAddr) ) {
                                    //fprintf(stderr, "skipping lazy pointer at 0x%0lX to %s in %s because target is within dylib\n", (long)lpVMAddr, symName, _installName);
                                }
                                else if ( (sizeof(pint_t) == 8) && ((lpValue % 4) != 0) ) {
                                    _diagnostics.warning("lazy pointer at 0x%0llX does not point to 4-byte aligned address(0x%0llX) in %s",
                                        (uint64_t)lpVMAddr, (uint64_t)lpValue, _installName);
                                }
                                else {
                                    _lpAddrToTargetAddr[lpVMAddr] = lpValue;
                                    _targetAddrToName[lpValue]    = symName;
                                }
                                break;
                        }
                    }
                }
            }
        }
        cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
    }
}
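
// forEachCallSiteToAStub() below walks the LC_SEGMENT_SPLIT_INFO (format v2) payload,
// which records, per (from-section, to-section) pair, uleb128-encoded offset deltas for
// every reference.  Only references from the __text section into the stubs section are
// forwarded to the handler; if the handler rewrites the instruction, it is stored back.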

template <typename P>
void StubOptimizer<P>::forEachCallSiteToAStub(CallSiteHandler handler)
{
    if (_diagnostics.hasError())
        return;
    const uint8_t* infoStart = &_linkeditBias[_splitSegInfoCmd->dataoff()];
    const uint8_t* infoEnd   = &infoStart[_splitSegInfoCmd->datasize()];
    if ( *infoStart++ != DYLD_CACHE_ADJ_V2_FORMAT ) {
        _diagnostics.error("malformed split seg info in %s", _installName);
        return;
    }

    uint8_t* textSectionContent = (uint8_t*)(_textSection->addr() + _cacheSlide);

    // Whole         :== <count> FromToSection+
    // FromToSection :== <from-sect-index> <to-sect-index> <count> ToOffset+
    // ToOffset      :== <to-sect-offset-delta> <count> FromOffset+
    // FromOffset    :== <kind> <count> <from-sect-offset-delta>
    const uint8_t* p = infoStart;
    uint64_t sectionCount = read_uleb128(p, infoEnd);
    for (uint64_t i=0; i < sectionCount; ++i) {
        uint64_t fromSectionIndex = read_uleb128(p, infoEnd);
        uint64_t toSectionIndex   = read_uleb128(p, infoEnd);
        uint64_t toOffsetCount    = read_uleb128(p, infoEnd);
        uint64_t toSectionOffset  = 0;
        for (uint64_t j=0; j < toOffsetCount; ++j) {
            uint64_t toSectionDelta  = read_uleb128(p, infoEnd);
            uint64_t fromOffsetCount = read_uleb128(p, infoEnd);
            toSectionOffset += toSectionDelta;
            for (uint64_t k=0; k < fromOffsetCount; ++k) {
                uint64_t kind = read_uleb128(p, infoEnd);
                if ( kind > 13 ) {
                    _diagnostics.error("bad kind (%llu) value in %s\n", kind, _installName);
                }
                uint64_t fromSectDeltaCount = read_uleb128(p, infoEnd);
                uint64_t fromSectionOffset  = 0;
                for (uint64_t l=0; l < fromSectDeltaCount; ++l) {
                    uint64_t delta = read_uleb128(p, infoEnd);
                    fromSectionOffset += delta;
                    if ( (fromSectionIndex == _textSectionIndex) && (toSectionIndex == _stubSectionIndex) ) {
                        uint32_t* instrPtr   = (uint32_t*)(textSectionContent + fromSectionOffset);
                        uint64_t  instrAddr  = _textSection->addr() + fromSectionOffset;
                        uint64_t  stubAddr   = _stubSection->addr() + toSectionOffset;
                        uint32_t  instruction = E::get32(*instrPtr);
                        _branchToStubCount++;
                        if ( handler(kind, instrAddr, stubAddr, instruction) ) {
                            E::set32(*instrPtr, instruction);
                        }
                    }
                }
            }
        }
    }
}

/// Extract displacement from a thumb b/bl/blx instruction.
template <typename P>
int32_t StubOptimizer<P>::getDisplacementFromThumbBranch(uint32_t instruction, uint32_t instrAddr)
{
    bool     is_blx = ((instruction & 0xD000F800) == 0xC000F000);
    uint32_t s      = (instruction >> 10) & 0x1;
    uint32_t j1     = (instruction >> 29) & 0x1;
    uint32_t j2     = (instruction >> 27) & 0x1;
    uint32_t imm10  = instruction & 0x3FF;
    uint32_t imm11  = (instruction >> 16) & 0x7FF;
    uint32_t i1     = (j1 == s);
    uint32_t i2     = (j2 == s);
    uint32_t dis    = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
    int32_t  sdis   = dis;
    int32_t  result = s ? (sdis | 0xFE000000) : sdis;
    if ( is_blx && (instrAddr & 0x2) ) {
        // The thumb blx instruction always has low bit of imm11 as zero.  The way
        // a 2-byte aligned blx can branch to a 4-byte aligned ARM target is that
        // the blx instruction always 4-byte aligns the pc before adding the
        // displacement from the blx.  We must emulate that when decoding this.
        result -= 2;
    }
    return result;
}
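
// Note on the bit positions used above and below: a 32-bit Thumb-2 branch is stored as
// two little-endian halfwords, so read as one 32-bit word the first halfword (with S and
// imm10) sits in bits 15:0 and the second halfword (with J1, J2 and imm11) sits in bits
// 31:16.  I1/I2 are defined as NOT(J1 XOR S) and NOT(J2 XOR S), which is why the code
// computes them as (j1 == s) and (j2 == s).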

/// Update a thumb b/bl/blx instruction, switching bl <-> blx as needed.
template <typename P>
uint32_t StubOptimizer<P>::setDisplacementInThumbBranch(uint32_t instruction, uint32_t instrAddr,
                                                        int32_t displacement, bool targetIsThumb) {
    if ( (displacement > 16777214) || (displacement < (-16777216)) ) {
        _diagnostics.error("thumb branch out of range at 0x%0X in %s", instrAddr, _installName);
        return 0;
    }
    bool is_bl  = ((instruction & 0xD000F800) == 0xD000F000);
    bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
    bool is_b   = ((instruction & 0xD000F800) == 0x9000F000);
    uint32_t newInstruction = (instruction & 0xD000F800);
    if (is_bl || is_blx) {
        if (targetIsThumb) {
            newInstruction = 0xD000F000; // Use bl
        }
        else {
            newInstruction = 0xC000F000; // Use blx
            // See note in getDisplacementFromThumbBranch() about blx.
            if (instrAddr & 0x2)
                displacement += 2;
        }
    }
    else if (is_b) {
        if ( !targetIsThumb ) {
            _diagnostics.error("no pc-rel thumb branch instruction that switches to arm mode at 0x%0X in %s", instrAddr, _installName);
            return 0;
        }
    }
    else {
        _diagnostics.error("not b/bl/blx at 0x%0X in %s", instrAddr, _installName);
        return 0;
    }
    uint32_t s     = (uint32_t)(displacement >> 24) & 0x1;
    uint32_t i1    = (uint32_t)(displacement >> 23) & 0x1;
    uint32_t i2    = (uint32_t)(displacement >> 22) & 0x1;
    uint32_t imm10 = (uint32_t)(displacement >> 12) & 0x3FF;
    uint32_t imm11 = (uint32_t)(displacement >> 1) & 0x7FF;
    uint32_t j1    = (i1 == s);
    uint32_t j2    = (i2 == s);
    uint32_t nextDisp  = (j1 << 13) | (j2 << 11) | imm11;
    uint32_t firstDisp = (s << 10) | imm10;
    newInstruction |= (nextDisp << 16) | firstDisp;
    return newInstruction;
}
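
// The call-site optimizers below try three things, in order, for each branch to a stub:
// (1) if the final target is within branch range, rewrite the BL/B to call it directly;
// (2) otherwise, if an already-optimized stub for the same target is in range, branch to
//     that stub instead; (3) otherwise keep the branch and mark the stub for rewriting.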

template <typename P>
void StubOptimizer<P>::optimizeArmCallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr)
{
    forEachCallSiteToAStub([&](uint8_t kind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction) -> bool {
        if ( kind == DYLD_CACHE_ADJ_V2_THUMB_BR22 ) {
            bool is_bl  = ((instruction & 0xD000F800) == 0xD000F000);
            bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
            bool is_b   = ((instruction & 0xD000F800) == 0x9000F000);
            if ( !is_bl && !is_blx && !is_b ){
                _diagnostics.warning("non-branch instruction at 0x%0llX in %s", callSiteAddr, _installName);
                return false;
            }
            int32_t brDelta = getDisplacementFromThumbBranch(instruction, (uint32_t)callSiteAddr);
            pint_t targetAddr = (pint_t)callSiteAddr + 4 + brDelta;
            if ( targetAddr != stubAddr ) {
                _diagnostics.warning("stub target mismatch at callsite 0x%0llX in %s", callSiteAddr, _installName);
                return false;
            }
            // ignore branch if not to a known stub
            const auto& pos = _stubAddrToLPAddr.find(targetAddr);
            if ( pos == _stubAddrToLPAddr.end() )
                return false;

            // ignore branch if lazy pointer is not known (resolver or interposable)
            uint64_t lpAddr = pos->second;
            const auto& pos2 = _lpAddrToTargetAddr.find((pint_t)lpAddr);
            if ( pos2 == _lpAddrToTargetAddr.end() )
                return false;

            uint64_t finalTargetAddr = pos2->second;
            int64_t deltaToFinalTarget = finalTargetAddr - (callSiteAddr + 4);
            // if final target within range, change to branch there directly
            if ( (deltaToFinalTarget > -b16MegLimit) && (deltaToFinalTarget < b16MegLimit) ) {
                bool targetIsThumb = (finalTargetAddr & 1);
                instruction = setDisplacementInThumbBranch(instruction, (uint32_t)callSiteAddr, (int32_t)deltaToFinalTarget, targetIsThumb);
                if (_diagnostics.hasError())
                    return false;
                _branchOptimizedToDirectCount++;
                return true;
            }

            // try to re-use an existing optimized stub
            const auto& pos3 = targetAddrToOptStubAddr.find(finalTargetAddr);
            if ( pos3 != targetAddrToOptStubAddr.end() ) {
                uint64_t existingStub = pos3->second;
                if ( existingStub != stubAddr ) {
                    int64_t deltaToOptStub = existingStub - (callSiteAddr + 4);
                    if ( (deltaToOptStub > -b16MegLimit) && (deltaToOptStub < b16MegLimit) ) {
                        bool targetIsThumb = (existingStub & 1);
                        instruction = setDisplacementInThumbBranch(instruction, (uint32_t)callSiteAddr, (int32_t)deltaToOptStub, targetIsThumb);
                        if (_diagnostics.hasError())
                            return false;
                        _branchToReUsedOptimizedStubCount++;
                        return true;
                    }
                }
            }

            // leave as BL to stub, but optimize the stub
            _stubsToOptimize.insert(stubAddr);
            targetAddrToOptStubAddr[finalTargetAddr] = stubAddr;
            _branchToOptimizedStubCount++;
            return false;
        }
        else if ( kind == DYLD_CACHE_ADJ_V2_ARM_BR24 ) {
            // too few of these to be worth trying to optimize
        }
        return false;
    });
    if (_diagnostics.hasError())
        return;
}

template <typename P>
void StubOptimizer<P>::optimizeArmStubs()
{
    for (const auto& stubEntry : _stubAddrToLPAddr) {
        pint_t stubVMAddr = stubEntry.first;
        pint_t lpVMAddr   = stubEntry.second;
        const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
        if ( pos == _lpAddrToTargetAddr.end() )
            continue;
        pint_t targetVMAddr = pos->second;

        int32_t delta = (int32_t)(targetVMAddr - (stubVMAddr + 12));
        uint32_t* stubInstructions = (uint32_t*)((uint8_t*)(long)stubVMAddr + _cacheSlide);
        assert(stubInstructions[0] == 0xe59fc004);
        stubInstructions[0] = 0xe59fc000;  // ldr  ip, L0
        stubInstructions[1] = 0xe08ff00c;  // add  pc, pc, ip
        stubInstructions[2] = delta;       // L0: .long xxxx
        stubInstructions[3] = 0xe7ffdefe;  // trap
        _stubOptimizedCount++;
    }
}

template <typename P>
void StubOptimizer<P>::optimizeArm64Stubs()
{
    for (const uint64_t stubVMAddr : _stubsToOptimize ) {
        pint_t lpVMAddr = _stubAddrToLPAddr[(pint_t)stubVMAddr];
        const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
        if ( pos == _lpAddrToTargetAddr.end() )
            continue;
        pint_t targetVMAddr = pos->second;

        int64_t adrpDelta = (targetVMAddr & -4096) - (stubVMAddr & -4096);
        // Note: ADRP/ADD can only span +/-4GB
        uint32_t* stubInstructions = (uint32_t*)((uint8_t*)(long)stubVMAddr + _cacheSlide);
        bool rightInstr1 = ((stubInstructions[0] & 0x9F00001F) == 0x90000010);   // ADRP X16, lp@page
        bool rightInstr2 = ((stubInstructions[1] & 0xFFC003FF) == 0xF9400210);   // LDR  X16, [X16, lp@pageoff]
        bool rightInstr3 = (stubInstructions[2] == 0xD61F0200);                  // BR   X16

        if ( rightInstr1 && rightInstr2 && rightInstr3 ) {
            uint32_t immhi   = (adrpDelta >> 9) & (0x00FFFFE0);
            uint32_t immlo   = (adrpDelta << 17) & (0x60000000);
            uint32_t newADRP = (0x90000010) | immlo | immhi;
            uint32_t off12   = (targetVMAddr & 0xFFF);
            uint32_t newADD  = (0x91000210) | (off12 << 10);

            stubInstructions[0] = newADRP;    // ADRP X16, target@page
            stubInstructions[1] = newADD;     // ADD  X16, X16, target@pageoff
            stubInstructions[2] = 0xD61F0200; // BR   X16
            _stubOptimizedCount++;
        }
    }
}
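
// The rewrite above keeps the stub three instructions long (so no call sites move) but
// replaces the load through the lazy pointer (ADRP/LDR/BR) with a direct materialization
// of the target address (ADRP/ADD/BR), removing the memory load from the call path.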

#if SUPPORT_ARCH_arm64e
template <typename P>
void StubOptimizer<P>::optimizeArm64eStubs()
{
    for (const uint64_t stubVMAddr : _stubsToOptimize ) {
        pint_t lpVMAddr = _stubAddrToLPAddr[(pint_t)stubVMAddr];
        const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
        if ( pos == _lpAddrToTargetAddr.end() )
            continue;
        pint_t targetVMAddr = pos->second;

        int64_t adrpDelta = (targetVMAddr & -4096) - (stubVMAddr & -4096);
        // Note: ADRP/ADD can only span +/-4GB
        uint32_t* stubInstructions = (uint32_t*)((uint8_t*)(long)stubVMAddr + _cacheSlide);
        bool rightInstr1 = ((stubInstructions[0] & 0x9F00001F) == 0x90000011);   // ADRP X17, lp@page
        bool rightInstr2 = ((stubInstructions[1] & 0xFFC003FF) == 0x91000231);   // ADD  X17, X17, lp@pageoff
        bool rightInstr3 = (stubInstructions[2] == 0xF9400230);                  // LDR  X16, [X17]
        bool rightInstr4 = (stubInstructions[3] == 0xD71F0A11);                  // BRAA X16, X17

        if ( rightInstr1 && rightInstr2 && rightInstr3 && rightInstr4 ) {
            uint32_t immhi   = (adrpDelta >> 9) & (0x00FFFFE0);
            uint32_t immlo   = (adrpDelta << 17) & (0x60000000);
            uint32_t newADRP = (0x90000010) | immlo | immhi;
            uint32_t off12   = (targetVMAddr & 0xFFF);
            uint32_t newADD  = (0x91000210) | (off12 << 10);

            stubInstructions[0] = newADRP;    // ADRP X16, target@page
            stubInstructions[1] = newADD;     // ADD  X16, X16, target@pageoff
            stubInstructions[2] = 0xD61F0200; // BR   X16
            stubInstructions[3] = 0xD4200020; // TRAP
            _stubOptimizedCount++;
        }
    }
}
#endif

#if SUPPORT_ARCH_arm64_32
template <typename P>
void StubOptimizer<P>::optimizeArm64_32Stubs()
{
    for (const uint64_t stubVMAddr : _stubsToOptimize ) {
        pint_t lpVMAddr = _stubAddrToLPAddr[(pint_t)stubVMAddr];
        const auto& pos = _lpAddrToTargetAddr.find(lpVMAddr);
        if ( pos == _lpAddrToTargetAddr.end() )
            continue;
        pint_t targetVMAddr = pos->second;

        int64_t adrpDelta = (targetVMAddr & -4096) - (stubVMAddr & -4096);
        uint32_t* stubInstructions = (uint32_t*)((uint8_t*)(long)stubVMAddr + _cacheSlide);
        bool rightInstr1 = ((stubInstructions[0] & 0x9F00001F) == 0x90000010);   // ADRP X16, lp@page
        bool rightInstr2 = ((stubInstructions[1] & 0xFFC003FF) == 0xB9400210);   // LDR  W16, [X16, lp@pageoff]
        bool rightInstr3 = (stubInstructions[2] == 0xD61F0200);                  // BR   X16

        if ( rightInstr1 && rightInstr2 && rightInstr3 ) {
            uint32_t immhi   = (adrpDelta >> 9) & (0x00FFFFE0);
            uint32_t immlo   = (adrpDelta << 17) & (0x60000000);
            uint32_t newADRP = (0x90000010) | immlo | immhi;
            uint32_t off12   = (targetVMAddr & 0xFFF);
            uint32_t newADD  = (0x91000210) | (off12 << 10);

            stubInstructions[0] = newADRP;    // ADRP X16, target@page
            stubInstructions[1] = newADD;     // ADD  X16, X16, target@pageoff
            stubInstructions[2] = 0xD61F0200; // BR   X16
            _stubOptimizedCount++;
        }
    }
}
#endif

template <typename P>
void StubOptimizer<P>::optimizeArm64CallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr)
{
    forEachCallSiteToAStub([&](uint8_t kind, uint64_t callSiteAddr, uint64_t stubAddr, uint32_t& instruction) -> bool {
        if ( kind != DYLD_CACHE_ADJ_V2_ARM64_BR26 )
            return false;
        // skip all but BL or B
        if ( (instruction & 0x7C000000) != 0x14000000 )
            return false;
        // compute target of branch instruction
        int32_t brDelta = (instruction & 0x03FFFFFF) << 2;
        if ( brDelta & 0x08000000 )
            brDelta |= 0xF0000000;
        uint64_t targetAddr = callSiteAddr + (int64_t)brDelta;
        if ( targetAddr != stubAddr ) {
            _diagnostics.warning("stub target mismatch");
            return false;
        }
        // ignore branch if not to a known stub
        const auto& pos = _stubAddrToLPAddr.find((pint_t)targetAddr);
        if ( pos == _stubAddrToLPAddr.end() )
            return false;

        // ignore branch if lazy pointer is not known (resolver or interposable)
        uint64_t lpAddr = pos->second;
        const auto& pos2 = _lpAddrToTargetAddr.find((pint_t)lpAddr);
        if ( pos2 == _lpAddrToTargetAddr.end() )
            return false;

        uint64_t finalTargetAddr = pos2->second;
        int64_t deltaToFinalTarget = finalTargetAddr - callSiteAddr;
        // if final target within range, change to branch there directly
        if ( (deltaToFinalTarget > -b128MegLimit) && (deltaToFinalTarget < b128MegLimit) ) {
            instruction = (instruction & 0xFC000000) | ((deltaToFinalTarget >> 2) & 0x03FFFFFF);
            _branchOptimizedToDirectCount++;
            return true;
        }

        // try to re-use an existing optimized stub
        const auto& pos3 = targetAddrToOptStubAddr.find((pint_t)finalTargetAddr);
        if ( pos3 != targetAddrToOptStubAddr.end() ) {
            uint64_t existingStub = pos3->second;
            if ( existingStub != stubAddr ) {
                int64_t deltaToOptStub = existingStub - callSiteAddr;
                if ( (deltaToOptStub > -b128MegLimit) && (deltaToOptStub < b128MegLimit) ) {
                    instruction = (instruction & 0xFC000000) | ((deltaToOptStub >> 2) & 0x03FFFFFF);
                    _branchToReUsedOptimizedStubCount++;
                    return true;
                }
            }
        }

        // leave as BL to stub, but optimize the stub
        _stubsToOptimize.insert(stubAddr);
        targetAddrToOptStubAddr[(pint_t)finalTargetAddr] = (pint_t)stubAddr;
        _branchToOptimizedStubCount++;
        return false;
    });
    if (_diagnostics.hasError())
        return;
}

template <typename P>
void StubOptimizer<P>::optimizeCallSites(std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr)
{
    if ( _textSection == NULL )
        return;
    if ( _stubSection == NULL )
        return;

    switch ( _mh->cputype() ) {
        case CPU_TYPE_ARM64:
            optimizeArm64CallSites(targetAddrToOptStubAddr);
#if SUPPORT_ARCH_arm64e
            if (_mh->cpusubtype() == CPU_SUBTYPE_ARM64E)
                optimizeArm64eStubs();
            else
#endif
                optimizeArm64Stubs();
            break;
#if SUPPORT_ARCH_arm64_32
        case CPU_TYPE_ARM64_32:
            optimizeArm64CallSites(targetAddrToOptStubAddr);
            optimizeArm64_32Stubs();
            break;
#endif
        case CPU_TYPE_ARM:
            optimizeArmCallSites(targetAddrToOptStubAddr);
            optimizeArmStubs();
            break;
    }
    if ( verbose ) {
        _diagnostics.verbose("dylib has %6u BLs to %4u stubs. Changed %5u, %5u, %5u BLs to use direct branch, optimized stub, neighbor's optimized stub. "
                             "%5u stubs left interposable, %4u stubs optimized. path=%s\n",
                             _branchToStubCount, _stubCount, _branchOptimizedToDirectCount, _branchToOptimizedStubCount, _branchToReUsedOptimizedStubCount,
                             _stubsLeftInterposable, _stubOptimizedCount, _installName);
    }
}
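
// bypassStubs() is the per-architecture driver: it creates one StubOptimizer per cached
// dylib, collects the set of symbols that must stay interposable (the explicit symbol
// list plus every export of the listed dylibs), builds the stub maps, and then runs the
// call-site optimization over every image.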

template <typename P>
void bypassStubs(DyldSharedCache* cache, const std::string& archName, std::unordered_map<uint64_t, uint64_t>& targetAddrToOptStubAddr,
                 const char* const neverStubEliminateDylibs[], const char* const neverStubEliminateSymbols[],
                 Diagnostics& diags)
{
    diags.verbose("Stub elimination optimization:\n");

    // construct a StubOptimizer for each image
    __block std::vector<StubOptimizer<P>*> optimizers;
    cache->forEachImage(^(const mach_header* mh, const char* installName) {
        optimizers.push_back(new StubOptimizer<P>(cache, (macho_header<P>*)mh, diags));
    });

    // build set of functions to never stub-eliminate because tools may need to override them
    std::unordered_set<std::string> neverStubEliminate;
    for (const char* const* p=neverStubEliminateSymbols; *p != nullptr; ++p) {
        neverStubEliminate.insert(*p);
    }
    for (const char* const* d=neverStubEliminateDylibs; *d != nullptr; ++d) {
        for (StubOptimizer<P>* op : optimizers) {
            if ( strcmp(op->installName(), *d) == 0 ) {
                // add every export of this dylib
                const uint8_t* exportsStart = op->exportsTrie();
                const uint8_t* exportsEnd   = exportsStart + op->exportsTrieSize();
                std::vector<ExportInfoTrie::Entry> exports;
                if ( !ExportInfoTrie::parseTrie(exportsStart, exportsEnd, exports) ) {
                    diags.error("malformed exports trie in %s", *d);
                    return;
                }
                for(const ExportInfoTrie::Entry& entry : exports) {
                    neverStubEliminate.insert(entry.name);
                }
            }
        }
    }

    // build maps of stubs-to-lp and lp-to-target
    for (StubOptimizer<P>* op : optimizers)
        op->buildStubMap(neverStubEliminate);

    // optimize call sites to by-pass stubs or jump through island
    for (StubOptimizer<P>* op : optimizers)
        op->optimizeCallSites(targetAddrToOptStubAddr);

    // write total optimization info
    uint32_t callSiteCount = 0;
    uint32_t callSiteDirectOptCount = 0;
    for (StubOptimizer<P>* op : optimizers) {
        callSiteCount          += op->_branchToStubCount;
        callSiteDirectOptCount += op->_branchOptimizedToDirectCount;
    }
    diags.verbose("  cache contains %u call sites of which %u were direct bound\n", callSiteCount, callSiteDirectOptCount);

    // clean up
    for (StubOptimizer<P>* op : optimizers)
        delete op;
}

void CacheBuilder::optimizeAwayStubs()
{
    std::unordered_map<uint64_t, uint64_t> targetAddrToOptStubAddr;

    DyldSharedCache* dyldCache = (DyldSharedCache*)_readExecuteRegion.buffer;
    std::string archName = dyldCache->archName();
#if SUPPORT_ARCH_arm64_32
    if ( startsWith(archName, "arm64_32") )
        bypassStubs<Pointer32<LittleEndian>>(dyldCache, archName, targetAddrToOptStubAddr, _s_neverStubEliminateDylibs, _s_neverStubEliminateSymbols, _diagnostics);
    else
#endif
    if ( startsWith(archName, "arm64") )
        bypassStubs<Pointer64<LittleEndian>>(dyldCache, archName, targetAddrToOptStubAddr, _s_neverStubEliminateDylibs, _s_neverStubEliminateSymbols, _diagnostics);
    else if ( archName == "armv7k" )
        bypassStubs<Pointer32<LittleEndian>>(dyldCache, archName, targetAddrToOptStubAddr, _s_neverStubEliminateDylibs, _s_neverStubEliminateSymbols, _diagnostics);
    // no stub optimization done for other arches
}