1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*-
2 *
3 * Copyright (c) 2014 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25
26 #include <dirent.h>
27 #include <sys/errno.h>
28 #include <sys/fcntl.h>
29 #include <mach-o/loader.h>
30 #include <mach-o/fat.h>
31 #include <assert.h>
32
33 #include <fstream>
34 #include <string>
35 #include <algorithm>
36 #include <unordered_map>
37 #include <unordered_set>
38
39 #include "CacheBuilder.h"
40 #include "Diagnostics.h"
41 #include "DyldSharedCache.h"
42 #include "Trie.hpp"
43 #include "MachOFileAbstraction.hpp"
44 #include "MachOLoaded.h"
45 #include "MachOAnalyzer.h"
46 #include "mach-o/fixup-chains.h"
47
48
49 #ifndef EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE
50 #define EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE 0x02
51 #endif
52
53 namespace {
54
55 template <typename P>
56 class Adjustor {
57 public:
58 Adjustor(uint64_t cacheBaseAddress, dyld3::MachOAnalyzer* mh, const char* dylibID,
59 const std::vector<CacheBuilder::SegmentMappingInfo>& mappingInfo, Diagnostics& diag);
60 void adjustImageForNewSegmentLocations(CacheBuilder::ASLR_Tracker& aslrTracker,
61 CacheBuilder::LOH_Tracker* lohTracker,
62 const CacheBuilder::CacheCoalescedText* coalescedText,
63 const CacheBuilder::DylibTextCoalescer& textCoalescer);
64
65 private:
66 void adjustReferencesUsingInfoV2(CacheBuilder::ASLR_Tracker& aslrTracker,
67 CacheBuilder::LOH_Tracker* lohTracker,
68 const CacheBuilder::CacheCoalescedText* coalescedText,
69 const CacheBuilder::DylibTextCoalescer& textCoalescer);
70 void adjustReference(uint32_t kind, uint8_t* mappedAddr, uint64_t fromNewAddress, uint64_t toNewAddress, int64_t adjust, int64_t targetSlide,
71 uint64_t imageStartAddress, uint64_t imageEndAddress, bool convertRebaseChains,
72 CacheBuilder::ASLR_Tracker& aslrTracker, CacheBuilder::LOH_Tracker* lohTracker,
73 uint32_t*& lastMappedAddr32, uint32_t& lastKind, uint64_t& lastToNewAddress);
74 void adjustDataPointers(CacheBuilder::ASLR_Tracker& aslrTracker);
75 void adjustRebaseChains(CacheBuilder::ASLR_Tracker& aslrTracker);
76 void slidePointer(int segIndex, uint64_t segOffset, uint8_t type, CacheBuilder::ASLR_Tracker& aslrTracker);
77 void adjustSymbolTable();
78 void adjustChainedFixups(const CacheBuilder::DylibTextCoalescer& textCoalescer);
79 void adjustExternalRelocations();
80 void adjustExportsTrie(std::vector<uint8_t>& newTrieBytes);
81 void rebuildLinkEdit();
82 void adjustCode();
83 void adjustInstruction(uint8_t kind, uint8_t* textLoc, uint64_t codeToDataDelta);
84 void rebuildLinkEditAndLoadCommands(const CacheBuilder::DylibTextCoalescer& textCoalescer);
85 uint64_t slideForOrigAddress(uint64_t addr);
86 void convertGeneric64RebaseToIntermediate(dyld3::MachOLoaded::ChainedFixupPointerOnDisk* chainPtr, CacheBuilder::ASLR_Tracker& aslrTracker, uint64_t targetSlide);
87 void convertArm64eRebaseToIntermediate(dyld3::MachOLoaded::ChainedFixupPointerOnDisk* chainPtr, CacheBuilder::ASLR_Tracker& aslrTracker,
88 uint64_t targetSlide, bool convertRebaseChains);
89
90
91 typedef typename P::uint_t pint_t;
92 typedef typename P::E E;
93
94 uint64_t _cacheBaseAddress = 0;
95 dyld3::MachOAnalyzer* _mh;
96 Diagnostics& _diagnostics;
97 const uint8_t* _linkeditBias = nullptr;
98 unsigned _linkeditSegIndex = 0;
99 bool _maskPointers = false;
100 bool _splitSegInfoV2 = false;
101 const char* _dylibID = nullptr;
102 symtab_command* _symTabCmd = nullptr;
103 dysymtab_command* _dynSymTabCmd = nullptr;
104 dyld_info_command* _dyldInfo = nullptr;
105 linkedit_data_command* _splitSegInfoCmd = nullptr;
106 linkedit_data_command* _functionStartsCmd = nullptr;
107 linkedit_data_command* _dataInCodeCmd = nullptr;
108 linkedit_data_command* _exportTrieCmd = nullptr;
109 linkedit_data_command* _chainedFixupsCmd = nullptr;
110 uint16_t _chainedFixupsFormat = 0;
111 std::vector<uint64_t> _segOrigStartAddresses;
112 std::vector<uint64_t> _segSizes;
113 std::vector<uint64_t> _segSlides;
114 std::vector<macho_segment_command<P>*> _segCmds;
115 const std::vector<CacheBuilder::SegmentMappingInfo>& _mappingInfo;
116 };
117
118 template <typename P>
119 Adjustor<P>::Adjustor(uint64_t cacheBaseAddress, dyld3::MachOAnalyzer* mh, const char* dylibID,
120 const std::vector<CacheBuilder::SegmentMappingInfo>& mappingInfo, Diagnostics& diag)
121 : _cacheBaseAddress(cacheBaseAddress), _mh(mh), _diagnostics(diag), _dylibID(dylibID), _mappingInfo(mappingInfo)
122 {
123 assert((_mh->magic == MH_MAGIC) || (_mh->magic == MH_MAGIC_64));
124
125 __block unsigned segIndex = 0;
126 mh->forEachLoadCommand(diag, ^(const load_command *cmd, bool &stop) {
127 switch ( cmd->cmd ) {
128 case LC_SYMTAB:
129 _symTabCmd = (symtab_command*)cmd;
130 break;
131 case LC_DYSYMTAB:
132 _dynSymTabCmd = (dysymtab_command*)cmd;
133 break;
134 case LC_DYLD_INFO:
135 case LC_DYLD_INFO_ONLY:
136 _dyldInfo = (dyld_info_command*)cmd;
137 break;
138 case LC_SEGMENT_SPLIT_INFO:
139 _splitSegInfoCmd = (linkedit_data_command*)cmd;
140 break;
141 case LC_FUNCTION_STARTS:
142 _functionStartsCmd = (linkedit_data_command*)cmd;
143 break;
144 case LC_DATA_IN_CODE:
145 _dataInCodeCmd = (linkedit_data_command*)cmd;
146 break;
147 case LC_DYLD_CHAINED_FIXUPS:
148 _chainedFixupsCmd = (linkedit_data_command*)cmd;
149 _chainedFixupsFormat = dyld3::MachOAnalyzer::chainedPointerFormat((dyld_chained_fixups_header*)&_linkeditBias[_chainedFixupsCmd->dataoff]);
150 break;
151 case LC_DYLD_EXPORTS_TRIE:
152 _exportTrieCmd = (linkedit_data_command*)cmd;
153 break;
154 case macho_segment_command<P>::CMD:
155 macho_segment_command<P>* segCmd = (macho_segment_command<P>*)cmd;
156 _segCmds.push_back(segCmd);
157 _segOrigStartAddresses.push_back(segCmd->vmaddr());
158 _segSizes.push_back(segCmd->vmsize());
159 _segSlides.push_back(_mappingInfo[segIndex].dstCacheUnslidAddress - segCmd->vmaddr());
160 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
161 _linkeditBias = (uint8_t*)_mappingInfo[segIndex].dstSegment - segCmd->fileoff();
162 _linkeditSegIndex = segIndex;
163 }
164 ++segIndex;
165 break;
166 }
167 });
168
169 _maskPointers = (mh->cputype == CPU_TYPE_ARM64) || (mh->cputype == CPU_TYPE_ARM64_32);
170 if ( _splitSegInfoCmd != NULL ) {
171 const uint8_t* infoStart = &_linkeditBias[_splitSegInfoCmd->dataoff];
172 _splitSegInfoV2 = (*infoStart == DYLD_CACHE_ADJ_V2_FORMAT);
173 }
174 else {
175 bool canHaveMissingSplitSeg = false;
176 #if BUILDING_APP_CACHE_UTIL
177 if ( mh->isKextBundle() ) {
178 if ( mh->isArch("x86_64") || mh->isArch("x86_64h") )
179 canHaveMissingSplitSeg = true;
180 }
181 #endif
182 if ( !canHaveMissingSplitSeg )
183 _diagnostics.error("missing LC_SEGMENT_SPLIT_INFO in %s", _dylibID);
184 }
185
186     // Set the chained pointer format on old arm64e binaries that use threaded rebase and
187     // don't have LC_DYLD_CHAINED_FIXUPS
188 if ( (_chainedFixupsCmd == nullptr) && mh->isArch("arm64e") ) {
189 _chainedFixupsFormat = DYLD_CHAINED_PTR_ARM64E;
190 }
191 }
192
193 template <typename P>
194 void Adjustor<P>::adjustImageForNewSegmentLocations(CacheBuilder::ASLR_Tracker& aslrTracker,
195 CacheBuilder::LOH_Tracker* lohTracker,
196 const CacheBuilder::CacheCoalescedText* coalescedText,
197 const CacheBuilder::DylibTextCoalescer& textCoalescer)
198 {
199 if ( _diagnostics.hasError() )
200 return;
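    // Three adjustment strategies, depending on how the dylib encodes its fixups:
    //   1) split-seg info v2: walk the v2 from/to records and adjust each reference kind in place
    //   2) chained fixups (LC_DYLD_CHAINED_FIXUPS): update the chain starts, then walk and retarget the rebase chains
    //   3) classic rebase info: slide each recorded data pointer and patch any text fixups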
201 if ( _splitSegInfoV2 ) {
202 adjustReferencesUsingInfoV2(aslrTracker, lohTracker, coalescedText, textCoalescer);
203 adjustChainedFixups(textCoalescer);
204 }
205 else if ( _chainedFixupsCmd != nullptr ) {
206 // need to adjust the chain fixup segment_offset fields in LINKEDIT before chains can be walked
207 adjustChainedFixups(textCoalescer);
208 adjustRebaseChains(aslrTracker);
209 adjustCode();
210 }
211 else {
212 adjustDataPointers(aslrTracker);
213 adjustCode();
214 }
215 if ( _diagnostics.hasError() )
216 return;
217 adjustSymbolTable();
218 if ( _diagnostics.hasError() )
219 return;
220
221 adjustExternalRelocations();
222 if ( _diagnostics.hasError() )
223 return;
224 rebuildLinkEditAndLoadCommands(textCoalescer);
225
226 #if DEBUG
227 Diagnostics diag;
228 _mh->validateDyldCacheDylib(diag, _dylibID);
229 if ( diag.hasError() ) {
230 fprintf(stderr, "%s\n", diag.errorMessage().c_str());
231 }
232 #endif
233 }
234
235 template <typename P>
236 uint64_t Adjustor<P>::slideForOrigAddress(uint64_t addr)
237 {
238 for (unsigned i=0; i < _segOrigStartAddresses.size(); ++i) {
239 if ( (_segOrigStartAddresses[i] <= addr) && (addr < (_segOrigStartAddresses[i]+_segCmds[i]->vmsize())) )
240 return _segSlides[i];
241 }
242 // On arm64, high nibble of pointers can have extra bits
243 if ( _maskPointers && (addr & 0xF000000000000000) ) {
244 return slideForOrigAddress(addr & 0x0FFFFFFFFFFFFFFF);
245 }
246 _diagnostics.error("slide not known for dylib address 0x%llX in %s", addr, _dylibID);
247 return 0;
248 }
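// Illustrative example (hypothetical numbers): if __DATA originally spanned
// [0x200000, 0x240000) and was placed at unslid cache address 0x7FFF80000000, then
// _segSlides for that segment is 0x7FFF80000000 - 0x200000, and any original address
// in that range returns that slide. On arm64/arm64e a pointer may carry tag bits in
// its top nibble, which are masked off before retrying the lookup.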
249
250 template <typename P>
251 void Adjustor<P>::rebuildLinkEditAndLoadCommands(const CacheBuilder::DylibTextCoalescer& textCoalescer)
252 {
253     // The exports trie is the only data structure in LINKEDIT that might grow
254 std::vector<uint8_t> newTrieBytes;
255 adjustExportsTrie(newTrieBytes);
256
257 // Remove: code signature, rebase info, code-sign-dirs, split seg info
258 uint32_t chainedFixupsOffset = 0;
259 uint32_t chainedFixupsSize = _chainedFixupsCmd ? _chainedFixupsCmd->datasize : 0;
260 uint32_t bindOffset = chainedFixupsOffset + chainedFixupsSize;
261 uint32_t bindSize = _dyldInfo ? _dyldInfo->bind_size : 0;
262 uint32_t weakBindOffset = bindOffset + bindSize;
263 uint32_t weakBindSize = _dyldInfo ? _dyldInfo->weak_bind_size : 0;
264 uint32_t lazyBindOffset = weakBindOffset + weakBindSize;
265 uint32_t lazyBindSize = _dyldInfo ? _dyldInfo->lazy_bind_size : 0;
266 uint32_t exportOffset = lazyBindOffset + lazyBindSize;
267 uint32_t exportSize = (uint32_t)newTrieBytes.size();
268 uint32_t splitSegInfoOffset = exportOffset + exportSize;
269 uint32_t splitSegInfosSize = (_splitSegInfoCmd ? _splitSegInfoCmd->datasize : 0);
270 uint32_t funcStartsOffset = splitSegInfoOffset + splitSegInfosSize;
271 uint32_t funcStartsSize = (_functionStartsCmd ? _functionStartsCmd->datasize : 0);
272 uint32_t dataInCodeOffset = funcStartsOffset + funcStartsSize;
273 uint32_t dataInCodeSize = (_dataInCodeCmd ? _dataInCodeCmd->datasize : 0);
274 uint32_t symbolTableOffset = dataInCodeOffset + dataInCodeSize;
275 uint32_t symbolTableSize = _symTabCmd->nsyms * sizeof(macho_nlist<P>);
276 uint32_t indirectTableOffset = symbolTableOffset + symbolTableSize;
277 uint32_t indirectTableSize = _dynSymTabCmd ? (_dynSymTabCmd->nindirectsyms * sizeof(uint32_t)) : 0;
278 uint32_t externalRelocOffset = indirectTableOffset + indirectTableSize;
279 uint32_t externalRelocSize = _dynSymTabCmd ? (_dynSymTabCmd->nextrel * sizeof(relocation_info)) : 0;
280 uint32_t symbolStringsOffset = externalRelocOffset + externalRelocSize;
281 uint32_t symbolStringsSize = _symTabCmd->strsize;
282 uint32_t newLinkEditSize = symbolStringsOffset + symbolStringsSize;
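    // The rebuilt LINKEDIT is packed in the order computed above:
    //   chained fixups | bind | weak bind | lazy bind | export trie | split-seg info |
    //   function starts | data-in-code | symbol table | indirect symbol table |
    //   external relocs | symbol strings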
283
284 size_t linkeditBufferSize = align(_segCmds[_linkeditSegIndex]->vmsize(), 12);
285 if ( linkeditBufferSize < newLinkEditSize ) {
286 _diagnostics.error("LINKEDIT overflow in %s", _dylibID);
287 return;
288 }
289
290 uint8_t* newLinkeditBufer = (uint8_t*)::calloc(linkeditBufferSize, 1);
291 if ( chainedFixupsSize )
292 memcpy(&newLinkeditBufer[chainedFixupsOffset], &_linkeditBias[_chainedFixupsCmd->dataoff], chainedFixupsSize);
293 if ( bindSize )
294 memcpy(&newLinkeditBufer[bindOffset], &_linkeditBias[_dyldInfo->bind_off], bindSize);
295 if ( lazyBindSize )
296 memcpy(&newLinkeditBufer[lazyBindOffset], &_linkeditBias[_dyldInfo->lazy_bind_off], lazyBindSize);
297 if ( weakBindSize )
298 memcpy(&newLinkeditBufer[weakBindOffset], &_linkeditBias[_dyldInfo->weak_bind_off], weakBindSize);
299 if ( exportSize )
300 memcpy(&newLinkeditBufer[exportOffset], &newTrieBytes[0], exportSize);
301 if ( splitSegInfosSize )
302 memcpy(&newLinkeditBufer[splitSegInfoOffset], &_linkeditBias[_splitSegInfoCmd->dataoff], splitSegInfosSize);
303 if ( funcStartsSize )
304 memcpy(&newLinkeditBufer[funcStartsOffset], &_linkeditBias[_functionStartsCmd->dataoff], funcStartsSize);
305 if ( dataInCodeSize )
306 memcpy(&newLinkeditBufer[dataInCodeOffset], &_linkeditBias[_dataInCodeCmd->dataoff], dataInCodeSize);
307 if ( symbolTableSize )
308 memcpy(&newLinkeditBufer[symbolTableOffset], &_linkeditBias[_symTabCmd->symoff], symbolTableSize);
309 if ( indirectTableSize )
310 memcpy(&newLinkeditBufer[indirectTableOffset], &_linkeditBias[_dynSymTabCmd->indirectsymoff], indirectTableSize);
311 if ( externalRelocSize )
312 memcpy(&newLinkeditBufer[externalRelocOffset], &_linkeditBias[_dynSymTabCmd->extreloff], externalRelocSize);
313 if ( symbolStringsSize )
314 memcpy(&newLinkeditBufer[symbolStringsOffset], &_linkeditBias[_symTabCmd->stroff], symbolStringsSize);
315
316 memcpy(_mappingInfo[_linkeditSegIndex].dstSegment, newLinkeditBufer, newLinkEditSize);
317 ::bzero(((uint8_t*)_mappingInfo[_linkeditSegIndex].dstSegment)+newLinkEditSize, linkeditBufferSize-newLinkEditSize);
318 ::free(newLinkeditBufer);
319 uint32_t linkeditStartOffset = (uint32_t)_mappingInfo[_linkeditSegIndex].dstCacheFileOffset;
320
321     // update load commands and remove ones that are no longer needed
322
323 __block unsigned segIndex = 0;
324 _mh->forEachLoadCommand(_diagnostics, ^(const load_command *cmd, bool &stop) {
325 symtab_command* symTabCmd;
326 dysymtab_command* dynSymTabCmd;
327 dyld_info_command* dyldInfo;
328 linkedit_data_command* functionStartsCmd;
329 linkedit_data_command* dataInCodeCmd;
330 linkedit_data_command* chainedFixupsCmd;
331 linkedit_data_command* exportTrieCmd;
332 linkedit_data_command* splitSegInfoCmd;
333 macho_segment_command<P>* segCmd;
334 macho_routines_command<P>* routinesCmd;
335 dylib_command* dylibIDCmd;
336 int32_t segFileOffsetDelta;
337 switch ( cmd->cmd ) {
338 case LC_ID_DYLIB:
339 dylibIDCmd = (dylib_command*)cmd;
340 dylibIDCmd->dylib.timestamp = 2; // match what static linker sets in LC_LOAD_DYLIB
341 break;
342 case LC_SYMTAB:
343 symTabCmd = (symtab_command*)cmd;
344 symTabCmd->symoff = linkeditStartOffset + symbolTableOffset;
345 symTabCmd->stroff = linkeditStartOffset + symbolStringsOffset;
346 break;
347 case LC_DYSYMTAB:
348 dynSymTabCmd = (dysymtab_command*)cmd;
349 dynSymTabCmd->indirectsymoff = linkeditStartOffset + indirectTableOffset;
350 // Clear local relocations (ie, old style rebases) as they were tracked earlier when we applied split seg
351 dynSymTabCmd->locreloff = 0;
352 dynSymTabCmd->nlocrel = 0 ;
353 // Update external relocations as we need these later to resolve binds from kexts
354 dynSymTabCmd->extreloff = linkeditStartOffset + externalRelocOffset;
355 break;
356 case LC_DYLD_INFO:
357 case LC_DYLD_INFO_ONLY:
358 dyldInfo = (dyld_info_command*)cmd;
359 dyldInfo->rebase_off = 0;
360 dyldInfo->rebase_size = 0;
361 dyldInfo->bind_off = bindSize ? linkeditStartOffset + bindOffset : 0;
362 dyldInfo->bind_size = bindSize;
363 dyldInfo->weak_bind_off = weakBindSize ? linkeditStartOffset + weakBindOffset : 0;
364 dyldInfo->weak_bind_size = weakBindSize;
365 dyldInfo->lazy_bind_off = lazyBindSize ? linkeditStartOffset + lazyBindOffset : 0;
366 dyldInfo->lazy_bind_size = lazyBindSize;
367 dyldInfo->export_off = exportSize ? linkeditStartOffset + exportOffset : 0;
368 dyldInfo->export_size = exportSize;
369 break;
370 case LC_FUNCTION_STARTS:
371 functionStartsCmd = (linkedit_data_command*)cmd;
372 functionStartsCmd->dataoff = linkeditStartOffset + funcStartsOffset;
373 break;
374 case LC_DATA_IN_CODE:
375 dataInCodeCmd = (linkedit_data_command*)cmd;
376 dataInCodeCmd->dataoff = linkeditStartOffset + dataInCodeOffset;
377 break;
378 case LC_DYLD_CHAINED_FIXUPS:
379 chainedFixupsCmd = (linkedit_data_command*)cmd;
380 chainedFixupsCmd->dataoff = chainedFixupsSize ? linkeditStartOffset + chainedFixupsOffset : 0;
381 chainedFixupsCmd->datasize = chainedFixupsSize;
382 break;
383 case LC_DYLD_EXPORTS_TRIE:
384 exportTrieCmd = (linkedit_data_command*)cmd;
385 exportTrieCmd->dataoff = exportSize ? linkeditStartOffset + exportOffset : 0;
386 exportTrieCmd->datasize = exportSize;
387 break;
388 case macho_routines_command<P>::CMD:
389 routinesCmd = (macho_routines_command<P>*)cmd;
390 routinesCmd->set_init_address(routinesCmd->init_address()+slideForOrigAddress(routinesCmd->init_address()));
391 break;
392 case macho_segment_command<P>::CMD:
393 segCmd = (macho_segment_command<P>*)cmd;
394 segFileOffsetDelta = (int32_t)(_mappingInfo[segIndex].dstCacheFileOffset - segCmd->fileoff());
395 segCmd->set_vmaddr(_mappingInfo[segIndex].dstCacheUnslidAddress);
396 segCmd->set_vmsize(_mappingInfo[segIndex].dstCacheSegmentSize);
397 segCmd->set_fileoff(_mappingInfo[segIndex].dstCacheFileOffset);
398 segCmd->set_filesize(_mappingInfo[segIndex].dstCacheFileSize);
399 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 )
400 segCmd->set_vmsize(linkeditBufferSize);
401 if ( segCmd->nsects() > 0 ) {
402 macho_section<P>* const sectionsStart = (macho_section<P>*)((uint8_t*)segCmd + sizeof(macho_segment_command<P>));
403 macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
404
405 for (macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
406 bool coalescedSection = false;
407 if ( textCoalescer.sectionWasCoalesced(sect->segname(), sect->sectname())) {
408 coalescedSection = true;
409 }
410 #if BUILDING_APP_CACHE_UTIL
411 if ( strcmp(segCmd->segname(), "__CTF") == 0 ) {
412 // The kernel __CTF segment data is completely removed when we link the baseKC
413 if ( _mh->isStaticExecutable() )
414 coalescedSection = true;
415 }
416 #endif
417
418 if ( coalescedSection ) {
419 // Put coalesced sections at the end of the segment
420 sect->set_addr(segCmd->vmaddr() + segCmd->filesize());
421 sect->set_offset(0);
422 sect->set_size(0);
423 } else {
424 sect->set_addr(sect->addr() + _segSlides[segIndex]);
425 if ( sect->offset() != 0 )
426 sect->set_offset(sect->offset() + segFileOffsetDelta);
427 }
428 }
429 }
430 ++segIndex;
431 break;
432 case LC_SEGMENT_SPLIT_INFO:
433 splitSegInfoCmd = (linkedit_data_command*)cmd;
434 splitSegInfoCmd->dataoff = linkeditStartOffset + splitSegInfoOffset;
435 break;
436 default:
437 break;
438 }
439 });
440
441 _mh->removeLoadCommand(_diagnostics, ^(const load_command *cmd, bool &remove, bool &stop) {
442 switch ( cmd->cmd ) {
443 case LC_RPATH:
444 _diagnostics.warning("dyld shared cache does not support LC_RPATH found in %s", _dylibID);
445 remove = true;
446 break;
447 case LC_CODE_SIGNATURE:
448 case LC_DYLIB_CODE_SIGN_DRS:
449 remove = true;
450 break;
451 default:
452 break;
453 }
454 });
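    // 0x80000000 is MH_DYLIB_IN_CACHE: mark this mach_header as an image that lives in the dyld shared cache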
455 _mh->flags |= 0x80000000;
456 }
457
458
459 template <typename P>
460 void Adjustor<P>::adjustSymbolTable()
461 {
462 if ( _dynSymTabCmd == nullptr )
463 return;
464
465 macho_nlist<P>* symbolTable = (macho_nlist<P>*)&_linkeditBias[_symTabCmd->symoff];
466
467 // adjust global symbol table entries
468 macho_nlist<P>* lastExport = &symbolTable[_dynSymTabCmd->iextdefsym + _dynSymTabCmd->nextdefsym];
469 for (macho_nlist<P>* entry = &symbolTable[_dynSymTabCmd->iextdefsym]; entry < lastExport; ++entry) {
470 if ( (entry->n_type() & N_TYPE) == N_SECT )
471 entry->set_n_value(entry->n_value() + slideForOrigAddress(entry->n_value()));
472 }
473
474 // adjust local symbol table entries
475 macho_nlist<P>* lastLocal = &symbolTable[_dynSymTabCmd->ilocalsym+_dynSymTabCmd->nlocalsym];
476 for (macho_nlist<P>* entry = &symbolTable[_dynSymTabCmd->ilocalsym]; entry < lastLocal; ++entry) {
477 if ( (entry->n_sect() != NO_SECT) && ((entry->n_type() & N_STAB) == 0) )
478 entry->set_n_value(entry->n_value() + slideForOrigAddress(entry->n_value()));
479 }
480 }
481
482
483 template <typename P>
484 void Adjustor<P>::adjustChainedFixups(const CacheBuilder::DylibTextCoalescer& textCoalescer)
485 {
486 if ( _chainedFixupsCmd == nullptr )
487 return;
488
489     // Pass a start hint in to withChainStarts which accounts for the LINKEDIT having shifted, since we haven't
490     // yet updated its LC_SEGMENT to point to the new data
491 const dyld_chained_fixups_header* header = (dyld_chained_fixups_header*)&_linkeditBias[_chainedFixupsCmd->dataoff];
492 uint64_t startsOffset = ((uint64_t)header + header->starts_offset) - (uint64_t)_mh;
493
494     // segment_offset in dyld_chained_starts_in_segment is now stale. We need to update it to the segment's new offset from the mach_header
495 _mh->withChainStarts(_diagnostics, startsOffset, ^(const dyld_chained_starts_in_image* starts) {
496 for (uint32_t segIndex=0; segIndex < starts->seg_count; ++segIndex) {
497 if ( starts->seg_info_offset[segIndex] == 0 )
498 continue;
499 dyld_chained_starts_in_segment* segInfo = (dyld_chained_starts_in_segment*)((uint8_t*)starts + starts->seg_info_offset[segIndex]);
500 segInfo->segment_offset = (uint64_t)_mappingInfo[segIndex].dstSegment - (uint64_t)_mh;
501
502             // If the whole segment was coalesced then remove its chain starts
503 if ( textCoalescer.segmentWasCoalesced(_segCmds[segIndex]->segname()) ) {
504 segInfo->page_count = 0;
505 }
506 }
507 });
508 }
509
510 template <typename P>
511 static uint64_t externalRelocBaseAddress(const dyld3::MachOAnalyzer* ma,
512 std::vector<macho_segment_command<P>*> segCmds,
513 std::vector<uint64_t> segOrigStartAddresses)
514 {
515 if ( ma->isArch("x86_64") || ma->isArch("x86_64h") ) {
516 #if BUILDING_APP_CACHE_UTIL
517 if ( ma->isKextBundle() ) {
518 // for kext bundles the reloc base address starts at __TEXT segment
519 return segOrigStartAddresses[0];
520 }
521 #endif
522 // for x86_64 reloc base address starts at first writable segment (usually __DATA)
523 for (uint32_t i=0; i < segCmds.size(); ++i) {
524 if ( segCmds[i]->initprot() & VM_PROT_WRITE )
525 return segOrigStartAddresses[i];
526 }
527 }
528 // For everyone else we start at 0
529 return 0;
530 }
531
532
533 template <typename P>
534 void Adjustor<P>::adjustExternalRelocations()
535 {
536 if ( _dynSymTabCmd == nullptr )
537 return;
538
539 // section index 0 refers to mach_header
540 uint64_t baseAddress = _mappingInfo[0].dstCacheUnslidAddress;
541
542 const uint64_t relocsStartAddress = externalRelocBaseAddress(_mh, _segCmds, _segOrigStartAddresses);
543 relocation_info* relocsStart = (relocation_info*)&_linkeditBias[_dynSymTabCmd->extreloff];
544 relocation_info* relocsEnd = &relocsStart[_dynSymTabCmd->nextrel];
545 for (relocation_info* reloc = relocsStart; reloc < relocsEnd; ++reloc) {
546 // External relocations should be relative to the base address of the mach-o as otherwise they
547 // probably won't fit in 32-bits.
548 uint64_t newAddress = reloc->r_address + slideForOrigAddress(relocsStartAddress + reloc->r_address);
549 newAddress -= baseAddress;
550 reloc->r_address = (int32_t)newAddress;
551 assert((uint64_t)reloc->r_address == newAddress);
552 }
553 }
554
555 template <typename P>
556 void Adjustor<P>::slidePointer(int segIndex, uint64_t segOffset, uint8_t type, CacheBuilder::ASLR_Tracker& aslrTracker)
557 {
558 pint_t* mappedAddrP = (pint_t*)((uint8_t*)_mappingInfo[segIndex].dstSegment + segOffset);
559 uint32_t* mappedAddr32 = (uint32_t*)mappedAddrP;
560 pint_t valueP;
561 uint32_t value32;
562 switch ( type ) {
563 case REBASE_TYPE_POINTER:
564 valueP = (pint_t)P::getP(*mappedAddrP);
565 P::setP(*mappedAddrP, valueP + slideForOrigAddress(valueP));
566 aslrTracker.add(mappedAddrP);
567 break;
568
569 case REBASE_TYPE_TEXT_ABSOLUTE32:
570 value32 = P::E::get32(*mappedAddr32);
571 P::E::set32(*mappedAddr32, value32 + (uint32_t)slideForOrigAddress(value32));
572 break;
573
574 case REBASE_TYPE_TEXT_PCREL32:
575             // general text relocs not supported
576 default:
577 _diagnostics.error("unknown rebase type 0x%02X in %s", type, _dylibID);
578 }
579 }
580
581
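// The helpers below pack/unpack the 16-bit immediate that a movw/movt pair materializes.
// For Thumb-2 the immediate is split across the imm4, i, imm3 and imm8 fields of the
// instruction (viewed here as the 32-bit little-endian load of its two halfwords); for
// ARM it is split across imm4 (bits 16-19) and imm12 (bits 0-11). getXWord() reassembles
// the 16-bit value and setXWord() scatters a new value back into the same fields.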
582 static bool isThumbMovw(uint32_t instruction)
583 {
584 return ( (instruction & 0x8000FBF0) == 0x0000F240 );
585 }
586
587 static bool isThumbMovt(uint32_t instruction)
588 {
589 return ( (instruction & 0x8000FBF0) == 0x0000F2C0 );
590 }
591
592 static uint16_t getThumbWord(uint32_t instruction)
593 {
594 uint32_t i = ((instruction & 0x00000400) >> 10);
595 uint32_t imm4 = (instruction & 0x0000000F);
596 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
597 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
598 return ((imm4 << 12) | (i << 11) | (imm3 << 8) | imm8);
599 }
600
601 static uint32_t setThumbWord(uint32_t instruction, uint16_t word) {
602 uint32_t imm4 = (word & 0xF000) >> 12;
603 uint32_t i = (word & 0x0800) >> 11;
604 uint32_t imm3 = (word & 0x0700) >> 8;
605 uint32_t imm8 = word & 0x00FF;
606 return (instruction & 0x8F00FBF0) | imm4 | (i << 10) | (imm3 << 28) | (imm8 << 16);
607 }
608
609 static bool isArmMovw(uint32_t instruction)
610 {
611 return (instruction & 0x0FF00000) == 0x03000000;
612 }
613
614 static bool isArmMovt(uint32_t instruction)
615 {
616 return (instruction & 0x0FF00000) == 0x03400000;
617 }
618
619 static uint16_t getArmWord(uint32_t instruction)
620 {
621 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
622 uint32_t imm12 = (instruction & 0x00000FFF);
623 return (imm4 << 12) | imm12;
624 }
625
626 static uint32_t setArmWord(uint32_t instruction, uint16_t word) {
627 uint32_t imm4 = (word & 0xF000) >> 12;
628 uint32_t imm12 = word & 0x0FFF;
629 return (instruction & 0xFFF0F000) | (imm4 << 16) | imm12;
630 }
631
632
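// Rewrites one arm64e chained-rebase entry so that it targets its new location in the cache.
// Three outcomes are possible: the new target still fits in the (auth)rebase bit-field and is
// updated in place; it only fits in a plain rebase, so the auth diversity/key data moves to the
// ASLR tracker side table; or it does not fit at all, so the full 64-bit target also moves to
// the side table and the on-disk value just keeps the chain's "next" link. Userland pointer
// formats and chains broken by constant coalescing are handled as special cases.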
633 template <typename P>
634 void Adjustor<P>::convertArm64eRebaseToIntermediate(dyld3::MachOLoaded::ChainedFixupPointerOnDisk* chainPtr, CacheBuilder::ASLR_Tracker& aslrTracker,
635 uint64_t targetSlide, bool convertRebaseChains)
636 {
637 assert(chainPtr->arm64e.authRebase.bind == 0);
638 assert( (_chainedFixupsFormat == DYLD_CHAINED_PTR_ARM64E)
639 || (_chainedFixupsFormat == DYLD_CHAINED_PTR_ARM64E_USERLAND)
640 || (_chainedFixupsFormat == DYLD_CHAINED_PTR_ARM64E_USERLAND24)
641 || (_chainedFixupsFormat == DYLD_CHAINED_PTR_ARM64E_KERNEL) );
642 dyld3::MachOLoaded::ChainedFixupPointerOnDisk orgPtr = *chainPtr;
643 dyld3::MachOLoaded::ChainedFixupPointerOnDisk tmp;
644 if ( chainPtr->arm64e.authRebase.auth ) {
645 uint64_t targetVMAddr = orgPtr.arm64e.authRebase.target + _segOrigStartAddresses[0] + targetSlide;
646
647 // The merging code may have set the high bits, eg, to a tagged pointer
648 // Note authRebase has no high8, so this is invalid if it occurs
649 uint8_t high8 = targetVMAddr >> 56;
650 if ( high8 ) {
651 // The kernel uses the high bits in the vmAddr, so don't error there
652 bool badPointer = true;
653 if ( _chainedFixupsFormat == DYLD_CHAINED_PTR_ARM64E_KERNEL ) {
654 uint64_t vmOffset = targetVMAddr - _cacheBaseAddress;
655 if ( (vmOffset >> 56) == 0 )
656 badPointer = false;
657 }
658
659 if ( badPointer ) {
660 _diagnostics.error("Cannot set tag on pointer in '%s' as high bits are incompatible with pointer authentication", _dylibID);
661 return;
662 }
663 }
664
665 if ( _chainedFixupsFormat == DYLD_CHAINED_PTR_ARM64E_USERLAND ) {
666 // <rdar://60351693> the rebase target is a vmoffset, so we need to switch to tracking the target out of line
667 aslrTracker.setAuthData(chainPtr, chainPtr->arm64e.authRebase.diversity, chainPtr->arm64e.authRebase.addrDiv, chainPtr->arm64e.authRebase.key);
668 aslrTracker.setRebaseTarget64(chainPtr, targetVMAddr);
669 chainPtr->arm64e.rebase.target = 0; // actual target vmAddr stored in side table
670 chainPtr->arm64e.rebase.high8 = 0;
671 chainPtr->arm64e.rebase.next = orgPtr.arm64e.rebase.next;
672 chainPtr->arm64e.rebase.bind = 0;
673 chainPtr->arm64e.rebase.auth = 0;
674 return;
675 }
676
677 if ( convertRebaseChains ) {
678 // This chain has been broken by merging CF constants.
679 // Instead of trying to maintain the chain, just set the raw value now
680 aslrTracker.setAuthData(chainPtr, chainPtr->arm64e.authRebase.diversity, chainPtr->arm64e.authRebase.addrDiv, chainPtr->arm64e.authRebase.key);
681 chainPtr->raw64 = targetVMAddr;
682 return;
683 }
684
685 // we need to change the rebase to point to the new address in the dyld cache, but it may not fit
686 tmp.arm64e.authRebase.target = targetVMAddr;
687 if ( tmp.arm64e.authRebase.target == targetVMAddr ) {
688 // everything fits, just update target
689 chainPtr->arm64e.authRebase.target = targetVMAddr;
690 return;
691 }
692 // see if it fits in a plain rebase
693 tmp.arm64e.rebase.target = targetVMAddr;
694 if ( tmp.arm64e.rebase.target == targetVMAddr ) {
695 // does fit in plain rebase, so convert to that and store auth data in side table
696 aslrTracker.setAuthData(chainPtr, chainPtr->arm64e.authRebase.diversity, chainPtr->arm64e.authRebase.addrDiv, chainPtr->arm64e.authRebase.key);
697 chainPtr->arm64e.rebase.target = targetVMAddr;
698 chainPtr->arm64e.rebase.high8 = 0;
699 chainPtr->arm64e.rebase.next = orgPtr.arm64e.rebase.next;
700 chainPtr->arm64e.rebase.bind = 0;
701 chainPtr->arm64e.rebase.auth = 0;
702 return;
703 }
704 // target cannot fit into rebase chain, so store target in side table
705 aslrTracker.setAuthData(chainPtr, chainPtr->arm64e.authRebase.diversity, chainPtr->arm64e.authRebase.addrDiv, chainPtr->arm64e.authRebase.key);
706 aslrTracker.setRebaseTarget64(chainPtr, targetVMAddr);
707 chainPtr->arm64e.rebase.target = 0; // actual target vmAddr stored in side table
708 chainPtr->arm64e.rebase.high8 = 0;
709 chainPtr->arm64e.rebase.next = orgPtr.arm64e.rebase.next;
710 chainPtr->arm64e.rebase.bind = 0;
711 chainPtr->arm64e.rebase.auth = 0;
712 return;
713 }
714 else {
715 uint64_t targetVMAddr = 0;
716 switch (_chainedFixupsFormat) {
717 case DYLD_CHAINED_PTR_ARM64E:
718 targetVMAddr = orgPtr.arm64e.rebase.target + targetSlide;
719 break;
720 case DYLD_CHAINED_PTR_ARM64E_USERLAND:
721 case DYLD_CHAINED_PTR_ARM64E_USERLAND24:
722 // <rdar://60351693> the rebase target is a vmoffset, so we need to switch to tracking the target out of line
723 aslrTracker.setRebaseTarget64(chainPtr, orgPtr.arm64e.rebase.target + targetSlide);
724 orgPtr.arm64e.rebase.target = 0;
725 targetVMAddr = 0;
726 break;
727 case DYLD_CHAINED_PTR_ARM64E_KERNEL:
728 targetVMAddr = orgPtr.arm64e.rebase.target + _segOrigStartAddresses[0] + targetSlide;
729 break;
730 default:
731 _diagnostics.error("Unknown chain format");
732 return;
733 }
734
735 // The merging code may have set the high bits, eg, to a tagged pointer
736 uint8_t high8 = targetVMAddr >> 56;
737 if ( chainPtr->arm64e.rebase.high8 ) {
738 if ( high8 ) {
739 _diagnostics.error("Cannot set tag on pointer as high bits are in use");
740 return;
741 }
742 aslrTracker.setHigh8(chainPtr, chainPtr->arm64e.rebase.high8);
743 } else {
744 if ( high8 ) {
745 aslrTracker.setHigh8(chainPtr, high8);
746 targetVMAddr &= 0x00FFFFFFFFFFFFFF;
747 }
748 }
749
750 if ( convertRebaseChains ) {
751 // This chain has been broken by merging CF constants.
752 // Instead of trying to maintain the chain, just set the raw value now
753 chainPtr->raw64 = targetVMAddr;
754 return;
755 }
756
757 tmp.arm64e.rebase.target = targetVMAddr;
758 if ( tmp.arm64e.rebase.target == targetVMAddr ) {
759 // target dyld cache address fits in plain rebase, so all we need to do is adjust that
760 chainPtr->arm64e.rebase.target = targetVMAddr;
761 return;
762 }
763
764 // target cannot fit into rebase chain, so store target in side table
765 aslrTracker.setRebaseTarget64(chainPtr, targetVMAddr);
766 chainPtr->arm64e.rebase.target = 0; // actual target vmAddr stored in side table
767 }
768 }
769
770
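// Same idea as the arm64e version above, but for plain 64-bit rebase chains: retarget the
// rebase in place when the new cache address fits in the target bit-field, otherwise park the
// full 64-bit address in the ASLR tracker side table and zero the on-disk target.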
771 template <typename P>
772 void Adjustor<P>::convertGeneric64RebaseToIntermediate(dyld3::MachOLoaded::ChainedFixupPointerOnDisk* chainPtr, CacheBuilder::ASLR_Tracker& aslrTracker, uint64_t targetSlide)
773 {
774 assert( (_chainedFixupsFormat == DYLD_CHAINED_PTR_64) || (_chainedFixupsFormat == DYLD_CHAINED_PTR_64_OFFSET) );
775 dyld3::MachOLoaded::ChainedFixupPointerOnDisk orgPtr = *chainPtr;
776 dyld3::MachOLoaded::ChainedFixupPointerOnDisk tmp;
777
778 uint64_t targetVMAddr = 0;
779 switch (_chainedFixupsFormat) {
780 case DYLD_CHAINED_PTR_64:
781 targetVMAddr = orgPtr.generic64.rebase.target + targetSlide;
782 break;
783 case DYLD_CHAINED_PTR_64_OFFSET:
784 // <rdar://60351693> the rebase target is a vmoffset, so we need to switch to tracking the target out of line
785 targetVMAddr = orgPtr.generic64.rebase.target + _segOrigStartAddresses[0] + targetSlide;
786 aslrTracker.setRebaseTarget64(chainPtr, targetVMAddr);
787 chainPtr->generic64.rebase.target = 0;
788 return;
789 break;
790 default:
791 _diagnostics.error("Unknown chain format");
792 return;
793 }
794
795 // we need to change the rebase to point to the new address in the dyld cache, but it may not fit
796 tmp.generic64.rebase.target = targetVMAddr;
797 if ( tmp.generic64.rebase.target == targetVMAddr ) {
798 // everything fits, just update target
799 chainPtr->generic64.rebase.target = targetVMAddr;
800 return;
801 }
802
803 // target cannot fit into rebase chain, so store target in side table
804 aslrTracker.setRebaseTarget64(chainPtr, targetVMAddr);
805 chainPtr->generic64.rebase.target = 0; // actual target vmAddr stored in side table
806 }
807
808
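// Applies a single split-seg-info v2 fixup of the given kind at mappedAddr. 'adjust' is the
// difference between the target atom's slide and the referencing atom's slide (used for the
// delta and instruction kinds), while 'targetSlide' is the target atom's slide (used for the
// pointer kinds). movw/movt kinds are handled pairwise via the last* in/out parameters.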
809 template <typename P>
810 void Adjustor<P>::adjustReference(uint32_t kind, uint8_t* mappedAddr, uint64_t fromNewAddress, uint64_t toNewAddress,
811 int64_t adjust, int64_t targetSlide, uint64_t imageStartAddress, uint64_t imageEndAddress,
812 bool convertRebaseChains,
813 CacheBuilder::ASLR_Tracker& aslrTracker, CacheBuilder::LOH_Tracker* lohTracker,
814 uint32_t*& lastMappedAddr32, uint32_t& lastKind, uint64_t& lastToNewAddress)
815 {
816 uint64_t value64;
817 uint64_t* mappedAddr64 = 0;
818 uint32_t value32;
819 uint32_t* mappedAddr32 = 0;
820 uint32_t instruction;
821 dyld3::MachOLoaded::ChainedFixupPointerOnDisk* chainPtr;
822 uint32_t newPageOffset;
823 int64_t delta;
824 switch ( kind ) {
825 case DYLD_CACHE_ADJ_V2_DELTA_32:
826 mappedAddr32 = (uint32_t*)mappedAddr;
827 value32 = P::E::get32(*mappedAddr32);
828 delta = (int32_t)value32;
829 delta += adjust;
830 if ( (delta > 0x80000000) || (-delta > 0x80000000) ) {
831 _diagnostics.error("DYLD_CACHE_ADJ_V2_DELTA_32 can't be adjust by 0x%016llX in %s", adjust, _dylibID);
832 return;
833 }
834 P::E::set32(*mappedAddr32, (int32_t)delta);
835 break;
836 case DYLD_CACHE_ADJ_V2_POINTER_32:
837 mappedAddr32 = (uint32_t*)mappedAddr;
838 if ( _chainedFixupsCmd != nullptr ) {
839 chainPtr = (dyld3::MachOLoaded::ChainedFixupPointerOnDisk*)mappedAddr32;
840 switch (_chainedFixupsFormat) {
841 case DYLD_CHAINED_PTR_32:
842 // ignore binds, fix up rebases to have new targets
843 if ( chainPtr->generic32.rebase.bind == 0 ) {
844 // there is not enough space in 32-bit pointer to store new vmaddr in cache in 26-bit target
845 // so store target in side table that will be applied when binds are resolved
846 aslrTracker.add(mappedAddr32);
847 uint32_t target = (uint32_t)(chainPtr->generic32.rebase.target + targetSlide);
848 aslrTracker.setRebaseTarget32(chainPtr, target);
849 chainPtr->generic32.rebase.target = 0; // actual target stored in side table
850 }
851 break;
852 default:
853 _diagnostics.error("unknown 32-bit chained fixup format %d in %s", _chainedFixupsFormat, _dylibID);
854 break;
855 }
856 }
857 else if ( _mh->usesClassicRelocationsInKernelCollection() ) {
858 // Classic relocs are not guaranteed to be aligned, so always store them in the side table
859 if ( (uint32_t)toNewAddress != (uint32_t)(E::get32(*mappedAddr32) + targetSlide) ) {
860 _diagnostics.error("bad DYLD_CACHE_ADJ_V2_POINTER_32 value not as expected at address 0x%llX in %s", fromNewAddress, _dylibID);
861 return;
862 }
863 aslrTracker.setRebaseTarget32(mappedAddr32, (uint32_t)toNewAddress);
864 E::set32(*mappedAddr32, 0);
865 aslrTracker.add(mappedAddr32);
866 }
867 else {
868 if ( toNewAddress != (uint64_t)(E::get32(*mappedAddr32) + targetSlide) ) {
869 _diagnostics.error("bad DYLD_CACHE_ADJ_V2_POINTER_32 value not as expected at address 0x%llX in %s", fromNewAddress, _dylibID);
870 return;
871 }
872 E::set32(*mappedAddr32, (uint32_t)toNewAddress);
873 aslrTracker.add(mappedAddr32);
874 }
875 break;
876 case DYLD_CACHE_ADJ_V2_POINTER_64:
877 mappedAddr64 = (uint64_t*)mappedAddr;
878 if ( _chainedFixupsCmd != nullptr ) {
879 chainPtr = (dyld3::MachOLoaded::ChainedFixupPointerOnDisk*)mappedAddr64;
880 switch (_chainedFixupsFormat) {
881 case DYLD_CHAINED_PTR_ARM64E:
882 case DYLD_CHAINED_PTR_ARM64E_USERLAND:
883 case DYLD_CHAINED_PTR_ARM64E_USERLAND24:
884 case DYLD_CHAINED_PTR_ARM64E_KERNEL:
885 // ignore binds and adjust rebases to new segment locations
886 if ( chainPtr->arm64e.authRebase.bind == 0 ) {
887 convertArm64eRebaseToIntermediate(chainPtr, aslrTracker, targetSlide, convertRebaseChains);
888 // Note, the pointer remains a chain with just the target of the rebase adjusted to the new target location
889 aslrTracker.add(chainPtr);
890 }
891 break;
892 case DYLD_CHAINED_PTR_64:
893 case DYLD_CHAINED_PTR_64_OFFSET:
894 // ignore binds and adjust rebases to new segment locations
895 if ( chainPtr->generic64.rebase.bind == 0 ) {
896 convertGeneric64RebaseToIntermediate(chainPtr, aslrTracker, targetSlide);
897 // Note, the pointer remains a chain with just the target of the rebase adjusted to the new target location
898 aslrTracker.add(chainPtr);
899 }
900 break;
901 default:
902 _diagnostics.error("unknown 64-bit chained fixup format %d in %s", _chainedFixupsFormat, _dylibID);
903 break;
904 }
905 }
906 else if ( _mh->usesClassicRelocationsInKernelCollection() ) {
907 if ( toNewAddress != (E::get64(*mappedAddr64) + targetSlide) ) {
908 _diagnostics.error("bad DYLD_CACHE_ADJ_V2_POINTER_64 value not as expected at address 0x%llX in %s", fromNewAddress, _dylibID);
909 return;
910 }
911 aslrTracker.setRebaseTarget64(mappedAddr64, toNewAddress);
912 E::set64(*mappedAddr64, 0); // actual target vmAddr stored in side table
913 aslrTracker.add(mappedAddr64);
914 uint8_t high8 = toNewAddress >> 56;
915 if ( high8 )
916 aslrTracker.setHigh8(mappedAddr64, high8);
917 }
918 else {
919 if ( toNewAddress != (E::get64(*mappedAddr64) + targetSlide) ) {
920 _diagnostics.error("bad DYLD_CACHE_ADJ_V2_POINTER_64 value not as expected at address 0x%llX in %s", fromNewAddress, _dylibID);
921 return;
922 }
923 E::set64(*mappedAddr64, toNewAddress);
924 aslrTracker.add(mappedAddr64);
925 uint8_t high8 = toNewAddress >> 56;
926 if ( high8 )
927 aslrTracker.setHigh8(mappedAddr64, high8);
928 }
929 break;
930 case DYLD_CACHE_ADJ_V2_THREADED_POINTER_64:
931 // old style arm64e binary
932 chainPtr = (dyld3::MachOLoaded::ChainedFixupPointerOnDisk*)mappedAddr;
933             // ignore binds, they are processed later
934 if ( chainPtr->arm64e.authRebase.bind == 0 ) {
935 convertArm64eRebaseToIntermediate(chainPtr, aslrTracker, targetSlide, convertRebaseChains);
936 // Note, the pointer remains a chain with just the target of the rebase adjusted to the new target location
937 aslrTracker.add(chainPtr);
938 }
939 break;
940 case DYLD_CACHE_ADJ_V2_DELTA_64:
941 mappedAddr64 = (uint64_t*)mappedAddr;
942 value64 = P::E::get64(*mappedAddr64);
943 E::set64(*mappedAddr64, value64 + adjust);
944 break;
945 case DYLD_CACHE_ADJ_V2_IMAGE_OFF_32:
946 if ( adjust == 0 )
947 break;
948 mappedAddr32 = (uint32_t*)mappedAddr;
949 value32 = P::E::get32(*mappedAddr32);
950 value64 = toNewAddress - imageStartAddress;
951 if ( value64 > imageEndAddress ) {
952 _diagnostics.error("DYLD_CACHE_ADJ_V2_IMAGE_OFF_32 can't be adjust to 0x%016llX in %s", toNewAddress, _dylibID);
953 return;
954 }
955 P::E::set32(*mappedAddr32, (uint32_t)value64);
956 break;
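        // ADRP encodes a 21-bit signed page (4KB) delta split across the immlo (bits 29-30) and
        // immhi (bits 5-23) fields. Recompute the page delta from the new source and target
        // addresses and re-encode it; if the delta no longer fits in 21 bits, report an error.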
957 case DYLD_CACHE_ADJ_V2_ARM64_ADRP:
958 mappedAddr32 = (uint32_t*)mappedAddr;
959 if (lohTracker)
960 (*lohTracker)[toNewAddress].insert(mappedAddr);
961 instruction = P::E::get32(*mappedAddr32);
962 if ( (instruction & 0x9F000000) == 0x90000000 ) {
963 int64_t pageDistance = ((toNewAddress & ~0xFFF) - (fromNewAddress & ~0xFFF));
964 int64_t newPage21 = pageDistance >> 12;
965 if ( (newPage21 > 2097151) || (newPage21 < -2097151) ) {
966 _diagnostics.error("DYLD_CACHE_ADJ_V2_ARM64_ADRP can't be adjusted that far in %s", _dylibID);
967 return;
968 }
969 instruction = (instruction & 0x9F00001F) | ((newPage21 << 29) & 0x60000000) | ((newPage21 << 3) & 0x00FFFFE0);
970 P::E::set32(*mappedAddr32, instruction);
971 }
972 else {
973 // ADRP instructions are sometimes optimized to other instructions (e.g. ADR) after the split-seg-info is generated
974 }
975 break;
976 case DYLD_CACHE_ADJ_V2_ARM64_OFF12:
977 mappedAddr32 = (uint32_t*)mappedAddr;
978 if (lohTracker)
979 (*lohTracker)[toNewAddress].insert(mappedAddr);
980 instruction = P::E::get32(*mappedAddr32);
981 // This is a page offset, so if we pack both the __TEXT page with the add/ldr, and
982 // the destination page with the target data, then the adjust isn't correct. Instead
983 // we always want the page offset of the target, ignoring where the source add/ldr slid
984 newPageOffset = (uint32_t)(toNewAddress & 0xFFF);
985 if ( (instruction & 0x3B000000) == 0x39000000 ) {
986 // LDR/STR imm12
987 uint32_t encodedAddend = ((instruction & 0x003FFC00) >> 10);
988 uint32_t newAddend = 0;
989 switch ( instruction & 0xC0000000 ) {
990 case 0x00000000:
991 if ( (instruction & 0x04800000) == 0x04800000 ) {
992 if ( newPageOffset & 0xF ) {
993 _diagnostics.error("can't adjust off12 scale=16 instruction to %d bytes at mapped address=%p in %s", newPageOffset, mappedAddr, _dylibID);
994 return;
995 }
996 if ( encodedAddend*16 >= 4096 ) {
997 _diagnostics.error("off12 scale=16 instruction points outside its page at mapped address=%p in %s", mappedAddr, _dylibID);
998 }
999 newAddend = (newPageOffset/16);
1000 }
1001 else {
1002 // scale=1
1003 newAddend = newPageOffset;
1004 }
1005 break;
1006 case 0x40000000:
1007 if ( newPageOffset & 1 ) {
1008 _diagnostics.error("can't adjust off12 scale=2 instruction to %d bytes at mapped address=%p in %s", newPageOffset, mappedAddr, _dylibID);
1009 return;
1010 }
1011 if ( encodedAddend*2 >= 4096 ) {
1012 _diagnostics.error("off12 scale=2 instruction points outside its page at mapped address=%p in %s", mappedAddr, _dylibID);
1013 return;
1014 }
1015 newAddend = (newPageOffset/2);
1016 break;
1017 case 0x80000000:
1018 if ( newPageOffset & 3 ) {
1019 _diagnostics.error("can't adjust off12 scale=4 instruction to %d bytes at mapped address=%p in %s", newPageOffset, mappedAddr, _dylibID);
1020 return;
1021 }
1022 if ( encodedAddend*4 >= 4096 ) {
1023 _diagnostics.error("off12 scale=4 instruction points outside its page at mapped address=%p in %s", mappedAddr, _dylibID);
1024 return;
1025 }
1026 newAddend = (newPageOffset/4);
1027 break;
1028 case 0xC0000000:
1029 if ( newPageOffset & 7 ) {
1030 _diagnostics.error("can't adjust off12 scale=8 instruction to %d bytes at mapped address=%p in %s", newPageOffset, mappedAddr, _dylibID);
1031 return;
1032 }
1033 if ( encodedAddend*8 >= 4096 ) {
1034 _diagnostics.error("off12 scale=8 instruction points outside its page at mapped address=%p in %s", mappedAddr, _dylibID);
1035 return;
1036 }
1037 newAddend = (newPageOffset/8);
1038 break;
1039 }
1040 uint32_t newInstruction = (instruction & 0xFFC003FF) | (newAddend << 10);
1041 P::E::set32(*mappedAddr32, newInstruction);
1042 }
1043 else if ( (instruction & 0xFFC00000) == 0x91000000 ) {
1044 // ADD imm12
1045 if ( instruction & 0x00C00000 ) {
1046 _diagnostics.error("ADD off12 uses shift at mapped address=%p in %s", mappedAddr, _dylibID);
1047 return;
1048 }
1049 uint32_t newAddend = newPageOffset;
1050 uint32_t newInstruction = (instruction & 0xFFC003FF) | (newAddend << 10);
1051 P::E::set32(*mappedAddr32, newInstruction);
1052 }
1053 else if ( instruction != 0xD503201F ) {
1054            // ignore imm12 instructions optimized into a NOP, but error on others
1055 _diagnostics.error("unknown off12 instruction 0x%08X at 0x%0llX in %s", instruction, fromNewAddress, _dylibID);
1056 return;
1057 }
1058 break;
1059 case DYLD_CACHE_ADJ_V2_THUMB_MOVW_MOVT:
1060 mappedAddr32 = (uint32_t*)mappedAddr;
1061            // to update a movw/movt pair we need to extract the 32-bit value they will make,
1062            // add the adjust and write back the new movw/movt pair.
1063 if ( lastKind == kind ) {
1064 if ( lastToNewAddress == toNewAddress ) {
1065 uint32_t instruction1 = P::E::get32(*lastMappedAddr32);
1066 uint32_t instruction2 = P::E::get32(*mappedAddr32);
1067 if ( isThumbMovw(instruction1) && isThumbMovt(instruction2) ) {
1068 uint16_t high = getThumbWord(instruction2);
1069 uint16_t low = getThumbWord(instruction1);
1070 uint32_t full = high << 16 | low;
1071 full += adjust;
1072 instruction1 = setThumbWord(instruction1, full & 0xFFFF);
1073 instruction2 = setThumbWord(instruction2, full >> 16);
1074 }
1075 else if ( isThumbMovt(instruction1) && isThumbMovw(instruction2) ) {
1076 uint16_t high = getThumbWord(instruction1);
1077 uint16_t low = getThumbWord(instruction2);
1078 uint32_t full = high << 16 | low;
1079 full += adjust;
1080 instruction2 = setThumbWord(instruction2, full & 0xFFFF);
1081 instruction1 = setThumbWord(instruction1, full >> 16);
1082 }
1083 else {
1084 _diagnostics.error("two DYLD_CACHE_ADJ_V2_THUMB_MOVW_MOVT in a row but not paried in %s", _dylibID);
1085 return;
1086 }
1087 P::E::set32(*lastMappedAddr32, instruction1);
1088 P::E::set32(*mappedAddr32, instruction2);
1089 kind = 0;
1090 }
1091 else {
1092 _diagnostics.error("two DYLD_CACHE_ADJ_V2_THUMB_MOVW_MOVT in a row but target different addresses in %s", _dylibID);
1093 return;
1094 }
1095 }
1096 break;
1097 case DYLD_CACHE_ADJ_V2_ARM_MOVW_MOVT:
1098 mappedAddr32 = (uint32_t*)mappedAddr;
1099            // to update a movw/movt pair we need to extract the 32-bit value they will make,
1100            // add the adjust and write back the new movw/movt pair.
1101 if ( lastKind == kind ) {
1102 if ( lastToNewAddress == toNewAddress ) {
1103 uint32_t instruction1 = P::E::get32(*lastMappedAddr32);
1104 uint32_t instruction2 = P::E::get32(*mappedAddr32);
1105 if ( isArmMovw(instruction1) && isArmMovt(instruction2) ) {
1106 uint16_t high = getArmWord(instruction2);
1107 uint16_t low = getArmWord(instruction1);
1108 uint32_t full = high << 16 | low;
1109 full += adjust;
1110 instruction1 = setArmWord(instruction1, full & 0xFFFF);
1111 instruction2 = setArmWord(instruction2, full >> 16);
1112 }
1113 else if ( isArmMovt(instruction1) && isArmMovw(instruction2) ) {
1114 uint16_t high = getArmWord(instruction1);
1115 uint16_t low = getArmWord(instruction2);
1116 uint32_t full = high << 16 | low;
1117 full += adjust;
1118 instruction2 = setArmWord(instruction2, full & 0xFFFF);
1119 instruction1 = setArmWord(instruction1, full >> 16);
1120 }
1121 else {
1122 _diagnostics.error("two DYLD_CACHE_ADJ_V2_ARM_MOVW_MOVT in a row but not paired in %s", _dylibID);
1123 return;
1124 }
1125 P::E::set32(*lastMappedAddr32, instruction1);
1126 P::E::set32(*mappedAddr32, instruction2);
1127 kind = 0;
1128 }
1129 else {
1130 _diagnostics.error("two DYLD_CACHE_ADJ_V2_ARM_MOVW_MOVT in a row but target different addresses in %s", _dylibID);
1131 return;
1132 }
1133 }
1134 break;
1135 case DYLD_CACHE_ADJ_V2_ARM64_BR26: {
1136 if ( adjust == 0 )
1137 break;
1138 mappedAddr32 = (uint32_t*)mappedAddr;
1139 instruction = P::E::get32(*mappedAddr32);
1140
1141 int64_t deltaToFinalTarget = toNewAddress - fromNewAddress;
1142 // Make sure the target is in range
1143 static const int64_t b128MegLimit = 0x07FFFFFF;
1144 if ( (deltaToFinalTarget > -b128MegLimit) && (deltaToFinalTarget < b128MegLimit) ) {
1145 instruction = (instruction & 0xFC000000) | ((deltaToFinalTarget >> 2) & 0x03FFFFFF);
1146 P::E::set32(*mappedAddr32, instruction);
1147 break;
1148 } else {
1149 _diagnostics.error("br26 instruction exceeds maximum range at mapped address=%p in %s", mappedAddr, _dylibID);
1150 return;
1151 }
1152 }
1153 case DYLD_CACHE_ADJ_V2_THUMB_BR22:
1154 case DYLD_CACHE_ADJ_V2_ARM_BR24:
1155 // nothing to do with calls to stubs
1156 break;
1157 default:
1158 _diagnostics.error("unknown split seg kind=%d in %s", kind, _dylibID);
1159 return;
1160 }
1161 lastKind = kind;
1162 lastToNewAddress = toNewAddress;
1163 lastMappedAddr32 = mappedAddr32;
1164 }
1165
1166 template <typename P>
1167 void Adjustor<P>::adjustReferencesUsingInfoV2(CacheBuilder::ASLR_Tracker& aslrTracker,
1168 CacheBuilder::LOH_Tracker* lohTracker,
1169 const CacheBuilder::CacheCoalescedText* coalescedText,
1170 const CacheBuilder::DylibTextCoalescer& textCoalescer)
1171 {
1172 static const bool logDefault = false;
1173 bool log = logDefault;
1174
1175 const uint8_t* infoStart = &_linkeditBias[_splitSegInfoCmd->dataoff];
1176 const uint8_t* infoEnd = &infoStart[_splitSegInfoCmd->datasize];
1177 if ( *infoStart++ != DYLD_CACHE_ADJ_V2_FORMAT ) {
1178 _diagnostics.error("malformed split seg info in %s", _dylibID);
1179 return;
1180 }
1181 // build section arrays of slide and mapped address for each section
1182 std::vector<uint64_t> sectionSlides;
1183 std::vector<uint64_t> sectionNewAddress;
1184 std::vector<uint8_t*> sectionMappedAddress;
1185
1186 // Also track coalesced sections, if we have any
1187 typedef CacheBuilder::DylibTextCoalescer::DylibSectionOffsetToCacheSectionOffset DylibSectionOffsetToCacheSectionOffset;
1188 std::vector<uint64_t> coalescedSectionOriginalVMAddrs;
1189 std::vector<uint64_t> coalescedSectionNewVMAddrs;
1190 std::vector<uint8_t*> coalescedSectionBufferAddrs;
1191 std::vector<const DylibSectionOffsetToCacheSectionOffset*> coalescedSectionOffsetMaps;
1192 std::vector<uint64_t> coalescedSectionObjcTags;
1193
1194 sectionSlides.reserve(16);
1195 sectionNewAddress.reserve(16);
1196 sectionMappedAddress.reserve(16);
1197 coalescedSectionOriginalVMAddrs.reserve(16);
1198 coalescedSectionNewVMAddrs.reserve(16);
1199 coalescedSectionBufferAddrs.reserve(16);
1200 coalescedSectionOffsetMaps.reserve(16);
1201 coalescedSectionObjcTags.reserve(16);
1202
1203 // section index 0 refers to mach_header
1204 sectionMappedAddress.push_back((uint8_t*)_mappingInfo[0].dstSegment);
1205 sectionSlides.push_back(_segSlides[0]);
1206 sectionNewAddress.push_back(_mappingInfo[0].dstCacheUnslidAddress);
1207 coalescedSectionOriginalVMAddrs.push_back(0);
1208 coalescedSectionNewVMAddrs.push_back(0);
1209 coalescedSectionBufferAddrs.push_back(nullptr);
1210 coalescedSectionOffsetMaps.push_back(nullptr);
1211 coalescedSectionObjcTags.push_back(0);
1212
1213 uint64_t imageStartAddress = sectionNewAddress.front();
1214 uint64_t imageEndAddress = 0;
1215
1216 // section 1 and later refer to real sections
1217 unsigned sectionIndex = 0;
1218 unsigned objcSelRefsSectionIndex = ~0U;
1219 for (unsigned segmentIndex=0; segmentIndex < _segCmds.size(); ++segmentIndex) {
1220 macho_segment_command<P>* segCmd = _segCmds[segmentIndex];
1221 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
1222 macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
1223
1224 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
1225 if ( textCoalescer.sectionWasCoalesced(sect->segname(), sect->sectname())) {
1226 // If we coalesced the segment then the sections aren't really there to be fixed up
1227 const DylibSectionOffsetToCacheSectionOffset& offsetMap = textCoalescer.getSectionCoalescer(sect->segname(),
1228 sect->sectname());
1229 uint64_t coalescedSectionNewVMAddr = coalescedText->getSectionVMAddr(sect->segname(), sect->sectname());
1230 uint8_t* coalescedSectionNewBufferAddr = coalescedText->getSectionBufferAddr(sect->segname(), sect->sectname());
1231 uint64_t coalescedSectionObjcTag = coalescedText->getSectionObjcTag(sect->segname(), sect->sectname());
1232 sectionMappedAddress.push_back(nullptr);
1233 sectionSlides.push_back(0);
1234 sectionNewAddress.push_back(0);
1235 coalescedSectionOriginalVMAddrs.push_back(sect->addr());
1236 coalescedSectionNewVMAddrs.push_back(coalescedSectionNewVMAddr);
1237 coalescedSectionBufferAddrs.push_back(coalescedSectionNewBufferAddr);
1238 coalescedSectionOffsetMaps.push_back(&offsetMap);
1239 coalescedSectionObjcTags.push_back(coalescedSectionObjcTag);
1240 ++sectionIndex;
1241 if (log) {
1242 fprintf(stderr, " %s/%s, sectIndex=%d, mapped at=%p\n",
1243 sect->segname(), sect->sectname(), sectionIndex, sectionMappedAddress.back());
1244 }
1245 } else {
1246 sectionMappedAddress.push_back((uint8_t*)_mappingInfo[segmentIndex].dstSegment + sect->addr() - segCmd->vmaddr());
1247 sectionSlides.push_back(_segSlides[segmentIndex]);
1248 sectionNewAddress.push_back(_mappingInfo[segmentIndex].dstCacheUnslidAddress + sect->addr() - segCmd->vmaddr());
1249 coalescedSectionOriginalVMAddrs.push_back(0);
1250 coalescedSectionNewVMAddrs.push_back(0);
1251 coalescedSectionBufferAddrs.push_back(nullptr);
1252 coalescedSectionOffsetMaps.push_back(nullptr);
1253 coalescedSectionObjcTags.push_back(0);
1254 ++sectionIndex;
1255 if (log) {
1256 fprintf(stderr, " %s/%s, sectIndex=%d, mapped at=%p\n",
1257 sect->segname(), sect->sectname(), sectionIndex, sectionMappedAddress.back());
1258 }
1259 if (!strcmp(sect->segname(), "__DATA") && !strcmp(sect->sectname(), "__objc_selrefs"))
1260 objcSelRefsSectionIndex = sectionIndex;
1261
1262 imageEndAddress = sectionNewAddress.back();
1263 }
1264 }
1265 }
1266
1267 // Whole :== <count> FromToSection+
1268 // FromToSection :== <from-sect-index> <to-sect-index> <count> ToOffset+
1269 // ToOffset :== <to-sect-offset-delta> <count> FromOffset+
1270 // FromOffset :== <kind> <count> <from-sect-offset-delta>
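    // All offsets are delta-encoded ulebs that accumulate as they are read: each ToOffset adds its
    // delta to a running to-section offset, and each FromOffset record carries <count> deltas that
    // accumulate into the from-section offset before adjustReference() is called for that pair.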
1271 const uint8_t* p = infoStart;
1272 uint64_t sectionCount = read_uleb128(p, infoEnd);
1273 for (uint64_t i=0; i < sectionCount; ++i) {
1274 uint32_t* lastMappedAddr32 = NULL;
1275 uint32_t lastKind = 0;
1276 uint64_t lastToNewAddress = 0;
1277 uint64_t fromSectionIndex = read_uleb128(p, infoEnd);
1278 uint64_t toSectionIndex = read_uleb128(p, infoEnd);
1279 uint64_t toOffsetCount = read_uleb128(p, infoEnd);
1280 uint64_t fromSectionSlide = sectionSlides[fromSectionIndex];
1281 uint64_t fromSectionNewAddress = sectionNewAddress[fromSectionIndex];
1282 uint8_t* fromSectionMappedAddress = sectionMappedAddress[fromSectionIndex];
1283 uint64_t toSectionSlide = sectionSlides[toSectionIndex];
1284 uint64_t toSectionNewAddress = sectionNewAddress[toSectionIndex];
1285 CacheBuilder::LOH_Tracker* lohTrackerPtr = (toSectionIndex == objcSelRefsSectionIndex) ? lohTracker : nullptr;
1286 if (log) printf(" from sect=%lld (mapped=%p), to sect=%lld (new addr=0x%llX):\n", fromSectionIndex, fromSectionMappedAddress, toSectionIndex, toSectionNewAddress);
1287 uint64_t toSectionOffset = 0;
1288
1289 for (uint64_t j=0; j < toOffsetCount; ++j) {
1290 uint64_t toSectionDelta = read_uleb128(p, infoEnd);
1291 uint64_t fromOffsetCount = read_uleb128(p, infoEnd);
1292 toSectionOffset += toSectionDelta;
1293 for (uint64_t k=0; k < fromOffsetCount; ++k) {
1294 uint64_t kind = read_uleb128(p, infoEnd);
1295 if ( kind > 13 ) {
1296 _diagnostics.error("unknown split seg info v2 kind value (%llu) in %s", kind, _dylibID);
1297 return;
1298 }
1299 uint64_t fromSectDeltaCount = read_uleb128(p, infoEnd);
1300 uint64_t fromSectionOffset = 0;
1301 for (uint64_t l=0; l < fromSectDeltaCount; ++l) {
1302 uint64_t delta = read_uleb128(p, infoEnd);
1303 fromSectionOffset += delta;
1304 if (log) printf(" kind=%lld, from offset=0x%0llX, to offset=0x%0llX, adjust=0x%llX, targetSlide=0x%llX\n", kind, fromSectionOffset, toSectionOffset, delta, toSectionSlide);
1305
1306                    // It's possible for either of the from/to sections to be coalesced/optimized.
1307 // Handle each of those combinations.
1308 uint8_t* fromMappedAddr = nullptr;
1309 uint64_t fromNewAddress = 0;
1310 uint64_t fromAtomSlide = 0;
1311 bool convertRebaseChains = false;
1312 if ( coalescedSectionOffsetMaps[fromSectionIndex] != nullptr ) {
1313 // From was optimized/coalesced
1314 // This is only supported on pointer kind fixups, ie, pointers in RW segments
1315 assert( (kind == DYLD_CACHE_ADJ_V2_POINTER_64) || (kind == DYLD_CACHE_ADJ_V2_THREADED_POINTER_64) );
1316 // Find where the atom moved to with its new section
1317 // CFString's and similar may have fixups in the middle of the atom, but the map only
1318 // tracks the start offset for the atom. We use lower_bound to find the atom containing
1319 // the offset we are looking for
1320 const DylibSectionOffsetToCacheSectionOffset* offsetMap = coalescedSectionOffsetMaps[fromSectionIndex];
1321 auto offsetIt = offsetMap->lower_bound((uint32_t)fromSectionOffset);
1322 if ( (offsetIt == offsetMap->end()) || (offsetIt->first != fromSectionOffset) ) {
1323 // This points to the middle of the atom, so check the previous atom
1324 assert(offsetIt != offsetMap->begin());
1325 --offsetIt;
1326 assert(offsetIt->first <= fromSectionOffset);
1327 }
1328 assert(offsetIt != offsetMap->end());
1329 // FIXME: Other CF types have different atom sizes
1330 uint64_t offsetInAtom = fromSectionOffset - offsetIt->first;
1331 assert(offsetInAtom < (uint64_t)DyldSharedCache::ConstantClasses::cfStringAtomSize);
1332
1333 uint8_t* baseMappedAddr = coalescedSectionBufferAddrs[fromSectionIndex];
1334 fromMappedAddr = baseMappedAddr + offsetIt->second + offsetInAtom;
1335 uint64_t baseVMAddr = coalescedSectionNewVMAddrs[fromSectionIndex];
1336 fromNewAddress = baseVMAddr + offsetIt->second + offsetInAtom;
1337
1338 // The 'from' section is gone, but we still need the 'from' slide. Instead of a section slide,
1339 // compute the slide for this individual atom
1340 uint64_t fromAtomOriginalVMAddr = coalescedSectionOriginalVMAddrs[fromSectionIndex] + fromSectionOffset;
1341 fromAtomSlide = fromNewAddress - fromAtomOriginalVMAddr;
1342 convertRebaseChains = true;
1343 } else {
1344 // From was not optimized/coalesced
1345 fromMappedAddr = fromSectionMappedAddress + fromSectionOffset;
1346 fromNewAddress = fromSectionNewAddress + fromSectionOffset;
1347 fromAtomSlide = fromSectionSlide;
1348 }
1349
1350 uint64_t toNewAddress = 0;
1351 uint64_t toAtomSlide = 0;
1352 if ( coalescedSectionOffsetMaps[toSectionIndex] != nullptr ) {
1353 // To was optimized/coalesced
1354 const DylibSectionOffsetToCacheSectionOffset* offsetMap = coalescedSectionOffsetMaps[toSectionIndex];
1355 auto offsetIt = offsetMap->find((uint32_t)toSectionOffset);
1356 assert(offsetIt != offsetMap->end());
1357 uint64_t baseVMAddr = coalescedSectionNewVMAddrs[toSectionIndex];
1358 toNewAddress = baseVMAddr + offsetIt->second;
1359
1360 // Add in the high bits which are the tagged pointer TBI bits
1361 toNewAddress |= coalescedSectionObjcTags[toSectionIndex];
1362
1363 // The 'to' section is gone, but we still need the 'to' slide. Instead of a section slide,
1364 // compute the slide for this individual atom
1365 uint64_t toAtomOriginalVMAddr = coalescedSectionOriginalVMAddrs[toSectionIndex] + toSectionOffset;
1366 toAtomSlide = toNewAddress - toAtomOriginalVMAddr;
1367 } else {
1368 // To was not optimized/coalesced
1369 toNewAddress = toSectionNewAddress + toSectionOffset;
1370 toAtomSlide = toSectionSlide;
1371 }
1372
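// deltaAdjust is the change in distance between the reference and its target. Illustrative
// numbers: if the 'to' atom slid by 0x3000 and the 'from' atom by 0x1000, any PC-relative or
// delta-style encoding of that target must grow by 0x2000; absolute pointer kinds instead use
// the target slide passed alongside.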
1373 int64_t deltaAdjust = toAtomSlide - fromAtomSlide;
1374 if (log) {
1375 printf(" kind=%lld, from offset=0x%0llX, to offset=0x%0llX, adjust=0x%llX, targetSlide=0x%llX\n",
1376 kind, fromSectionOffset, toSectionOffset, deltaAdjust, toSectionSlide);
1377 }
1378 adjustReference((uint32_t)kind, fromMappedAddr, fromNewAddress, toNewAddress, deltaAdjust,
1379 toAtomSlide, imageStartAddress, imageEndAddress, convertRebaseChains,
1380 aslrTracker, lohTrackerPtr,
1381 lastMappedAddr32, lastKind, lastToNewAddress);
1382 if ( _diagnostics.hasError() )
1383 return;
1384 }
1385 }
1386 }
1387 }
1388
1389 }
1390
1391 template <typename P>
1392 void Adjustor<P>::adjustRebaseChains(CacheBuilder::ASLR_Tracker& aslrTracker)
1393 {
1394 const dyld_chained_fixups_header* chainHeader = (dyld_chained_fixups_header*)(&_linkeditBias[_chainedFixupsCmd->dataoff]);
1395 const dyld_chained_starts_in_image* startsInfo = (dyld_chained_starts_in_image*)((uint8_t*)chainHeader + chainHeader->starts_offset);
1396 _mh->forEachFixupInAllChains(_diagnostics, startsInfo, false,
1397 ^(dyld3::MachOLoaded::ChainedFixupPointerOnDisk* fixupLoc, const dyld_chained_starts_in_segment* segInfo, bool& stop) {
1398 switch ( segInfo->pointer_format ) {
1399 case DYLD_CHAINED_PTR_64:
1400 // only look at rebases
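// For DYLD_CHAINED_PTR_64 the on-disk value is the bitfield from mach-o/fixup-chains.h: bind==0
// marks a rebase whose 'target' field holds the unslid vmaddr inside this dylib, so the new
// cache address is simply target plus the slide of the segment that originally contained it.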
1401 if ( fixupLoc->generic64.rebase.bind == 0 ) {
1402 uint64_t rebaseTargetInDylib = fixupLoc->generic64.rebase.target;
1403 uint64_t rebaseTargetInDyldcache = fixupLoc->generic64.rebase.target + slideForOrigAddress(rebaseTargetInDylib);
1404 convertGeneric64RebaseToIntermediate(fixupLoc, aslrTracker, rebaseTargetInDyldcache);
1405 aslrTracker.add(fixupLoc);
1406 }
1407 break;
1408 case DYLD_CHAINED_PTR_64_OFFSET:
1409 _diagnostics.error("unhandled 64-bit chained fixup format %d in %s", _chainedFixupsFormat, _dylibID);
1410 break;
1411 default:
1412 _diagnostics.error("unsupported chained fixup format %d", segInfo->pointer_format);
1413 stop = true;
1414 }
1415 });
1416 }
1417
1418 static int uint32Sorter(const void* l, const void* r) {
1419 if ( *((uint32_t*)l) < *((uint32_t*)r) )
1420 return -1;
1421 else
1422 return (*((uint32_t*)l) == *((uint32_t*)r)) ? 0 : 1;   // return 0 for equal keys so qsort sees a consistent ordering
1423 }
1424
1425 template <typename P>
1426 static uint64_t localRelocBaseAddress(const dyld3::MachOAnalyzer* ma,
1427 std::vector<macho_segment_command<P>*> segCmds,
1428 std::vector<uint64_t> segOrigStartAddresses)
1429 {
1430 if ( ma->isArch("x86_64") || ma->isArch("x86_64h") ) {
1431 #if BUILDING_APP_CACHE_UTIL
1432 if ( ma->isKextBundle() ) {
1433 // for kext bundles the reloc base address starts at __TEXT segment
1434 return segOrigStartAddresses[0];
1435 }
1436 #endif
1437 // for all other kinds, the x86_64 reloc base address starts at first writable segment (usually __DATA)
1438 for (uint32_t i=0; i < segCmds.size(); ++i) {
1439 if ( segCmds[i]->initprot() & VM_PROT_WRITE )
1440 return segOrigStartAddresses[i];
1441 }
1442 }
1443 return segOrigStartAddresses[0];
1444 }
1445
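// Finds which original (unslid) segment contains 'addr'. Illustrative example: with segments
// starting at 0x1000 (size 0x2000) and 0x4000 (size 0x1000), addr 0x4010 yields segIndex=1 and
// segOffset=0x10; an address outside every segment returns false.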
1446 static bool segIndexAndOffsetForAddress(uint64_t addr, const std::vector<uint64_t>& segOrigStartAddresses,
1447 std::vector<uint64_t> segSizes, uint32_t& segIndex, uint64_t& segOffset)
1448 {
1449 for (uint32_t i=0; i < segOrigStartAddresses.size(); ++i) {
1450 if ( (segOrigStartAddresses[i] <= addr) && (addr < (segOrigStartAddresses[i] + segSizes[i])) ) {
1451 segIndex = i;
1452 segOffset = addr - segOrigStartAddresses[i];
1453 return true;
1454 }
1455 }
1456 return false;
1457 }
1458
1459 template <typename P>
1460 void Adjustor<P>::adjustDataPointers(CacheBuilder::ASLR_Tracker& aslrTracker)
1461 {
1462 if ( (_dynSymTabCmd != nullptr) && (_dynSymTabCmd->locreloff != 0) ) {
1463 // kexts may have old style relocations instead of dyldinfo rebases
1464 assert(_dyldInfo == nullptr);
1465
1466 // old binary, walk relocations
1467 const uint64_t relocsStartAddress = localRelocBaseAddress(_mh, _segCmds, _segOrigStartAddresses);
1468 const relocation_info* const relocsStart = (const relocation_info* const)&_linkeditBias[_dynSymTabCmd->locreloff];
1469 const relocation_info* const relocsEnd = &relocsStart[_dynSymTabCmd->nlocrel];
1470 bool stop = false;
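// r_length is log2 of the fixup width: 3 means an 8-byte pointer on 64-bit, 2 means 4 bytes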
1471 const uint8_t relocSize = (_mh->is64() ? 3 : 2);
1472 STACK_ALLOC_OVERFLOW_SAFE_ARRAY(uint32_t, relocAddrs, 2048);
1473 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
1474 if ( reloc->r_length != relocSize ) {
1475 _diagnostics.error("local relocation has wrong r_length");
1476 break;
1477 }
1478 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
1479 _diagnostics.error("local relocation has wrong r_type");
1480 break;
1481 }
1482 relocAddrs.push_back(reloc->r_address);
1483 }
1484 if ( !relocAddrs.empty() ) {
1485 ::qsort(&relocAddrs[0], relocAddrs.count(), sizeof(uint32_t), &uint32Sorter);
1486 for (uint32_t addrOff : relocAddrs) {
1487 uint32_t segIndex = 0;
1488 uint64_t segOffset = 0;
1489 if ( segIndexAndOffsetForAddress(relocsStartAddress+addrOff, _segOrigStartAddresses, _segSizes, segIndex, segOffset) ) {
1490 uint8_t type = REBASE_TYPE_POINTER;
1491 assert(_mh->cputype != CPU_TYPE_I386);
1492 slidePointer(segIndex, segOffset, type, aslrTracker);
1493 }
1494 else {
1495 _diagnostics.error("local relocation has out of range r_address");
1496 break;
1497 }
1498 }
1499 }
1500 // then process indirect symbols
1501 // FIXME: Do we need indirect symbols? Aren't those handled as binds?
1502
1503 return;
1504 }
1505
1506 if ( _dyldInfo == NULL )
1507 return;
1508
1509 const uint8_t* p = &_linkeditBias[_dyldInfo->rebase_off];
1510 const uint8_t* end = &p[_dyldInfo->rebase_size];
1511
1512 uint8_t type = 0;
1513 int segIndex = 0;
1514 uint64_t segOffset = 0;
1515 uint64_t count;
1516 uint64_t skip;
1517 bool done = false;
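// Illustrative opcode stream (made-up bytes): 0x11 0x22 0x10 0x53 0x00 decodes as
//   SET_TYPE_IMM(1 = pointer), SET_SEGMENT_AND_OFFSET_ULEB(seg=2, offset=0x10),
//   DO_REBASE_IMM_TIMES(3) -> slide the pointers at seg 2 offsets 0x10, 0x10+ptr and 0x10+2*ptr,
//   DONE.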
1518 while ( !done && (p < end) ) {
1519 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
1520 uint8_t opcode = *p & REBASE_OPCODE_MASK;
1521 ++p;
1522 switch (opcode) {
1523 case REBASE_OPCODE_DONE:
1524 done = true;
1525 break;
1526 case REBASE_OPCODE_SET_TYPE_IMM:
1527 type = immediate;
1528 break;
1529 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1530 segIndex = immediate;
1531 segOffset = read_uleb128(p, end);
1532 break;
1533 case REBASE_OPCODE_ADD_ADDR_ULEB:
1534 segOffset += read_uleb128(p, end);
1535 break;
1536 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
1537 segOffset += immediate*sizeof(pint_t);
1538 break;
1539 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
1540 for (int i=0; i < immediate; ++i) {
1541 slidePointer(segIndex, segOffset, type, aslrTracker);
1542 segOffset += sizeof(pint_t);
1543 }
1544 break;
1545 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
1546 count = read_uleb128(p, end);
1547 for (uint32_t i=0; i < count; ++i) {
1548 slidePointer(segIndex, segOffset, type, aslrTracker);
1549 segOffset += sizeof(pint_t);
1550 }
1551 break;
1552 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
1553 slidePointer(segIndex, segOffset, type, aslrTracker);
1554 segOffset += read_uleb128(p, end) + sizeof(pint_t);
1555 break;
1556 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
1557 count = read_uleb128(p, end);
1558 skip = read_uleb128(p, end);
1559 for (uint32_t i=0; i < count; ++i) {
1560 slidePointer(segIndex, segOffset, type, aslrTracker);
1561 segOffset += skip + sizeof(pint_t);
1562 }
1563 break;
1564 default:
1565 _diagnostics.error("unknown rebase opcode 0x%02X in %s", opcode, _dylibID);
1566 done = true;
1567 break;
1568 }
1569 }
1570 }
1571
1572
1573 template <typename P>
1574 void Adjustor<P>::adjustInstruction(uint8_t kind, uint8_t* textLoc, uint64_t codeToDataDelta)
1575 {
1576 uint32_t* fixupLoc32 = (uint32_t*)textLoc;
1577 uint64_t* fixupLoc64 = (uint64_t*)textLoc;
1578 uint32_t instruction;
1579 uint32_t value32;
1580 uint64_t value64;
1581
1582 switch (kind) {
1583 case 1: // 32-bit pointer (including x86_64 RIP-rel)
1584 value32 = P::E::get32(*fixupLoc32);
1585 value32 += codeToDataDelta;
1586 P::E::set32(*fixupLoc32, value32);
1587 break;
1588 case 2: // 64-bit pointer
1589 value64 = P::E::get64(*fixupLoc64);
1590 value64 += codeToDataDelta;
1591 P::E::set64(*fixupLoc64, value64);
1592 break;
1593 case 4: // only used for i386, a reference to something in the IMPORT segment
1594 break;
1595 case 5: // used by thumb2 movw
1596 instruction = P::E::get32(*fixupLoc32);
1597 // slide is always a multiple of 4096, so only top 4 bits of lo16 will ever need adjusting
1598 value32 = (instruction & 0x0000000F) + ((uint32_t)codeToDataDelta >> 12);
1599 instruction = (instruction & 0xFFFFFFF0) | (value32 & 0x0000000F);
1600 P::E::set32(*fixupLoc32, instruction);
1601 break;
1602 case 6: // used by ARM movw
1603 instruction = P::E::get32(*fixupLoc32);
1604 // slide is always a multiple of 4096, so only top 4 bits of lo16 will ever need adjusting
1605 value32 = ((instruction & 0x000F0000) >> 16) + ((uint32_t)codeToDataDelta >> 12);
1606 instruction = (instruction & 0xFFF0FFFF) | ((value32 <<16) & 0x000F0000);
1607 P::E::set32(*fixupLoc32, instruction);
1608 break;
1609 case 0x10:
1610 case 0x11:
1611 case 0x12:
1612 case 0x13:
1613 case 0x14:
1614 case 0x15:
1615 case 0x16:
1616 case 0x17:
1617 case 0x18:
1618 case 0x19:
1619 case 0x1A:
1620 case 0x1B:
1621 case 0x1C:
1622 case 0x1D:
1623 case 0x1E:
1624 case 0x1F:
1625 // used by thumb2 movt (low nibble of kind is high 4-bits of paired movw)
1626 {
1627 instruction = P::E::get32(*fixupLoc32);
1628 assert((instruction & 0x8000FBF0) == 0x0000F2C0);
1629 // extract 16-bit value from instruction
1630 uint32_t i = ((instruction & 0x00000400) >> 10);
1631 uint32_t imm4 = (instruction & 0x0000000F);
1632 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
1633 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
1634 uint32_t imm16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
1635 // combine with codeToDataDelta and kind nibble
1636 uint32_t targetValue = (imm16 << 16) | ((kind & 0xF) << 12);
1637 uint32_t newTargetValue = targetValue + (uint32_t)codeToDataDelta;
1638 // construct new bits slices
1639 uint32_t imm4_ = (newTargetValue & 0xF0000000) >> 28;
1640 uint32_t i_ = (newTargetValue & 0x08000000) >> 27;
1641 uint32_t imm3_ = (newTargetValue & 0x07000000) >> 24;
1642 uint32_t imm8_ = (newTargetValue & 0x00FF0000) >> 16;
1643 // update instruction to match codeToDataDelta
1644 uint32_t newInstruction = (instruction & 0x8F00FBF0) | imm4_ | (i_ << 10) | (imm3_ << 28) | (imm8_ << 16);
1645 P::E::set32(*fixupLoc32, newInstruction);
1646 }
1647 break;
1648 case 0x20:
1649 case 0x21:
1650 case 0x22:
1651 case 0x23:
1652 case 0x24:
1653 case 0x25:
1654 case 0x26:
1655 case 0x27:
1656 case 0x28:
1657 case 0x29:
1658 case 0x2A:
1659 case 0x2B:
1660 case 0x2C:
1661 case 0x2D:
1662 case 0x2E:
1663 case 0x2F:
1664 // used by arm movt (low nibble of kind is high 4-bits of paired movw)
1665 {
1666 instruction = P::E::get32(*fixupLoc32);
1667 // extract 16-bit value from instruction
1668 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
1669 uint32_t imm12 = (instruction & 0x00000FFF);
1670 uint32_t imm16 = (imm4 << 12) | imm12;
1671 // combine with codeToDataDelta and kind nibble
1672 uint32_t targetValue = (imm16 << 16) | ((kind & 0xF) << 12);
1673 uint32_t newTargetValue = targetValue + (uint32_t)codeToDataDelta;
1674 // construct new bits slices
1675 uint32_t imm4_ = (newTargetValue & 0xF0000000) >> 28;
1676 uint32_t imm12_ = (newTargetValue & 0x0FFF0000) >> 16;
1677 // update instruction to match codeToDataDelta
1678 uint32_t newInstruction = (instruction & 0xFFF0F000) | (imm4_ << 16) | imm12_;
1679 P::E::set32(*fixupLoc32, newInstruction);
1680 }
1681 break;
1682 case 3: // used for arm64 ADRP
1683 instruction = P::E::get32(*fixupLoc32);
1684 if ( (instruction & 0x9F000000) == 0x90000000 ) {
1685 // codeToDataDelta is always a multiple of 4096, so only the ADRP page immediate (immhi:immlo) changes; the low 12 bits of the target stay untouched
1686 value64 = ((instruction & 0x60000000) >> 17) | ((instruction & 0x00FFFFE0) << 9);
1687 value64 += codeToDataDelta;
1688 instruction = (instruction & 0x9F00001F) | ((value64 << 17) & 0x60000000) | ((value64 >> 9) & 0x00FFFFE0);
1689 P::E::set32(*fixupLoc32, instruction);
1690 }
1691 break;
1692 default:
1693 break;
1694 }
1695 }
1696
1697 template <typename P>
1698 void Adjustor<P>::adjustCode()
1699 {
1700 // find compressed info on how code needs to be updated
1701 if ( _splitSegInfoCmd == nullptr )
1702 return;
1703
1704 const uint8_t* infoStart = &_linkeditBias[_splitSegInfoCmd->dataoff];
1705 const uint8_t* infoEnd = &infoStart[_splitSegInfoCmd->datasize];
1706
1707 // This encoding only works if all data segments slide by the same amount
1708 uint64_t codeToDataDelta = _segSlides[1] - _segSlides[0];
1709
1710 // compressed data is: [ <kind> [uleb128-delta]+ <0> ] + <0>
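// Illustrative stream (made-up bytes): 01 10 08 00 02 20 00 00 decodes as kind 1 with deltas
// 0x10,0x08 (fix the 32-bit values at offsets 0x10 and 0x18 of the first mapped segment), then
// kind 2 with delta 0x20 (fix the 64-bit value at 0x20), then the final 0 ends the info.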
1711 for (const uint8_t* p = infoStart; (*p != 0) && (p < infoEnd);) {
1712 uint8_t kind = *p++;
1713 uint8_t* textLoc = (uint8_t*)_mappingInfo[0].dstSegment;
1714 while (uint64_t delta = read_uleb128(p, infoEnd)) {
1715 textLoc += delta;
1716 adjustInstruction(kind, textLoc, codeToDataDelta);
1717 }
1718 }
1719 }
1720
1721
1722 template <typename P>
1723 void Adjustor<P>::adjustExportsTrie(std::vector<uint8_t>& newTrieBytes)
1724 {
1725 // if no export info, nothing to adjust
1726 uint32_t exportOffset = 0;
1727 uint32_t exportSize = 0;
1728 if ( _dyldInfo != nullptr ) {
1729 exportOffset = _dyldInfo->export_off;
1730 exportSize = _dyldInfo->export_size;
1731 } else if (_exportTrieCmd != nullptr) {
1732 exportOffset = _exportTrieCmd->dataoff;
1733 exportSize = _exportTrieCmd->datasize;
1734 }
1735
1736 if ( exportSize == 0 )
1737 return;
1738
1739 // since export info addresses are offsets from mach_header, everything in __TEXT is fine
1740 // only __DATA addresses need to be updated
1741 const uint8_t* start = &_linkeditBias[exportOffset];
1742 const uint8_t* end = &start[exportSize];
1743 std::vector<ExportInfoTrie::Entry> originalExports;
1744 if ( !ExportInfoTrie::parseTrie(start, end, originalExports) ) {
1745 _diagnostics.error("malformed exports trie in %s", _dylibID);
1746 return;
1747 }
1748
1749 std::vector<ExportInfoTrie::Entry> newExports;
1750 newExports.reserve(originalExports.size());
1751 uint64_t baseAddress = _segOrigStartAddresses[0];
1752 uint64_t baseAddressSlide = slideForOrigAddress(baseAddress);
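// Export addresses are offsets from the mach_header (start of __TEXT). Illustrative numbers:
// if a __DATA symbol's segment slid by 0x5000 while __TEXT slid by 0x1000, the recorded offset
// grows by 0x4000 below; __TEXT symbols are unchanged because both slide by the same amount.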
1753 for (auto& entry: originalExports) {
1754 // remove symbols used by the static linker only
1755 if ( (strncmp(entry.name.c_str(), "$ld$", 4) == 0)
1756 || (strncmp(entry.name.c_str(), ".objc_class_name",16) == 0)
1757 || (strncmp(entry.name.c_str(), ".objc_category_name",19) == 0) ) {
1758 continue;
1759 }
1760 // adjust symbols in slid segments
1761 if ( (entry.info.flags & EXPORT_SYMBOL_FLAGS_KIND_MASK) != EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE )
1762 entry.info.address += (slideForOrigAddress(entry.info.address + baseAddress) - baseAddressSlide);
1763 newExports.push_back(entry);
1764 }
1765
1766 // rebuild export trie
1767 newTrieBytes.reserve(exportSize);
1768
1769 ExportInfoTrie(newExports).emit(newTrieBytes);
1770 // align
1771 while ( (newTrieBytes.size() % sizeof(pint_t)) != 0 )
1772 newTrieBytes.push_back(0);
1773 }
1774
1775
1776 } // anonymous namespace
1777
1778 void CacheBuilder::adjustDylibSegments(const DylibInfo& dylib, Diagnostics& diag,
1779 uint64_t cacheBaseAddress,
1780 CacheBuilder::ASLR_Tracker& aslrTracker,
1781 CacheBuilder::LOH_Tracker* lohTracker,
1782 const CacheBuilder::CacheCoalescedText* coalescedText) const
1783 {
1784
1785 dyld3::MachOAnalyzer* mh = (dyld3::MachOAnalyzer*)dylib.cacheLocation[0].dstSegment;
1786 if ( _is64 ) {
1787 Adjustor<Pointer64<LittleEndian>> adjustor64(cacheBaseAddress,
1788 mh,
1789 dylib.dylibID.c_str(),
1790 dylib.cacheLocation, diag);
1791 adjustor64.adjustImageForNewSegmentLocations(aslrTracker, lohTracker, coalescedText, dylib.textCoalescer);
1792 }
1793 else {
1794 Adjustor<Pointer32<LittleEndian>> adjustor32(cacheBaseAddress,
1795 mh,
1796 dylib.dylibID.c_str(),
1797 dylib.cacheLocation, diag);
1798 adjustor32.adjustImageForNewSegmentLocations(aslrTracker, lohTracker, coalescedText, dylib.textCoalescer);
1799 }
1800 }
1801
1802
1803
1804
1805
1806