/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
 *
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
25 #ifndef __MACHO_REBASER__
26 #define __MACHO_REBASER__
28 #include <sys/types.h>
31 #include <mach/mach.h>
38 #include <mach-o/loader.h>
39 #include <mach-o/fat.h>
40 #include <mach-o/reloc.h>
41 #include <mach-o/x86_64/reloc.h>
42 #include <mach-o/arm/reloc.h>
46 #include "MachOFileAbstraction.hpp"
47 #include "Architectures.hpp"
48 #include "MachOLayout.hpp"
49 #include "MachOTrie.hpp"
56 virtual cpu_type_t getArchitecture() const = 0;
57 virtual uint64_t getBaseAddress() const = 0;
58 virtual uint64_t getVMSize() const = 0;
59 virtual bool rebase(std::vector<void*>&) = 0;
64 class Rebaser : public AbstractRebaser
67 Rebaser(const MachOLayoutAbstraction&);
70 virtual cpu_type_t getArchitecture() const;
71 virtual uint64_t getBaseAddress() const;
72 virtual uint64_t getVMSize() const;
73 virtual bool rebase(std::vector<void*>&);
76 typedef typename A::P P;
77 typedef typename A::P::E E;
78 typedef typename A::P::uint_t pint_t;
80 pint_t* mappedAddressForNewAddress(pint_t vmaddress);
81 pint_t getSlideForNewAddress(pint_t newAddress);
84 void adjustLoadCommands();
85 void adjustSymbolTable();
88 void applyRebaseInfo(std::vector<void*>& pointersInData);
89 void adjustReferencesUsingInfoV2(std::vector<void*>& pointersInData);
90 void adjustReference(uint32_t kind, uint8_t* mappedAddr, uint64_t fromNewAddress, uint64_t toNewAddress, int64_t adjust, int64_t targetSlide,
91 uint64_t imageStartAddress, uint64_t imageEndAddress, std::vector<void*>& pointersInData);
92 bool adjustExportInfo();
93 void doRebase(int segIndex, uint64_t segOffset, uint8_t type, std::vector<void*>& pointersInData);
94 pint_t getSlideForVMAddress(pint_t vmaddress);
95 pint_t maskedVMAddress(pint_t vmaddress);
96 pint_t* mappedAddressForVMAddress(pint_t vmaddress);
97 const uint8_t* doCodeUpdateForEachULEB128Address(const uint8_t* p, uint8_t kind, uint64_t orgBaseAddress, int64_t codeToDataDelta, int64_t codeToImportDelta);
98 void doCodeUpdate(uint8_t kind, uint64_t address, int64_t codeToDataDelta, int64_t codeToImportDelta);
99 void doLocalRelocation(const macho_relocation_info<P>* reloc);
100 bool unequalSlides() const;
103 const macho_header<P>* fHeader;
104 uint8_t* fLinkEditBase; // add file offset to this to get linkedit content
105 const MachOLayoutAbstraction& fLayout;
107 const macho_symtab_command<P>* fSymbolTable;
108 const macho_dysymtab_command<P>* fDynamicSymbolTable;
109 const macho_dyld_info_command<P>* fDyldInfo;
110 const macho_linkedit_data_command<P>* fSplitSegInfo;
111 bool fSplittingSegments;
112 bool fHasSplitSegInfoV2;
113 std::vector<uint64_t> fSectionOffsetsInSegment;
117 template <typename A>
118 Rebaser<A>::Rebaser(const MachOLayoutAbstraction& layout)
119 : fLayout(layout), fLinkEditBase(0), fSymbolTable(NULL), fDynamicSymbolTable(NULL),
120 fDyldInfo(NULL), fSplitSegInfo(NULL), fSplittingSegments(false), fHasSplitSegInfoV2(false)
122 fHeader = (const macho_header<P>*)fLayout.getSegments()[0].mappedAddress();
123 switch ( fHeader->filetype() ) {
128 throw "file is not a dylib or bundle";
131 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
132 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
133 const MachOLayoutAbstraction::Segment& seg = *it;
134 if ( strcmp(seg.name(), "__LINKEDIT") == 0 ) {
135 fLinkEditBase = (uint8_t*)seg.mappedAddress() - seg.fileOffset();
139 if ( fLinkEditBase == NULL )
140 throw "no __LINKEDIT segment";
142 // get symbol table info
143 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
144 const uint32_t cmd_count = fHeader->ncmds();
145 const macho_load_command<P>* cmd = cmds;
146 for (uint32_t i = 0; i < cmd_count; ++i) {
147 switch (cmd->cmd()) {
149 fSymbolTable = (macho_symtab_command<P>*)cmd;
152 fDynamicSymbolTable = (macho_dysymtab_command<P>*)cmd;
155 case LC_DYLD_INFO_ONLY:
156 fDyldInfo = (macho_dyld_info_command<P>*)cmd;
158 case LC_SEGMENT_SPLIT_INFO:
159 fSplitSegInfo = (macho_linkedit_data_command<P>*)cmd;
161 case macho_segment_command<P>::CMD: {
162 // update segment/section file offsets
163 macho_segment_command<P>* segCmd = (macho_segment_command<P>*)cmd;
164 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
165 macho_section<P>* const sectionsEnd = §ionsStart[segCmd->nsects()];
166 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
167 fSectionOffsetsInSegment.push_back(sect->addr() - segCmd->vmaddr());
171 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
174 if ( fDyldInfo == NULL )
175 throw "no LC_DYLD_INFO load command";
177 fSplittingSegments = layout.hasSplitSegInfo() && this->unequalSlides();
179 if ( fSplitSegInfo != NULL ) {
180 const uint8_t* infoStart = &fLinkEditBase[fSplitSegInfo->dataoff()];
181 fHasSplitSegInfoV2 = ( *infoStart == DYLD_CACHE_ADJ_V2_FORMAT );
185 template <> cpu_type_t Rebaser<x86>::getArchitecture() const { return CPU_TYPE_I386; }
186 template <> cpu_type_t Rebaser<x86_64>::getArchitecture() const { return CPU_TYPE_X86_64; }
187 template <> cpu_type_t Rebaser<arm>::getArchitecture() const { return CPU_TYPE_ARM; }
188 template <> cpu_type_t Rebaser<arm64>::getArchitecture() const { return CPU_TYPE_ARM64; }
190 template <typename A>
191 bool Rebaser<A>::unequalSlides() const
193 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
194 uint64_t slide = segments[0].newAddress() - segments[0].address();
195 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
196 const MachOLayoutAbstraction::Segment& seg = *it;
197 if ( (seg.newAddress() - seg.address()) != slide )
203 template <typename A>
204 uint64_t Rebaser<A>::getBaseAddress() const
206 return fLayout.getSegments()[0].address();
209 template <typename A>
210 uint64_t Rebaser<A>::getVMSize() const
212 uint64_t highestVMAddress = 0;
213 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
214 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
215 const MachOLayoutAbstraction::Segment& seg = *it;
216 if ( seg.address() > highestVMAddress )
217 highestVMAddress = seg.address();
219 return (((highestVMAddress - getBaseAddress()) + 4095) & (-4096));
224 template <typename A>
225 bool Rebaser<A>::rebase(std::vector<void*>& pointersInData)
227 if ( fHasSplitSegInfoV2 ) {
228 this->adjustReferencesUsingInfoV2(pointersInData);
231 //fprintf(stderr, "warning: dylib with old split-seg info: %s\n", fLayout.getFilePath());
232 // update writable segments that have internal pointers
233 this->applyRebaseInfo(pointersInData);
235 // if splitting segments, update code-to-data references
239 // update load commands
240 this->adjustLoadCommands();
242 // update symbol table
243 this->adjustSymbolTable();
245 // update export info
246 return this->adjustExportInfo();
249 template <typename A>
250 void Rebaser<A>::adjustLoadCommands()
252 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
253 const uint32_t cmd_count = fHeader->ncmds();
254 const macho_load_command<P>* cmd = cmds;
255 for (uint32_t i = 0; i < cmd_count; ++i) {
256 switch ( cmd->cmd() ) {
258 if ( (fHeader->flags() & MH_PREBOUND) != 0 ) {
259 // clear timestamp so that any prebound clients are invalidated
260 macho_dylib_command<P>* dylib = (macho_dylib_command<P>*)cmd;
261 dylib->set_timestamp(1);
265 case LC_LOAD_WEAK_DYLIB:
266 case LC_REEXPORT_DYLIB:
267 case LC_LOAD_UPWARD_DYLIB:
268 if ( (fHeader->flags() & MH_PREBOUND) != 0 ) {
269 // clear expected timestamps so that this image will load with invalid prebinding
270 macho_dylib_command<P>* dylib = (macho_dylib_command<P>*)cmd;
271 dylib->set_timestamp(2);
274 case macho_routines_command<P>::CMD:
275 // update -init command
277 struct macho_routines_command<P>* routines = (struct macho_routines_command<P>*)cmd;
278 routines->set_init_address(routines->init_address() + this->getSlideForVMAddress(routines->init_address()));
281 case macho_segment_command<P>::CMD:
282 // update segment commands
284 macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
285 pint_t slide = this->getSlideForVMAddress(seg->vmaddr());
286 seg->set_vmaddr(seg->vmaddr() + slide);
287 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
288 macho_section<P>* const sectionsEnd = §ionsStart[seg->nsects()];
289 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
290 sect->set_addr(sect->addr() + slide);
295 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
300 uint64_t Rebaser<arm64>::maskedVMAddress(pint_t vmaddress)
302 return (vmaddress & 0x0FFFFFFFFFFFFFFF);
305 template <typename A>
306 typename A::P::uint_t Rebaser<A>::maskedVMAddress(pint_t vmaddress)
312 template <typename A>
313 typename A::P::uint_t Rebaser<A>::getSlideForVMAddress(pint_t vmaddress)
315 pint_t vmaddr = this->maskedVMAddress(vmaddress);
316 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
317 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
318 const MachOLayoutAbstraction::Segment& seg = *it;
319 if ( (seg.address() <= vmaddr) && (seg.size() != 0) && ((vmaddr < (seg.address()+seg.size())) || (seg.address() == vmaddr)) ) {
320 return seg.newAddress() - seg.address();
323 throwf("vm address 0x%08llX not found", (uint64_t)vmaddr);
327 template <typename A>
328 typename A::P::uint_t* Rebaser<A>::mappedAddressForVMAddress(pint_t vmaddress)
330 pint_t vmaddr = this->maskedVMAddress(vmaddress);
331 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
332 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
333 const MachOLayoutAbstraction::Segment& seg = *it;
334 if ( (seg.address() <= vmaddr) && (vmaddr < (seg.address()+seg.size())) ) {
335 return (pint_t*)((vmaddr - seg.address()) + (uint8_t*)seg.mappedAddress());
338 throwf("mappedAddressForVMAddress(0x%08llX) not found", (uint64_t)vmaddr);
341 template <typename A>
342 typename A::P::uint_t* Rebaser<A>::mappedAddressForNewAddress(pint_t vmaddress)
344 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
345 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
346 const MachOLayoutAbstraction::Segment& seg = *it;
347 if ( (seg.newAddress() <= vmaddress) && (vmaddress < (seg.newAddress()+seg.size())) ) {
348 return (pint_t*)((vmaddress - seg.newAddress()) + (uint8_t*)seg.mappedAddress());
351 throwf("mappedAddressForNewAddress(0x%08llX) not found", (uint64_t)vmaddress);
354 template <typename A>
355 typename A::P::uint_t Rebaser<A>::getSlideForNewAddress(pint_t newAddress)
357 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
358 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
359 const MachOLayoutAbstraction::Segment& seg = *it;
360 if ( (seg.newAddress() <= newAddress) && (newAddress < (seg.newAddress()+seg.size())) ) {
361 return seg.newAddress() - seg.address();
364 throwf("new address 0x%08llX not found", (uint64_t)newAddress);
367 template <typename A>
368 void Rebaser<A>::adjustSymbolTable()
370 macho_nlist<P>* symbolTable = (macho_nlist<P>*)(&fLinkEditBase[fSymbolTable->symoff()]);
372 // walk all exports and slide their n_value
373 macho_nlist<P>* lastExport = &symbolTable[fDynamicSymbolTable->iextdefsym()+fDynamicSymbolTable->nextdefsym()];
374 for (macho_nlist<P>* entry = &symbolTable[fDynamicSymbolTable->iextdefsym()]; entry < lastExport; ++entry) {
375 if ( (entry->n_type() & N_TYPE) == N_SECT )
376 entry->set_n_value(entry->n_value() + this->getSlideForVMAddress(entry->n_value()));
379 // walk all local symbols and slide their n_value (don't adjust any stabs)
380 macho_nlist<P>* lastLocal = &symbolTable[fDynamicSymbolTable->ilocalsym()+fDynamicSymbolTable->nlocalsym()];
381 for (macho_nlist<P>* entry = &symbolTable[fDynamicSymbolTable->ilocalsym()]; entry < lastLocal; ++entry) {
382 if ( (entry->n_sect() != NO_SECT) && ((entry->n_type() & N_STAB) == 0) )
383 entry->set_n_value(entry->n_value() + this->getSlideForVMAddress(entry->n_value()));
387 template <typename A>
388 bool Rebaser<A>::adjustExportInfo()
390 // if no export info, nothing to adjust
391 if ( fDyldInfo->export_size() == 0 )
394 // since export info addresses are offsets from mach_header, everything in __TEXT is fine
395 // only __DATA addresses need to be updated
396 const uint8_t* start = fLayout.getDyldInfoExports();
397 const uint8_t* end = &start[fDyldInfo->export_size()];
398 std::vector<mach_o::trie::Entry> originalExports;
400 parseTrie(start, end, originalExports);
402 catch (const char* msg) {
403 throwf("%s in %s", msg, fLayout.getFilePath());
406 std::vector<mach_o::trie::Entry> newExports;
407 newExports.reserve(originalExports.size());
408 pint_t baseAddress = this->getBaseAddress();
409 pint_t baseAddressSlide = this->getSlideForVMAddress(baseAddress);
410 for (std::vector<mach_o::trie::Entry>::iterator it=originalExports.begin(); it != originalExports.end(); ++it) {
411 // remove symbols used by the static linker only
412 if ( (strncmp(it->name, "$ld$", 4) == 0)
413 || (strncmp(it->name, ".objc_class_name",16) == 0)
414 || (strncmp(it->name, ".objc_category_name",19) == 0) ) {
415 //fprintf(stderr, "ignoring symbol %s\n", it->name);
418 // adjust symbols in slid segments
419 //uint32_t oldOffset = it->address;
420 it->address += (this->getSlideForVMAddress(it->address + baseAddress) - baseAddressSlide);
421 //fprintf(stderr, "orig=0x%08X, new=0x%08llX, sym=%s\n", oldOffset, it->address, it->name);
422 newExports.push_back(*it);
425 // rebuild export trie
426 std::vector<uint8_t> newExportTrieBytes;
427 newExportTrieBytes.reserve(fDyldInfo->export_size());
428 mach_o::trie::makeTrie(newExports, newExportTrieBytes);
430 while ( (newExportTrieBytes.size() % sizeof(pint_t)) != 0 )
431 newExportTrieBytes.push_back(0);
433 uint32_t newExportsSize = newExportTrieBytes.size();
434 if ( newExportsSize <= fDyldInfo->export_size() ) {
435 // override existing trie in place
436 uint8_t *realStart = &fLinkEditBase[fDyldInfo->export_off()];
437 bzero(realStart, fDyldInfo->export_size());
438 memcpy(realStart, &newExportTrieBytes[0], newExportsSize);
439 fLayout.setDyldInfoExports(realStart);
443 // allocate new buffer and set export_off in layout object to use new buffer instead
444 uint8_t* sideTrie = new uint8_t[newExportsSize];
445 memcpy(sideTrie, &newExportTrieBytes[0], newExportsSize);
446 fLayout.setDyldInfoExports(sideTrie);
447 ((macho_dyld_info_command<P>*)fDyldInfo)->set_export_off(0); // invalidate old trie
448 ((macho_dyld_info_command<P>*)fDyldInfo)->set_export_size(newExportsSize);
455 template <typename A>
456 void Rebaser<A>::doCodeUpdate(uint8_t kind, uint64_t address, int64_t codeToDataDelta, int64_t codeToImportDelta)
458 //fprintf(stderr, "doCodeUpdate(kind=%d, address=0x%0llX, dataDelta=0x%08llX, importDelta=0x%08llX, path=%s)\n",
459 // kind, address, codeToDataDelta, codeToImportDelta, fLayout.getFilePath());
461 uint32_t instruction;
465 case 1: // 32-bit pointer
466 p = (uint32_t*)mappedAddressForVMAddress(address);
467 value = A::P::E::get32(*p);
468 value += codeToDataDelta;
469 A::P::E::set32(*p, value);
471 case 2: // 64-bit pointer
472 p = (uint32_t*)mappedAddressForVMAddress(address);
473 value64 = A::P::E::get64(*(uint64_t*)p);
474 value64 += codeToDataDelta;
475 A::P::E::set64(*(uint64_t*)p, value64);
477 case 4: // only used for i386, a reference to something in the IMPORT segment
478 p = (uint32_t*)mappedAddressForVMAddress(address);
479 value = A::P::E::get32(*p);
480 value += codeToImportDelta;
481 A::P::E::set32(*p, value);
483 case 5: // used by thumb2 movw
484 p = (uint32_t*)mappedAddressForVMAddress(address);
485 instruction = A::P::E::get32(*p);
486 // codeToDataDelta is always a multiple of 4096, so only top 4 bits of lo16 will ever need adjusting
487 value = (instruction & 0x0000000F) + (codeToDataDelta >> 12);
488 instruction = (instruction & 0xFFFFFFF0) | (value & 0x0000000F);
489 A::P::E::set32(*p, instruction);
491 case 6: // used by ARM movw
492 p = (uint32_t*)mappedAddressForVMAddress(address);
493 instruction = A::P::E::get32(*p);
494 // codeToDataDelta is always a multiple of 4096, so only top 4 bits of lo16 will ever need adjusting
495 value = ((instruction & 0x000F0000) >> 16) + (codeToDataDelta >> 12);
496 instruction = (instruction & 0xFFF0FFFF) | ((value <<16) & 0x000F0000);
497 A::P::E::set32(*p, instruction);
515 // used by thumb2 movt (low nibble of kind is high 4-bits of paired movw)
517 p = (uint32_t*)mappedAddressForVMAddress(address);
518 instruction = A::P::E::get32(*p);
519 // extract 16-bit value from instruction
520 uint32_t i = ((instruction & 0x00000400) >> 10);
521 uint32_t imm4 = (instruction & 0x0000000F);
522 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
523 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
524 uint32_t imm16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
525 // combine with codeToDataDelta and kind nibble
526 uint32_t targetValue = (imm16 << 16) | ((kind & 0xF) << 12);
527 uint32_t newTargetValue = targetValue + codeToDataDelta;
528 // construct new bits slices
529 uint32_t imm4_ = (newTargetValue & 0xF0000000) >> 28;
530 uint32_t i_ = (newTargetValue & 0x08000000) >> 27;
531 uint32_t imm3_ = (newTargetValue & 0x07000000) >> 24;
532 uint32_t imm8_ = (newTargetValue & 0x00FF0000) >> 16;
533 // update instruction to match codeToDataDelta
534 uint32_t newInstruction = (instruction & 0x8F00FBF0) | imm4_ | (i_ << 10) | (imm3_ << 28) | (imm8_ << 16);
535 A::P::E::set32(*p, newInstruction);
554 // used by arm movt (low nibble of kind is high 4-bits of paired movw)
556 p = (uint32_t*)mappedAddressForVMAddress(address);
557 instruction = A::P::E::get32(*p);
558 // extract 16-bit value from instruction
559 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
560 uint32_t imm12 = (instruction & 0x00000FFF);
561 uint32_t imm16 = (imm4 << 12) | imm12;
562 // combine with codeToDataDelta and kind nibble
563 uint32_t targetValue = (imm16 << 16) | ((kind & 0xF) << 12);
564 uint32_t newTargetValue = targetValue + codeToDataDelta;
565 // construct new bits slices
566 uint32_t imm4_ = (newTargetValue & 0xF0000000) >> 28;
567 uint32_t imm12_ = (newTargetValue & 0x0FFF0000) >> 16;
568 // update instruction to match codeToDataDelta
569 uint32_t newInstruction = (instruction & 0xFFF0F000) | (imm4_ << 16) | imm12_;
570 A::P::E::set32(*p, newInstruction);
573 case 3: // used for arm64 ADRP
574 p = (uint32_t*)mappedAddressForVMAddress(address);
575 instruction = A::P::E::get32(*p);
576 if ( (instruction & 0x9F000000) == 0x90000000 ) {
577 // codeToDataDelta is always a multiple of 4096, so only top 4 bits of lo16 will ever need adjusting
578 value64 = ((instruction & 0x60000000) >> 17) | ((instruction & 0x00FFFFE0) << 9);
579 value64 += codeToDataDelta;
580 instruction = (instruction & 0x9F00001F) | ((value64 << 17) & 0x60000000) | ((value64 >> 9) & 0x00FFFFE0);
581 A::P::E::set32(*p, instruction);
585 throwf("invalid kind=%d in split seg info", kind);
589 template <typename A>
590 const uint8_t* Rebaser<A>::doCodeUpdateForEachULEB128Address(const uint8_t* p, uint8_t kind, uint64_t orgBaseAddress, int64_t codeToDataDelta, int64_t codeToImportDelta)
592 uint64_t address = 0;
598 delta |= ((byte & 0x7F) << shift);
603 doCodeUpdate(kind, address+orgBaseAddress, codeToDataDelta, codeToImportDelta);
615 template <typename A>
616 void Rebaser<A>::adjustCode()
618 if ( fSplittingSegments ) {
619 // get uleb128 compressed runs of code addresses to update
620 const uint8_t* infoStart = &fLinkEditBase[fSplitSegInfo->dataoff()];
621 const uint8_t* infoEnd = &infoStart[fSplitSegInfo->datasize()];;
622 // calculate how much we need to slide writable segments
623 const uint64_t orgBaseAddress = this->getBaseAddress();
624 int64_t codeToDataDelta = 0;
625 int64_t codeToImportDelta = 0;
626 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
627 const MachOLayoutAbstraction::Segment& codeSeg = segments[0];
628 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
629 const MachOLayoutAbstraction::Segment& dataSeg = *it;
630 if ( dataSeg.writable() ) {
631 if ( (strcmp(dataSeg.name(), "__DATA") != 0) && (strcmp(dataSeg.name(), "__OBJC") != 0) )
632 throwf("only one rw segment named '__DATA' can be used in dylibs placed in the dyld shared cache (%s)", fLayout.getFilePath());
633 codeToDataDelta = (dataSeg.newAddress() - codeSeg.newAddress()) - (dataSeg.address() - codeSeg.address());
636 // decompress and call doCodeUpdate() on each address
637 for(const uint8_t* p = infoStart; (*p != 0) && (p < infoEnd);) {
639 p = this->doCodeUpdateForEachULEB128Address(p, kind, orgBaseAddress, codeToDataDelta, codeToImportDelta);
644 template <typename A>
645 void Rebaser<A>::doRebase(int segIndex, uint64_t segOffset, uint8_t type, std::vector<void*>& pointersInData)
647 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
648 if ( segIndex > segments.size() )
649 throw "bad segment index in rebase info";
650 const MachOLayoutAbstraction::Segment& seg = segments[segIndex];
651 uint8_t* mappedAddr = (uint8_t*)seg.mappedAddress() + segOffset;
652 pint_t* mappedAddrP = (pint_t*)mappedAddr;
653 uint32_t* mappedAddr32 = (uint32_t*)mappedAddr;
660 case REBASE_TYPE_POINTER:
661 valueP= P::getP(*mappedAddrP);
663 P::setP(*mappedAddrP, valueP + this->getSlideForVMAddress(valueP));
665 catch (const char* msg) {
666 throwf("at offset=0x%08llX in seg=%s, pointer cannot be rebased because it does not point to __TEXT or __DATA. %s\n",
667 segOffset, seg.name(), msg);
671 case REBASE_TYPE_TEXT_ABSOLUTE32:
672 value32 = E::get32(*mappedAddr32);
673 E::set32(*mappedAddr32, value32 + this->getSlideForVMAddress(value32));
676 case REBASE_TYPE_TEXT_PCREL32:
677 svalue32 = E::get32(*mappedAddr32);
678 valueP = seg.address() + segOffset + 4 + svalue32;
679 valuePnew = valueP + this->getSlideForVMAddress(valueP);
680 svalue32new = seg.address() + segOffset + 4 - valuePnew;
681 E::set32(*mappedAddr32, svalue32new);
685 throw "bad rebase type";
687 pointersInData.push_back(mappedAddr);
691 template <typename A>
692 void Rebaser<A>::applyRebaseInfo(std::vector<void*>& pointersInData)
694 const uint8_t* p = &fLinkEditBase[fDyldInfo->rebase_off()];
695 const uint8_t* end = &p[fDyldInfo->rebase_size()];
699 uint64_t segOffset = 0;
703 while ( !done && (p < end) ) {
704 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
705 uint8_t opcode = *p & REBASE_OPCODE_MASK;
708 case REBASE_OPCODE_DONE:
711 case REBASE_OPCODE_SET_TYPE_IMM:
714 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
715 segIndex = immediate;
716 segOffset = read_uleb128(p, end);
718 case REBASE_OPCODE_ADD_ADDR_ULEB:
719 segOffset += read_uleb128(p, end);
721 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
722 segOffset += immediate*sizeof(pint_t);
724 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
725 for (int i=0; i < immediate; ++i) {
726 doRebase(segIndex, segOffset, type, pointersInData);
727 segOffset += sizeof(pint_t);
730 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
731 count = read_uleb128(p, end);
732 for (uint32_t i=0; i < count; ++i) {
733 doRebase(segIndex, segOffset, type, pointersInData);
734 segOffset += sizeof(pint_t);
737 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
738 doRebase(segIndex, segOffset, type, pointersInData);
739 segOffset += read_uleb128(p, end) + sizeof(pint_t);
741 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
742 count = read_uleb128(p, end);
743 skip = read_uleb128(p, end);
744 for (uint32_t i=0; i < count; ++i) {
745 doRebase(segIndex, segOffset, type, pointersInData);
746 segOffset += skip + sizeof(pint_t);
750 throwf("bad rebase opcode %d", *p);
756 void Rebaser<arm64>::adjustReference(uint32_t kind, uint8_t* mappedAddr, uint64_t fromNewAddress, uint64_t toNewAddress, int64_t adjust, int64_t targetSlide,
757 uint64_t imageStartAddress, uint64_t imageEndAddress, std::vector<void*>& pointersInData)
760 uint64_t* mappedAddr64;
762 uint32_t* mappedAddr32;
763 uint32_t instruction;
764 int64_t offsetAdjust;
766 case DYLD_CACHE_ADJ_V2_DELTA_32:
767 mappedAddr32 = (uint32_t*)mappedAddr;
768 value32 = arm64::P::E::get32(*mappedAddr32);
769 E::set32(*mappedAddr32, value32 + adjust);
771 case DYLD_CACHE_ADJ_V2_POINTER_64:
772 mappedAddr64 = (uint64_t*)mappedAddr;
773 if ( toNewAddress != (E::get64(*mappedAddr64) + targetSlide) )
774 throwf("bad DYLD_CACHE_ADJ_V2_POINTER_64 value not as expected at address 0x%llX\n", fromNewAddress);
775 E::set64(*mappedAddr64, toNewAddress);
776 pointersInData.push_back(mappedAddr);
778 case DYLD_CACHE_ADJ_V2_DELTA_64:
779 mappedAddr64 = (uint64_t*)mappedAddr;
780 value64 = arm64::P::E::get64(*mappedAddr64);
781 E::set64(*mappedAddr64, value64 + adjust);
783 case DYLD_CACHE_ADJ_V2_IMAGE_OFF_32:
784 mappedAddr32 = (uint32_t*)mappedAddr;
785 value64 = toNewAddress - imageStartAddress;
786 E::set32(*mappedAddr32, (uint32_t)value64);
788 case DYLD_CACHE_ADJ_V2_ARM64_ADRP:
789 mappedAddr32 = (uint32_t*)mappedAddr;
790 instruction = arm64::P::E::get32(*mappedAddr32);
791 if ( (instruction & 0x9F000000) == 0x90000000 ) {
792 //value64 = ((instruction & 0x60000000) >> 17) | ((instruction & 0x00FFFFE0) << 9);
793 uint32_t newPage21 = ((toNewAddress & ~0xFFF) - (fromNewAddress & ~0xFFF)) >> 12;
794 instruction = (instruction & 0x9F00001F) | ((newPage21 << 29) & 0x60000000) | ((newPage21 << 3) & 0x00FFFFE0);
795 arm64::P::E::set32(*mappedAddr32, instruction);
798 // ADRP instructions are sometimes optimized to other instructions (e.g. ADR) after the split-seg-info is generated
801 case DYLD_CACHE_ADJ_V2_ARM64_OFF12:
802 mappedAddr32 = (uint32_t*)mappedAddr;
803 instruction = arm64::P::E::get32(*mappedAddr32);
804 offsetAdjust = (adjust & 0xFFF);
805 if ( offsetAdjust == 0 )
807 if ( (instruction & 0x3B000000) == 0x39000000 ) {
809 if ( offsetAdjust != 0 ) {
810 uint32_t encodedAddend = ((instruction & 0x003FFC00) >> 10);
812 switch ( instruction & 0xC0000000 ) {
814 if ( (instruction & 0x04800000) == 0x04800000 ) {
815 if ( offsetAdjust & 0xF )
816 throwf("can't adjust off12 scale=16 instruction by %lld bytes at mapped address=%p", offsetAdjust, mappedAddr);
817 if ( encodedAddend*16 >= 4096 )
818 throwf("off12 scale=16 instruction points outside its page at mapped address=%p", mappedAddr);
819 newAddend = (encodedAddend + offsetAdjust/16) % 256;
823 newAddend = encodedAddend + offsetAdjust;
827 if ( offsetAdjust & 1 )
828 throwf("can't adjust off12 scale=2 instruction by %lld bytes at mapped address=%p", offsetAdjust, mappedAddr);
829 if ( encodedAddend*2 >= 4096 )
830 throwf("off12 scale=2 instruction points outside its page at mapped address=%p", mappedAddr);
831 newAddend = (encodedAddend + offsetAdjust/2) % 2048;
834 if ( offsetAdjust & 3 )
835 throwf("can't adjust off12 scale=4 instruction by %lld bytes at mapped address=%p", offsetAdjust, mappedAddr);
836 if ( encodedAddend*4 >= 4096 )
837 throwf("off12 scale=4 instruction points outside its page at mapped address=%p", mappedAddr);
838 newAddend = (encodedAddend + offsetAdjust/4) % 1024;
841 if ( offsetAdjust & 7 )
842 throwf("can't adjust off12 scale=8 instruction by %lld bytes at mapped address=%p", offsetAdjust, mappedAddr);
843 if ( encodedAddend*8 >= 4096 )
844 throwf("off12 scale=8 instruction points outside its page at mapped address=%p", mappedAddr);
845 newAddend = (encodedAddend + offsetAdjust/8) % 512;
848 uint32_t newInstruction = (instruction & 0xFFC003FF) | (newAddend << 10);
849 arm64::P::E::set32(*mappedAddr32, newInstruction);
852 else if ( (instruction & 0xFFC00000) == 0x91000000 ) {
854 if ( instruction & 0x00C00000 )
855 throwf("ADD off12 uses shift at mapped address=%p", mappedAddr);
856 uint32_t encodedAddend = ((instruction & 0x003FFC00) >> 10);
857 uint32_t newAddend = (encodedAddend + offsetAdjust) & 0xFFF;
858 uint32_t newInstruction = (instruction & 0xFFC003FF) | (newAddend << 10);
859 arm64::P::E::set32(*mappedAddr32, newInstruction);
861 else if ( instruction != 0xD503201F ) {
862 // ignore imm12 instructions optimized into a NOP, but warn about others
863 fprintf(stderr, "unknown off12 instruction 0x%08X at 0x%0llX in %s\n", instruction, fromNewAddress, fLayout.getFilePath());
866 case DYLD_CACHE_ADJ_V2_ARM64_BR26:
867 // nothing to do with calls to stubs
870 throwf("unknown split seg kind=%d", kind);
// Returns true if the 32-bit value is a Thumb2 MOVW (T3) encoding.
static bool isThumbMovw(uint32_t instruction)
{
    return ( (instruction & 0x8000FBF0) == 0x0000F240 );
}
// Returns true if the 32-bit value is a Thumb2 MOVT (T1) encoding.
static bool isThumbMovt(uint32_t instruction)
{
    return ( (instruction & 0x8000FBF0) == 0x0000F2C0 );
}
// Extracts the 16-bit immediate from a Thumb2 movw/movt, whose value is
// scattered across the i, imm4, imm3 and imm8 fields.
static uint16_t getThumbWord(uint32_t instruction)
{
    uint32_t i    = ((instruction & 0x00000400) >> 10);
    uint32_t imm4 =  (instruction & 0x0000000F);
    uint32_t imm3 = ((instruction & 0x70000000) >> 28);
    uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
    return ((imm4 << 12) | (i << 11) | (imm3 << 8) | imm8);
}
// Re-scatters a 16-bit immediate back into a Thumb2 movw/movt encoding,
// preserving all non-immediate instruction bits.
static uint32_t setThumbWord(uint32_t instruction, uint16_t word) {
    uint32_t imm4 = (word & 0xF000) >> 12;
    uint32_t i    = (word & 0x0800) >> 11;
    uint32_t imm3 = (word & 0x0700) >> 8;
    uint32_t imm8 =  word & 0x00FF;
    return (instruction & 0x8F00FBF0) | imm4 | (i << 10) | (imm3 << 28) | (imm8 << 16);
}
// Returns true if the 32-bit value is an ARM MOVW (any condition code).
static bool isArmMovw(uint32_t instruction)
{
    return (instruction & 0x0FF00000) == 0x03000000;
}
// Returns true if the 32-bit value is an ARM MOVT (any condition code).
static bool isArmMovt(uint32_t instruction)
{
    return (instruction & 0x0FF00000) == 0x03400000;
}
// Extracts the 16-bit immediate from an ARM movw/movt (imm4:imm12 fields).
static uint16_t getArmWord(uint32_t instruction)
{
    uint32_t imm4  = ((instruction & 0x000F0000) >> 16);
    uint32_t imm12 =  (instruction & 0x00000FFF);
    return (imm4 << 12) | imm12;
}
// Re-inserts a 16-bit immediate into an ARM movw/movt encoding, preserving
// all non-immediate instruction bits.
static uint32_t setArmWord(uint32_t instruction, uint16_t word) {
    uint32_t imm4  = (word & 0xF000) >> 12;
    uint32_t imm12 =  word & 0x0FFF;
    return (instruction & 0xFFF0F000) | (imm4 << 16) | imm12;
}
// arm specialization of adjustReference(): applies one split-seg-info v2 fixup
// (selected by 'kind') to the bytes at 'mappedAddr'.  'adjust' is the delta to
// add for delta-style fixups, 'targetSlide' the slide of the section the fixup
// points into.  Pointer fixup locations are appended to 'pointersInData'.
926 void Rebaser<arm>::adjustReference(uint32_t kind, uint8_t* mappedAddr, uint64_t fromNewAddress, uint64_t toNewAddress, int64_t adjust, int64_t targetSlide,
927 uint64_t imageStartAddress, uint64_t imageEndAddress, std::vector<void*>& pointersInData)
930 uint32_t* mappedAddr32 = (uint32_t*)mappedAddr;
// movw/movt fixups arrive as two consecutive calls (one per instruction);
// these statics let the second call locate the first.  Non-reentrant —
// NOTE(review): assumes a single-threaded rebase pass; confirm no concurrent
// callers.  lastToNewAddress being 32-bit is adequate for arm's 32-bit
// address space.
931 static uint32_t* lastMappedAddr32 = NULL;
932 static uint32_t lastKind = 0;
933 static uint32_t lastToNewAddress = 0;
// 32-bit delta: add 'adjust' to the 32-bit value in place.
935 case DYLD_CACHE_ADJ_V2_DELTA_32:
// NOTE(review): reads through arm64::P::E but writes through E — both are
// little-endian here so the result matches, but this looks like a
// copy/paste from the arm64 specialization; confirm.
936 value32 = arm64::P::E::get32(*mappedAddr32);
937 E::set32(*mappedAddr32, value32 + adjust);
// 32-bit pointer: sanity-check that the stored value plus the target
// section's slide yields the expected new address, then store the new
// address and record this location for later slide-info generation.
939 case DYLD_CACHE_ADJ_V2_POINTER_32:
940 if ( toNewAddress != (E::get32(*mappedAddr32) + targetSlide) )
941 throwf("bad DYLD_CACHE_ADJ_V2_POINTER_32 value not as expected at address 0x%llX\n", fromNewAddress);
942 E::set32(*mappedAddr32, toNewAddress);
943 pointersInData.push_back( mappedAddr);
// 32-bit offset from the image's start address (an offset, not a pointer,
// so it is recomputed rather than slid and is not recorded in pointersInData).
945 case DYLD_CACHE_ADJ_V2_IMAGE_OFF_32:
948 mappedAddr32 = (uint32_t*)mappedAddr;
949 value32 = (uint32_t)(toNewAddress - imageStartAddress);
950 E::set32(*mappedAddr32, value32);
952 case DYLD_CACHE_ADJ_V2_THUMB_MOVW_MOVT:
953 // to update a movw/movt pair we need to extract the 32-bit they will make,
954 // add the adjust and write back the new movw/movt pair.
// Only act on the second fixup of a pair: the previous call must have been
// the same kind and must target the same address.
955 if ( lastKind == kind ) {
956 if ( lastToNewAddress == toNewAddress ) {
957 uint32_t instruction1 = E::get32(*lastMappedAddr32);
958 uint32_t instruction2 = E::get32(*mappedAddr32);
// The pair may appear in either order (movw first or movt first); rebuild
// the 32-bit value from the two 16-bit halves, then split it back into the
// two instructions the same way.
959 if ( isThumbMovw(instruction1) && isThumbMovt(instruction2) ) {
960 uint16_t high = getThumbWord(instruction2);
961 uint16_t low = getThumbWord(instruction1);
962 uint32_t full = high << 16 | low;
964 instruction1 = setThumbWord(instruction1, full & 0xFFFF);
965 instruction2 = setThumbWord(instruction2, full >> 16);
967 else if ( isThumbMovt(instruction1) && isThumbMovw(instruction2) ) {
968 uint16_t high = getThumbWord(instruction1);
969 uint16_t low = getThumbWord(instruction2);
970 uint32_t full = high << 16 | low;
972 instruction2 = setThumbWord(instruction2, full & 0xFFFF);
973 instruction1 = setThumbWord(instruction1, full >> 16);
// Neither ordering matched: the two fixups did not land on a movw/movt pair.
976 throw "two DYLD_CACHE_ADJ_V2_THUMB_MOVW_MOVT in a row but not";
978 E::set32(*lastMappedAddr32, instruction1);
979 E::set32(*mappedAddr32, instruction2);
983 throw "two DYLD_CACHE_ADJ_V2_THUMB_MOVW_MOVT in a row but target different addresses";
987 case DYLD_CACHE_ADJ_V2_ARM_MOVW_MOVT:
988 // to update a movw/movt pair we need to extract the 32-bit they will make,
989 // add the adjust and write back the new movw/movt pair.
// Same pairing scheme as the Thumb case above, using the ARM (A32) encodings.
990 if ( lastKind == kind ) {
991 if ( lastToNewAddress == toNewAddress ) {
992 uint32_t instruction1 = E::get32(*lastMappedAddr32);
993 uint32_t instruction2 = E::get32(*mappedAddr32);
994 if ( isArmMovw(instruction1) && isArmMovt(instruction2) ) {
995 uint16_t high = getArmWord(instruction2);
996 uint16_t low = getArmWord(instruction1);
997 uint32_t full = high << 16 | low;
999 instruction1 = setArmWord(instruction1, full & 0xFFFF);
1000 instruction2 = setArmWord(instruction2, full >> 16);
1002 else if ( isArmMovt(instruction1) && isArmMovw(instruction2) ) {
1003 uint16_t high = getArmWord(instruction1);
1004 uint16_t low = getArmWord(instruction2);
1005 uint32_t full = high << 16 | low;
1007 instruction2 = setArmWord(instruction2, full & 0xFFFF);
1008 instruction1 = setArmWord(instruction1, full >> 16);
1011 throw "two DYLD_CACHE_ADJ_V2_ARM_MOVW_MOVT in a row but not";
1013 E::set32(*lastMappedAddr32, instruction1);
1014 E::set32(*mappedAddr32, instruction2);
1018 throw "two DYLD_CACHE_ADJ_V2_ARM_MOVW_MOVT in a row but target different addresses";
// Branch fixups need no byte changes here.
1022 case DYLD_CACHE_ADJ_V2_ARM_BR24:
1023 case DYLD_CACHE_ADJ_V2_THUMB_BR22:
1024 // nothing to do with calls to stubs
1027 throwf("v2 split seg info kind (%d) not supported yet", kind);
// Remember this fixup so a following movw/movt partner call can pair with it.
1030 lastToNewAddress = toNewAddress;
1031 lastMappedAddr32 = mappedAddr32;
1035 template <typename A>
1036 void Rebaser<A>::adjustReference(uint32_t kind, uint8_t* mappedAddr, uint64_t fromNewAddress, uint64_t toNewAddress, int64_t adjust, int64_t targetSlide,
1037 uint64_t imageStartAddress, uint64_t imageEndAddress, std::vector<void*>& pointersInData)
1039 throw "v2 split seg info not supported yet";
1043 template <typename A>
// Walks the DYLD_CACHE_ADJ_V2_FORMAT split-seg info from __LINKEDIT and
// applies every encoded fixup via adjustReference().  Locations of adjusted
// pointers are collected into 'pointersInData'.
1044 void Rebaser<A>::adjustReferencesUsingInfoV2(std::vector<void*>& pointersInData)
// Flip to true for verbose tracing of the parse.
1046 static const bool log = false;
1048 const uint8_t* infoStart = &fLinkEditBase[fSplitSegInfo->dataoff()];
// (stray double semicolon below is harmless)
1049 const uint8_t* infoEnd = &infoStart[fSplitSegInfo->datasize()];;
// First byte is the format tag; only the v2 format is handled here.
1050 if ( *infoStart++ != DYLD_CACHE_ADJ_V2_FORMAT )
1051 throw "malformed split seg info";
1053 // build section arrays of slide and mapped address for each section
// For each section index the encoding can reference, precompute: its slide
// (new address minus old), its new (post-rebase) VM address, and where its
// bytes are mapped in this process so they can be patched in place.
1054 std::vector<uint64_t> sectionSlides;
1055 std::vector<uint64_t> sectionNewAddress;
1056 std::vector<uint8_t*> sectionMappedAddress;
// 16 is just a typical section-count guess to avoid early reallocations.
1057 sectionSlides.reserve(16);
1058 sectionNewAddress.reserve(16);
1059 sectionMappedAddress.reserve(16);
1060 // section index 0 refers to mach_header
// The mach_header lives at the start of the first (__TEXT) segment.
1061 const MachOLayoutAbstraction::Segment& textSeg = fLayout.getSegments().front();
1062 sectionMappedAddress.push_back((uint8_t*)textSeg.mappedAddress());
1063 sectionSlides.push_back(textSeg.newAddress() - textSeg.address());
1064 sectionNewAddress.push_back(textSeg.newAddress());
1065 // section 1 and later refer to real sections
// NOTE(review): sectionIndex is expected to advance once per section so it
// indexes fSectionOffsetsInSegment correctly; its increment is not visible
// in this excerpt — confirm it happens inside the inner loop.
1066 unsigned sectionIndex = 0;
1067 for (const MachOLayoutAbstraction::Segment& seg : fLayout.getSegments()) {
// All sections of a segment share the segment's slide.
1068 uint64_t segSlide = seg.newAddress() - seg.address();
1069 for (uint32_t i=0; i < seg.sectionCount(); ++i) {
1070 if (log) fprintf(stderr, "seg=%s, sectIndex=%d, mapped at=%p, offsetInSeg=0x%08llX\n", seg.name(), sectionIndex, seg.mappedAddress(), fSectionOffsetsInSegment[sectionIndex]);
1071 sectionMappedAddress.push_back((uint8_t*)seg.mappedAddress() + fSectionOffsetsInSegment[sectionIndex]);
1072 sectionSlides.push_back(segSlide);
1073 sectionNewAddress.push_back(seg.newAddress() + fSectionOffsetsInSegment[sectionIndex]);
// Grammar of the uleb128-encoded v2 info parsed below.  Offsets are
// delta-encoded and accumulate within each (from-section, to-section) group.
1078 // Whole :== <count> FromToSection+
1079 // FromToSection :== <from-sect-index> <to-sect-index> <count> ToOffset+
1080 // ToOffset :== <to-sect-offset-delta> <count> FromOffset+
1081 // FromOffset :== <kind> <count> <from-sect-offset-delta>
1082 const uint8_t* p = infoStart;
1083 uint64_t sectionCount = read_uleb128(p, infoEnd);
1084 for (uint64_t i=0; i < sectionCount; ++i) {
// 'from' is where the fixup bytes live; 'to' is what they refer to.
1085 uint64_t fromSectionIndex = read_uleb128(p, infoEnd);
1086 uint64_t toSectionIndex = read_uleb128(p, infoEnd);
1087 uint64_t toOffsetCount = read_uleb128(p, infoEnd);
1088 uint64_t fromSectionSlide = sectionSlides[fromSectionIndex];
1089 uint64_t fromSectionNewAddress = sectionNewAddress[fromSectionIndex];
1090 uint8_t* fromSectionMappedAddress = sectionMappedAddress[fromSectionIndex];
1091 uint64_t toSectionSlide = sectionSlides[toSectionIndex];
1092 uint64_t toSectionNewAddress = sectionNewAddress[toSectionIndex];
1093 if (log) printf("from sect=%lld (mapped=%p), to sect=%lld (new addr=0x%llX):\n", fromSectionIndex, fromSectionMappedAddress, toSectionIndex, toSectionNewAddress);
// Running offset into the target section (accumulates the deltas).
1094 uint64_t toSectionOffset = 0;
1095 for (uint64_t j=0; j < toOffsetCount; ++j) {
1096 uint64_t toSectionDelta = read_uleb128(p, infoEnd);
1097 uint64_t fromOffsetCount = read_uleb128(p, infoEnd);
1098 toSectionOffset += toSectionDelta;
1099 for (uint64_t k=0; k < fromOffsetCount; ++k) {
1100 uint64_t kind = read_uleb128(p, infoEnd);
1101 uint64_t fromSectDeltaCount = read_uleb128(p, infoEnd);
// Running offset into the source section, reset per kind group.
1102 uint64_t fromSectionOffset = 0;
1103 for (uint64_t l=0; l < fromSectDeltaCount; ++l) {
1104 uint64_t delta = read_uleb128(p, infoEnd);
1105 fromSectionOffset += delta;
// How much the target moved relative to the source: the amount any
// delta-style fixup must change by.
1106 int64_t deltaAdjust = toSectionSlide - fromSectionSlide;
1107 if (log) printf(" kind=%lld, from offset=0x%0llX, to offset=0x%0llX, adjust=0x%llX, targetSlide=0x%llX\n", kind, fromSectionOffset, toSectionOffset, deltaAdjust, toSectionSlide);
1108 uint8_t* fromMappedAddr = fromSectionMappedAddress + fromSectionOffset;
1109 uint64_t toNewAddress = toSectionNewAddress + toSectionOffset;
1110 uint64_t fromNewAddress = fromSectionNewAddress + fromSectionOffset;
// sectionNewAddress.front()/.back() serve as the image's start/end bounds.
1111 adjustReference(kind, fromMappedAddr, fromNewAddress, toNewAddress, deltaAdjust, toSectionSlide, sectionNewAddress.front(), sectionNewAddress.back(), pointersInData);
1120 #endif // __MACHO_REBASER__