1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*-
3 * Copyright (c) 2014 Apple Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
27 #include <sys/errno.h>
28 #include <sys/fcntl.h>
29 #include <mach-o/loader.h>
30 #include <mach-o/fat.h>
36 #include <unordered_map>
37 #include <unordered_set>
39 #include "MachOFileAbstraction.hpp"
41 #include "DyldSharedCache.h"
42 #include "CacheBuilder.h"
43 #include "MachOLoaded.h"
// Rounds `value` up to the next multiple of alignof(type).
// `value` is parenthesized so that expression arguments with low-precedence
// operators (e.g. `cond ? a : b`, `a | b`) are aligned as a whole instead of
// having "+ alignof(type) - 1" bind to only their last operand.
#define ALIGN_AS_TYPE(value, type) \
        (((value) + alignof(type) - 1) & (-alignof(type)))
51 class SortedStringPool
54 // add a string and symbol table entry index to be updated later
55 void add(uint32_t symbolIndex
, const char* symbolName
) {
56 _map
[symbolName
].push_back({ symbolIndex
, false });
59 // add a string and symbol table entry index to be updated later
60 void addIndirect(uint32_t symbolIndex
, const char* symbolName
) {
61 _map
[symbolName
].push_back({ symbolIndex
, true });
64 // copy sorted strings to buffer and update all symbol's string offsets
65 uint32_t copyPoolAndUpdateOffsets(char* dstStringPool
, macho_nlist
<P
>* symbolTable
) {
66 // walk sorted list of strings
67 dstStringPool
[0] = '\0'; // tradition for start of pool to be empty string
68 uint32_t poolOffset
= 1;
69 for (auto& entry
: _map
) {
70 const std::string
& symName
= entry
.first
;
71 // append string to pool
72 strcpy(&dstStringPool
[poolOffset
], symName
.c_str());
73 // set each string offset of each symbol using it
74 for (std::pair
<uint32_t, bool> symbolIndexAndIndirect
: entry
.second
) {
75 if ( symbolIndexAndIndirect
.second
) {
77 symbolTable
[symbolIndexAndIndirect
.first
].set_n_value(poolOffset
);
79 symbolTable
[symbolIndexAndIndirect
.first
].set_n_strx(poolOffset
);
82 poolOffset
+= symName
.size() + 1;
84 // return size of pool
90 for (auto& entry
: _map
) {
91 size
+= (entry
.first
.size() + 1);
98 std::map
<std::string
, std::vector
<std::pair
<uint32_t, bool>>> _map
;
102 } // anonymous namespace
// Per-dylib bookkeeping for local symbols that are moved to the unmapped
// local-symbols area (filled in by LinkeditOptimizer<P>::copyLocalSymbols).
struct LocalSymbolInfo
{
    uint32_t    dylibOffset;        // byte offset of the dylib's mach_header from the container buffer
    uint32_t    nlistStartIndex;    // index of the dylib's first entry in the unmapped nlist array
    uint32_t    nlistCount;         // number of unmapped nlist entries belonging to the dylib
};
113 template <typename P
>
114 class LinkeditOptimizer
{
116 LinkeditOptimizer(const void* containerBuffer
, macho_header
<P
>* mh
, const char* dylibID
,
119 uint32_t linkeditSize() { return _linkeditSize
; }
120 uint64_t linkeditAddr() { return _linkeditAddr
; }
121 const char* dylibID() { return _dylibID
; }
122 void copyWeakBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
);
123 void copyLazyBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
);
124 void copyBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
);
125 void copyExportInfo(uint8_t* newLinkEditContent
, uint32_t& offset
);
126 void copyExportedSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
);
127 void copyImportedSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
);
128 void copyLocalSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
,
129 bool redact
, std::vector
<LocalSymbolInfo
>& localSymbolInfos
,
130 std::vector
<macho_nlist
<P
>>& unmappedLocalSymbols
, SortedStringPool
<P
>& localSymbolsStringPool
);
131 void copyFunctionStarts(uint8_t* newLinkEditContent
, uint32_t& offset
);
132 void copyDataInCode(uint8_t* newLinkEditContent
, uint32_t& offset
);
133 void copyIndirectSymbolTable(uint8_t* newLinkEditContent
, uint32_t& offset
);
134 void updateLoadCommands(uint32_t linkeditStartOffset
, uint64_t mergedLinkeditAddr
, uint64_t newLinkeditSize
,
135 uint32_t sharedSymbolTableStartOffset
, uint32_t sharedSymbolTableCount
,
136 uint32_t sharedSymbolStringsOffset
, uint32_t sharedSymbolStringsSize
);
138 typedef CacheBuilder::DylibStripMode DylibStripMode
;
139 void setStripMode(DylibStripMode stripMode
);
141 macho_header
<P
>* machHeader() { return _mh
; }
142 const std::vector
<const char*> getDownwardDependents() { return _downDependentPaths
; }
143 const std::vector
<const char*> getAllDependents() { return _allDependentPaths
; }
144 const std::vector
<const char*> getReExportPaths() { return _reExportPaths
; }
145 const std::vector
<uint64_t> initializerAddresses() { return _initializerAddresses
; }
146 const std::vector
<macho_section
<P
>*> dofSections() { return _dofSections
; }
147 uint32_t exportsTrieLinkEditOffset() { return _newExportInfoOffset
; }
148 uint32_t exportsTrieLinkEditSize() { return _exportInfoSize
; }
149 uint32_t weakBindingLinkEditOffset() { return _newWeakBindingInfoOffset
; }
150 uint32_t weakBindingLinkEditSize() { return _newWeakBindingSize
; }
151 uint64_t dyldSectionAddress() { return _dyldSectionAddr
; }
152 const std::vector
<macho_segment_command
<P
>*>& segCmds() { return _segCmds
; }
155 static void optimizeLinkedit(CacheBuilder
& builder
, const void* containerBuffer
,
156 CacheBuilder::UnmappedRegion
* localSymbolsRegion
,
157 const std::vector
<std::tuple
<const mach_header
*, const char*, DylibStripMode
>>& images
);
158 static void mergeLinkedits(CacheBuilder
& builder
, CacheBuilder::UnmappedRegion
* localSymbolsRegion
,
159 std::vector
<LinkeditOptimizer
<P
>*>& optimizers
);
163 typedef typename
P::uint_t pint_t
;
164 typedef typename
P::E E
;
166 macho_header
<P
>* _mh
;
167 const void* _containerBuffer
;
168 Diagnostics
& _diagnostics
;
169 uint32_t _linkeditSize
= 0;
170 uint64_t _linkeditAddr
= 0;
171 const uint8_t* _linkeditBias
= nullptr;
172 const char* _dylibID
= nullptr;
173 macho_symtab_command
<P
>* _symTabCmd
= nullptr;
174 macho_dysymtab_command
<P
>* _dynSymTabCmd
= nullptr;
175 macho_dyld_info_command
<P
>* _dyldInfo
= nullptr;
176 macho_linkedit_data_command
<P
>* _exportTrieCmd
= nullptr;
177 macho_linkedit_data_command
<P
>* _functionStartsCmd
= nullptr;
178 macho_linkedit_data_command
<P
>* _dataInCodeCmd
= nullptr;
179 std::vector
<macho_segment_command
<P
>*> _segCmds
;
180 std::unordered_map
<uint32_t,uint32_t> _oldToNewSymbolIndexes
;
181 std::vector
<const char*> _reExportPaths
;
182 std::vector
<const char*> _downDependentPaths
;
183 std::vector
<const char*> _allDependentPaths
;
184 std::vector
<uint64_t> _initializerAddresses
;
185 std::vector
<macho_section
<P
>*> _dofSections
;
186 uint32_t _newWeakBindingInfoOffset
= 0;
187 uint32_t _newLazyBindingInfoOffset
= 0;
188 uint32_t _newBindingInfoOffset
= 0;
189 uint32_t _newExportInfoOffset
= 0;
190 uint32_t _exportInfoSize
= 0;
191 uint32_t _newWeakBindingSize
= 0;
192 uint32_t _newExportedSymbolsStartIndex
= 0;
193 uint32_t _newExportedSymbolCount
= 0;
194 uint32_t _newImportedSymbolsStartIndex
= 0;
195 uint32_t _newImportedSymbolCount
= 0;
196 uint32_t _newLocalSymbolsStartIndex
= 0;
197 uint32_t _newLocalSymbolCount
= 0;
198 uint32_t _newFunctionStartsOffset
= 0;
199 uint32_t _newDataInCodeOffset
= 0;
200 uint32_t _newIndirectSymbolTableOffset
= 0;
201 uint64_t _dyldSectionAddr
= 0;
202 DylibStripMode _stripMode
= DylibStripMode::stripAll
;
206 template <typename P
>
207 LinkeditOptimizer
<P
>::LinkeditOptimizer(const void* containerBuffer
, macho_header
<P
>* mh
,
208 const char* dylibID
, Diagnostics
& diag
)
209 : _mh(mh
), _dylibID(dylibID
), _containerBuffer(containerBuffer
), _diagnostics(diag
)
211 const unsigned origLoadCommandsSize
= mh
->sizeofcmds();
212 unsigned bytesRemaining
= origLoadCommandsSize
;
213 unsigned removedCount
= 0;
214 uint64_t textSegAddr
= 0;
216 const macho_load_command
<P
>* const cmds
= (macho_load_command
<P
>*)((uint8_t*)mh
+ sizeof(macho_header
<P
>));
217 const uint32_t cmdCount
= mh
->ncmds();
218 const macho_load_command
<P
>* cmd
= cmds
;
219 const macho_dylib_command
<P
>* dylibCmd
;
220 const macho_routines_command
<P
>* routinesCmd
;
221 macho_segment_command
<P
>* segCmd
;
222 for (uint32_t i
= 0; i
< cmdCount
; ++i
) {
224 switch (cmd
->cmd()) {
226 _symTabCmd
= (macho_symtab_command
<P
>*)cmd
;
229 _dynSymTabCmd
= (macho_dysymtab_command
<P
>*)cmd
;
232 case LC_DYLD_INFO_ONLY
:
233 _dyldInfo
= (macho_dyld_info_command
<P
>*)cmd
;
234 _exportInfoSize
= _dyldInfo
->export_size();
236 case LC_FUNCTION_STARTS
:
237 _functionStartsCmd
= (macho_linkedit_data_command
<P
>*)cmd
;
239 case LC_DATA_IN_CODE
:
240 _dataInCodeCmd
= (macho_linkedit_data_command
<P
>*)cmd
;
242 case LC_DYLD_EXPORTS_TRIE
:
243 _exportTrieCmd
= (macho_linkedit_data_command
<P
>*)cmd
;
244 _exportInfoSize
= _exportTrieCmd
->datasize();
248 routinesCmd
= (macho_routines_command
<P
>*)cmd
;
249 _initializerAddresses
.push_back(routinesCmd
->init_address());
251 case LC_REEXPORT_DYLIB
:
253 case LC_LOAD_WEAK_DYLIB
:
254 case LC_LOAD_UPWARD_DYLIB
:
255 dylibCmd
= (macho_dylib_command
<P
>*)cmd
;
256 _allDependentPaths
.push_back(dylibCmd
->name());
257 if ( cmd
->cmd() != LC_LOAD_UPWARD_DYLIB
)
258 _downDependentPaths
.push_back(dylibCmd
->name());
259 if ( cmd
->cmd() == LC_REEXPORT_DYLIB
)
260 _reExportPaths
.push_back(dylibCmd
->name());
262 case macho_segment_command
<P
>::CMD
:
263 segCmd
= (macho_segment_command
<P
>*)cmd
;
264 _segCmds
.push_back(segCmd
);
265 if ( strcmp(segCmd
->segname(), "__TEXT") == 0 ) {
266 textSegAddr
= segCmd
->vmaddr();
267 slide
= (uint64_t)mh
- textSegAddr
;
269 else if ( strcmp(segCmd
->segname(), "__LINKEDIT") == 0 ) {
270 _linkeditAddr
= segCmd
->vmaddr();
271 _linkeditBias
= (uint8_t*)mh
+ (_linkeditAddr
- textSegAddr
) - segCmd
->fileoff();
272 _linkeditSize
= (uint32_t)segCmd
->vmsize();
274 else if ( segCmd
->nsects() > 0 ) {
275 macho_section
<P
>* const sectionsStart
= (macho_section
<P
>*)((uint8_t*)segCmd
+ sizeof(macho_segment_command
<P
>));
276 macho_section
<P
>* const sectionsEnd
= §ionsStart
[segCmd
->nsects()];
277 for (macho_section
<P
>* sect
=sectionsStart
; sect
< sectionsEnd
; ++sect
) {
278 const uint8_t type
= sect
->flags() & SECTION_TYPE
;
279 if ( type
== S_MOD_INIT_FUNC_POINTERS
) {
280 const pint_t
* inits
= (pint_t
*)(sect
->addr()+slide
);
281 const size_t count
= sect
->size() / sizeof(pint_t
);
282 for (size_t j
=0; j
< count
; ++j
) {
283 uint64_t func
= P::getP(inits
[j
]);
284 _initializerAddresses
.push_back(func
);
287 else if ( type
== S_INIT_FUNC_OFFSETS
) {
288 const uint32_t* inits
= (uint32_t*)(sect
->addr()+slide
);
289 const size_t count
= sect
->size() / sizeof(uint32_t);
290 for (size_t j
=0; j
< count
; ++j
) {
291 uint32_t funcOffset
= E::get32(inits
[j
]);
292 _initializerAddresses
.push_back(textSegAddr
+ funcOffset
);
295 else if ( type
== S_DTRACE_DOF
) {
296 _dofSections
.push_back(sect
);
298 else if ( (strcmp(sect
->sectname(), "__dyld") == 0) && (strncmp(sect
->segname(), "__DATA", 6) == 0) ) {
299 _dyldSectionAddr
= sect
->addr();
304 case LC_DYLD_CHAINED_FIXUPS
:
305 case LC_SEGMENT_SPLIT_INFO
:
309 uint32_t cmdSize
= cmd
->cmdsize();
310 macho_load_command
<P
>* nextCmd
= (macho_load_command
<P
>*)(((uint8_t*)cmd
)+cmdSize
);
312 ::memmove((void*)cmd
, (void*)nextCmd
, bytesRemaining
);
316 bytesRemaining
-= cmdSize
;
320 // zero out stuff removed
321 ::bzero((void*)cmd
, bytesRemaining
);
323 mh
->set_ncmds(cmdCount
- removedCount
);
324 mh
->set_sizeofcmds(origLoadCommandsSize
- bytesRemaining
);
327 template <typename P
>
328 void LinkeditOptimizer
<P
>::setStripMode(DylibStripMode stripMode
) {
329 _stripMode
= stripMode
;
333 static void dumpLoadCommands(const uint8_t* mheader)
335 const mach_header* const mh = (mach_header*)mheader;
336 const uint32_t cmd_count = mh->ncmds;
337 bool is64 = (mh->magic == MH_MAGIC_64);
338 const load_command* cmds = (load_command*)(mheader + (is64 ? sizeof(mach_header_64) : sizeof(mach_header)));
339 const load_command* cmd = cmds;
340 const segment_command* segCmd;
341 const segment_command_64* seg64Cmd;
342 const symtab_command* symTab;
343 const linkedit_data_command* leData;
344 const uint8_t* linkEditBias = NULL;
345 for (uint32_t i = 0; i < cmd_count; ++i) {
348 segCmd = (const segment_command*)cmd;
349 printf("LC_SEGMENT\n");
350 printf(" segname = %s\n", segCmd->segname);
351 printf(" vmaddr = 0x%08X\n", segCmd->vmaddr);
352 printf(" vmsize = 0x%08X\n", segCmd->vmsize);
353 printf(" fileoff = 0x%08X\n", segCmd->fileoff);
354 printf(" filesize = 0x%08X\n", segCmd->filesize);
355 if ( strcmp(segCmd->segname, "__TEXT") == 0 ) {
356 linkEditBias = mheader - segCmd->fileoff;
360 seg64Cmd = (const segment_command_64*)cmd;
361 printf("LC_SEGMENT_64\n");
362 printf(" segname = %s\n", seg64Cmd->segname);
363 printf(" vmaddr = 0x%09llX\n", seg64Cmd->vmaddr);
364 printf(" vmsize = 0x%09llX\n", seg64Cmd->vmsize);
365 printf(" fileoff = 0x%09llX\n", seg64Cmd->fileoff);
366 printf(" filesize = 0x%09llX\n", seg64Cmd->filesize);
367 if ( strcmp(seg64Cmd->segname, "__TEXT") == 0 ) {
368 linkEditBias = mheader - seg64Cmd->fileoff;
372 symTab = (const symtab_command*)cmd;
373 printf("LC_SYMTAB\n");
374 printf(" symoff = 0x%08X\n", symTab->symoff);
375 printf(" nsyms = 0x%08X\n", symTab->nsyms);
376 printf(" stroff = 0x%08X\n", symTab->stroff);
377 printf(" strsize = 0x%08X\n", symTab->strsize);
379 const char* strPool = (char*)&linkEditBias[symTab->stroff];
380 const nlist_64* sym0 = (nlist_64*)(&linkEditBias[symTab->symoff]);
381 printf(" sym[0].n_strx = 0x%08X (%s)\n", sym0->n_un.n_strx, &strPool[sym0->n_un.n_strx]);
382 printf(" sym[0].n_type = 0x%02X\n", sym0->n_type);
383 printf(" sym[0].n_sect = 0x%02X\n", sym0->n_sect);
384 printf(" sym[0].n_desc = 0x%04X\n", sym0->n_desc);
385 printf(" sym[0].n_value = 0x%llX\n", sym0->n_value);
386 const nlist_64* sym1 = (nlist_64*)(&linkEditBias[symTab->symoff+16]);
387 printf(" sym[1].n_strx = 0x%08X (%s)\n", sym1->n_un.n_strx, &strPool[sym1->n_un.n_strx]);
388 printf(" sym[1].n_type = 0x%02X\n", sym1->n_type);
389 printf(" sym[1].n_sect = 0x%02X\n", sym1->n_sect);
390 printf(" sym[1].n_desc = 0x%04X\n", sym1->n_desc);
391 printf(" sym[1].n_value = 0x%llX\n", sym1->n_value);
394 case LC_FUNCTION_STARTS:
395 leData = (const linkedit_data_command*)cmd;
396 printf("LC_FUNCTION_STARTS\n");
397 printf(" dataoff = 0x%08X\n", leData->dataoff);
398 printf(" datasize = 0x%08X\n", leData->datasize);
400 //printf("0x%08X\n", cmd->cmd);
403 cmd = (const load_command*)(((uint8_t*)cmd)+cmd->cmdsize);
408 template <typename P
>
409 void LinkeditOptimizer
<P
>::updateLoadCommands(uint32_t mergedLinkeditStartOffset
, uint64_t mergedLinkeditAddr
, uint64_t newLinkeditSize
,
410 uint32_t sharedSymbolTableStartOffset
, uint32_t sharedSymbolTableCount
,
411 uint32_t sharedSymbolStringsOffset
, uint32_t sharedSymbolStringsSize
)
413 // update __LINKEDIT segment in all dylibs to overlap the same shared region
414 for (macho_segment_command
<P
>* segCmd
: _segCmds
) {
415 if ( strcmp(segCmd
->segname(), "__LINKEDIT") == 0 ) {
416 segCmd
->set_vmaddr(mergedLinkeditAddr
);
417 segCmd
->set_vmsize(newLinkeditSize
);
418 segCmd
->set_fileoff(mergedLinkeditStartOffset
);
419 segCmd
->set_filesize(newLinkeditSize
);
421 else if ( strcmp(segCmd
->segname(), "__TEXT") == 0 ) {
422 // HACK until lldb fixed in: <rdar://problem/20357466> DynamicLoaderMacOSXDYLD fixes for Monarch dyld shared cache
423 //segCmd->set_fileoff(0);
428 // update symbol table to point to shared symbol table
429 _symTabCmd
->set_symoff(mergedLinkeditStartOffset
+ sharedSymbolTableStartOffset
+ _newLocalSymbolsStartIndex
*sizeof(macho_nlist
<P
>));
430 _symTabCmd
->set_nsyms(_newLocalSymbolCount
+_newExportedSymbolCount
+_newImportedSymbolCount
);
431 _symTabCmd
->set_stroff(mergedLinkeditStartOffset
+ sharedSymbolStringsOffset
);
432 _symTabCmd
->set_strsize(sharedSymbolStringsSize
);
434 // update dynamic symbol table to have proper offsets into shared symbol table
435 if ( _dynSymTabCmd
!= nullptr ) {
436 _dynSymTabCmd
->set_ilocalsym(0);
437 _dynSymTabCmd
->set_nlocalsym(_newLocalSymbolCount
);
438 _dynSymTabCmd
->set_iextdefsym(_newExportedSymbolsStartIndex
-_newLocalSymbolsStartIndex
);
439 _dynSymTabCmd
->set_nextdefsym(_newExportedSymbolCount
);
440 _dynSymTabCmd
->set_iundefsym(_newImportedSymbolsStartIndex
-_newLocalSymbolsStartIndex
);
441 _dynSymTabCmd
->set_nundefsym(_newImportedSymbolCount
);
442 _dynSymTabCmd
->set_tocoff(0);
443 _dynSymTabCmd
->set_ntoc(0);
444 _dynSymTabCmd
->set_modtaboff(0);
445 _dynSymTabCmd
->set_nmodtab(0);
446 _dynSymTabCmd
->set_indirectsymoff(mergedLinkeditStartOffset
+ _newIndirectSymbolTableOffset
);
447 _dynSymTabCmd
->set_extreloff(0);
448 _dynSymTabCmd
->set_locreloff(0);
449 _dynSymTabCmd
->set_nlocrel(0);
453 if ( _dyldInfo
!= nullptr ) {
454 _dyldInfo
->set_rebase_off(0);
455 _dyldInfo
->set_rebase_size(0);
456 _dyldInfo
->set_bind_off(_dyldInfo
->bind_size() ? mergedLinkeditStartOffset
+ _newBindingInfoOffset
: 0);
457 _dyldInfo
->set_weak_bind_off(_dyldInfo
->weak_bind_size() ? mergedLinkeditStartOffset
+ _newWeakBindingInfoOffset
: 0 );
458 _dyldInfo
->set_lazy_bind_off(_dyldInfo
->lazy_bind_size() ? mergedLinkeditStartOffset
+ _newLazyBindingInfoOffset
: 0 );
459 _dyldInfo
->set_export_off(mergedLinkeditStartOffset
+ _newExportInfoOffset
);
460 } else if ( _exportTrieCmd
!= nullptr ) {
461 _exportTrieCmd
->set_dataoff(mergedLinkeditStartOffset
+ _newExportInfoOffset
);
464 // update function-starts
465 if ( _functionStartsCmd
!= nullptr )
466 _functionStartsCmd
->set_dataoff(mergedLinkeditStartOffset
+_newFunctionStartsOffset
);
468 // update data-in-code
469 if ( _dataInCodeCmd
!= nullptr )
470 _dataInCodeCmd
->set_dataoff(mergedLinkeditStartOffset
+_newDataInCodeOffset
);
473 template <typename P
>
474 void LinkeditOptimizer
<P
>::copyWeakBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
)
476 if ( _dyldInfo
== nullptr )
478 unsigned size
= _dyldInfo
->weak_bind_size();
480 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[_dyldInfo
->weak_bind_off()], size
);
481 _newWeakBindingInfoOffset
= offset
;
482 _newWeakBindingSize
= size
;
488 template <typename P
>
489 void LinkeditOptimizer
<P
>::copyLazyBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
)
491 if ( _dyldInfo
== nullptr )
493 unsigned size
= _dyldInfo
->lazy_bind_size();
495 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[_dyldInfo
->lazy_bind_off()], size
);
496 _newLazyBindingInfoOffset
= offset
;
501 template <typename P
>
502 void LinkeditOptimizer
<P
>::copyBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
)
504 if ( _dyldInfo
== nullptr )
506 unsigned size
= _dyldInfo
->bind_size();
508 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[_dyldInfo
->bind_off()], size
);
509 _newBindingInfoOffset
= offset
;
514 template <typename P
>
515 void LinkeditOptimizer
<P
>::copyExportInfo(uint8_t* newLinkEditContent
, uint32_t& offset
)
517 if ( (_dyldInfo
== nullptr) && (_exportTrieCmd
== nullptr) )
520 uint32_t exportOffset
= _exportTrieCmd
? _exportTrieCmd
->dataoff() : _dyldInfo
->export_off();
521 uint32_t exportSize
= _exportTrieCmd
? _exportTrieCmd
->datasize() : _dyldInfo
->export_size();
522 if ( exportSize
!= 0 ) {
523 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[exportOffset
], exportSize
);
524 _newExportInfoOffset
= offset
;
525 offset
+= exportSize
;
530 template <typename P
>
531 void LinkeditOptimizer
<P
>::copyFunctionStarts(uint8_t* newLinkEditContent
, uint32_t& offset
)
533 if ( _functionStartsCmd
== nullptr )
535 unsigned size
= _functionStartsCmd
->datasize();
536 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[_functionStartsCmd
->dataoff()], size
);
537 _newFunctionStartsOffset
= offset
;
541 template <typename P
>
542 void LinkeditOptimizer
<P
>::copyDataInCode(uint8_t* newLinkEditContent
, uint32_t& offset
)
544 if ( _dataInCodeCmd
== nullptr )
546 unsigned size
= _dataInCodeCmd
->datasize();
547 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[_dataInCodeCmd
->dataoff()], size
);
548 _newDataInCodeOffset
= offset
;
553 template <typename P
>
554 void LinkeditOptimizer
<P
>::copyLocalSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
,
555 bool redact
, std::vector
<LocalSymbolInfo
>& localSymbolInfos
,
556 std::vector
<macho_nlist
<P
>>& unmappedLocalSymbols
, SortedStringPool
<P
>& localSymbolsStringPool
)
558 localSymbolInfos
.push_back(LocalSymbolInfo());
560 LocalSymbolInfo
& localInfo
= localSymbolInfos
.back();
561 localInfo
.dylibOffset
= (uint32_t)(((uint8_t*)_mh
) - (uint8_t*)_containerBuffer
);
562 localInfo
.nlistStartIndex
= (uint32_t)unmappedLocalSymbols
.size();
563 localInfo
.nlistCount
= 0;
564 _newLocalSymbolsStartIndex
= symbolIndex
;
565 _newLocalSymbolCount
= 0;
567 switch (_stripMode
) {
568 case CacheBuilder::DylibStripMode::stripNone
:
569 case CacheBuilder::DylibStripMode::stripExports
:
571 case CacheBuilder::DylibStripMode::stripLocals
:
572 case CacheBuilder::DylibStripMode::stripAll
:
576 if ( _dynSymTabCmd
== nullptr )
579 const char* strings
= (char*)&_linkeditBias
[_symTabCmd
->stroff()];
580 const macho_nlist
<P
>* const symbolTable
= (macho_nlist
<P
>*)(&_linkeditBias
[_symTabCmd
->symoff()]);
581 const macho_nlist
<P
>* const firstExport
= &symbolTable
[_dynSymTabCmd
->ilocalsym()];
582 const macho_nlist
<P
>* const lastExport
= &symbolTable
[_dynSymTabCmd
->ilocalsym()+_dynSymTabCmd
->nlocalsym()];
583 for (const macho_nlist
<P
>* entry
= firstExport
; entry
< lastExport
; ++entry
) {
584 if ( (entry
->n_type() & N_TYPE
) != N_SECT
)
586 if ( (entry
->n_type() & N_STAB
) != 0)
588 const char* name
= &strings
[entry
->n_strx()];
589 macho_nlist
<P
>* newSymbolEntry
= (macho_nlist
<P
>*)&newLinkEditContent
[offset
];
590 *newSymbolEntry
= *entry
;
592 // if removing local symbols, change __text symbols to "<redacted>" so backtraces don't have bogus names
593 if ( entry
->n_sect() == 1 ) {
594 stringPool
.add(symbolIndex
, "<redacted>");
596 offset
+= sizeof(macho_nlist
<P
>);
598 // copy local symbol to unmmapped locals area
599 localSymbolsStringPool
.add((uint32_t)unmappedLocalSymbols
.size(), name
);
600 unmappedLocalSymbols
.push_back(*entry
);
601 unmappedLocalSymbols
.back().set_n_strx(0);
604 stringPool
.add(symbolIndex
, name
);
606 offset
+= sizeof(macho_nlist
<P
>);
609 _newLocalSymbolCount
= symbolIndex
- _newLocalSymbolsStartIndex
;
610 localInfo
.nlistCount
= (uint32_t)unmappedLocalSymbols
.size() - localInfo
.nlistStartIndex
;
614 template <typename P
>
615 void LinkeditOptimizer
<P
>::copyExportedSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
)
617 _newExportedSymbolsStartIndex
= symbolIndex
;
618 _newExportedSymbolCount
= 0;
620 switch (_stripMode
) {
621 case CacheBuilder::DylibStripMode::stripNone
:
622 case CacheBuilder::DylibStripMode::stripLocals
:
624 case CacheBuilder::DylibStripMode::stripExports
:
625 case CacheBuilder::DylibStripMode::stripAll
:
629 if ( _dynSymTabCmd
== nullptr )
632 const char* strings
= (char*)&_linkeditBias
[_symTabCmd
->stroff()];
633 const macho_nlist
<P
>* const symbolTable
= (macho_nlist
<P
>*)(&_linkeditBias
[_symTabCmd
->symoff()]);
634 const macho_nlist
<P
>* const firstExport
= &symbolTable
[_dynSymTabCmd
->iextdefsym()];
635 const macho_nlist
<P
>* const lastExport
= &symbolTable
[_dynSymTabCmd
->iextdefsym()+_dynSymTabCmd
->nextdefsym()];
636 uint32_t oldSymbolIndex
= _dynSymTabCmd
->iextdefsym();
637 for (const macho_nlist
<P
>* entry
= firstExport
; entry
< lastExport
; ++entry
, ++oldSymbolIndex
) {
638 if ( (entry
->n_type() & N_TYPE
) != N_SECT
)
640 const char* name
= &strings
[entry
->n_strx()];
641 if ( strncmp(name
, ".objc_", 6) == 0 )
643 if ( strncmp(name
, "$ld$", 4) == 0 )
645 macho_nlist
<P
>* newSymbolEntry
= (macho_nlist
<P
>*)&newLinkEditContent
[offset
];
646 *newSymbolEntry
= *entry
;
647 newSymbolEntry
->set_n_strx(0);
648 stringPool
.add(symbolIndex
, name
);
649 _oldToNewSymbolIndexes
[oldSymbolIndex
] = symbolIndex
- _newLocalSymbolsStartIndex
;
651 offset
+= sizeof(macho_nlist
<P
>);
653 _newExportedSymbolCount
= symbolIndex
- _newExportedSymbolsStartIndex
;
656 template <typename P
>
657 void LinkeditOptimizer
<P
>::copyImportedSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
)
659 _newImportedSymbolsStartIndex
= symbolIndex
;
660 _newImportedSymbolCount
= 0;
662 if ( _dynSymTabCmd
== nullptr )
665 switch (_stripMode
) {
666 case CacheBuilder::DylibStripMode::stripNone
:
668 case CacheBuilder::DylibStripMode::stripLocals
:
669 case CacheBuilder::DylibStripMode::stripExports
:
670 case CacheBuilder::DylibStripMode::stripAll
:
674 const char* strings
= (char*)&_linkeditBias
[_symTabCmd
->stroff()];
675 const macho_nlist
<P
>* const symbolTable
= (macho_nlist
<P
>*)(&_linkeditBias
[_symTabCmd
->symoff()]);
676 const macho_nlist
<P
>* const firstImport
= &symbolTable
[_dynSymTabCmd
->iundefsym()];
677 const macho_nlist
<P
>* const lastImport
= &symbolTable
[_dynSymTabCmd
->iundefsym()+_dynSymTabCmd
->nundefsym()];
678 uint32_t oldSymbolIndex
= _dynSymTabCmd
->iundefsym();
679 for (const macho_nlist
<P
>* entry
= firstImport
; entry
< lastImport
; ++entry
, ++oldSymbolIndex
) {
680 if ( (entry
->n_type() & N_TYPE
) != N_UNDF
)
682 const char* name
= &strings
[entry
->n_strx()];
683 macho_nlist
<P
>* newSymbolEntry
= (macho_nlist
<P
>*)&newLinkEditContent
[offset
];
684 *newSymbolEntry
= *entry
;
685 newSymbolEntry
->set_n_strx(0);
686 stringPool
.add(symbolIndex
, name
);
687 _oldToNewSymbolIndexes
[oldSymbolIndex
] = symbolIndex
- _newLocalSymbolsStartIndex
;
689 offset
+= sizeof(macho_nlist
<P
>);
691 _newImportedSymbolCount
= symbolIndex
- _newImportedSymbolsStartIndex
;
694 template <typename P
>
695 void LinkeditOptimizer
<P
>::copyIndirectSymbolTable(uint8_t* newLinkEditContent
, uint32_t& offset
)
697 _newIndirectSymbolTableOffset
= offset
;
699 if ( _dynSymTabCmd
== nullptr )
702 const uint32_t* const indirectTable
= (uint32_t*)&_linkeditBias
[_dynSymTabCmd
->indirectsymoff()];
703 uint32_t* newIndirectTable
= (uint32_t*)&newLinkEditContent
[offset
];
704 for (uint32_t i
=0; i
< _dynSymTabCmd
->nindirectsyms(); ++i
) {
705 uint32_t symbolIndex
= E::get32(indirectTable
[i
]);
706 if ( (symbolIndex
== INDIRECT_SYMBOL_ABS
) || (symbolIndex
== INDIRECT_SYMBOL_LOCAL
) )
707 E::set32(newIndirectTable
[i
], symbolIndex
);
709 E::set32(newIndirectTable
[i
], _oldToNewSymbolIndexes
[symbolIndex
]);
710 offset
+= sizeof(uint32_t);
714 template <typename P
>
715 void LinkeditOptimizer
<P
>::mergeLinkedits(CacheBuilder
& builder
,
716 CacheBuilder::UnmappedRegion
* localSymbolsRegion
,
717 std::vector
<LinkeditOptimizer
<P
>*>& optimizers
)
719 // allocate space for new linkedit data
720 uint64_t totalUnoptLinkeditsSize
= builder
._readOnlyRegion
.sizeInUse
- builder
._nonLinkEditReadOnlySize
;
721 uint8_t* newLinkEdit
= (uint8_t*)calloc(totalUnoptLinkeditsSize
, 1);
722 SortedStringPool
<P
> stringPool
;
725 builder
._diagnostics
.verbose("Merged LINKEDIT:\n");
727 // copy weak binding info
728 uint32_t startWeakBindInfosOffset
= offset
;
729 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
730 // Skip chained fixups as the in-place linked list isn't valid any more
731 const dyld3::MachOFile
* mf
= (dyld3::MachOFile
*)op
->machHeader();
732 if (!mf
->hasChainedFixups())
733 op
->copyWeakBindingInfo(newLinkEdit
, offset
);
735 builder
._diagnostics
.verbose(" weak bindings size: %5uKB\n", (uint32_t)(offset
-startWeakBindInfosOffset
)/1024);
738 uint32_t startExportInfosOffset
= offset
;
739 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
740 op
->copyExportInfo(newLinkEdit
, offset
);
742 builder
._diagnostics
.verbose(" exports info size: %5uKB\n", (uint32_t)(offset
-startExportInfosOffset
)/1024);
744 // in theory, an optimized cache can drop the binding info
747 uint32_t startBindingsInfosOffset
= offset
;
748 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
749 // Skip chained fixups as the in-place linked list isn't valid any more
750 const dyld3::MachOFile
* mf
= (dyld3::MachOFile
*)op
->machHeader();
751 if (!mf
->hasChainedFixups())
752 op
->copyBindingInfo(newLinkEdit
, offset
);
754 builder
._diagnostics
.verbose(" bindings size: %5uKB\n", (uint32_t)(offset
-startBindingsInfosOffset
)/1024);
756 // copy lazy binding info
757 uint32_t startLazyBindingsInfosOffset
= offset
;
758 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
759 // Skip chained fixups as the in-place linked list isn't valid any more
760 const dyld3::MachOFile
* mf
= (dyld3::MachOFile
*)op
->machHeader();
761 if (!mf
->hasChainedFixups())
762 op
->copyLazyBindingInfo(newLinkEdit
, offset
);
764 builder
._diagnostics
.verbose(" lazy bindings size: %5uKB\n", (offset
-startLazyBindingsInfosOffset
)/1024);
767 bool unmapLocals
= ( builder
._options
.localSymbolMode
== DyldSharedCache::LocalSymbolsMode::unmap
);
769 // copy symbol table entries
770 std::vector
<macho_nlist
<P
>> unmappedLocalSymbols
;
772 unmappedLocalSymbols
.reserve(0x01000000);
773 std::vector
<LocalSymbolInfo
> localSymbolInfos
;
774 localSymbolInfos
.reserve(optimizers
.size());
775 SortedStringPool
<P
> localSymbolsStringPool
;
776 uint32_t symbolIndex
= 0;
777 const uint32_t sharedSymbolTableStartOffset
= offset
;
778 uint32_t sharedSymbolTableExportsCount
= 0;
779 uint32_t sharedSymbolTableImportsCount
= 0;
780 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
781 op
->copyLocalSymbols(newLinkEdit
, stringPool
, offset
, symbolIndex
, unmapLocals
,
782 localSymbolInfos
, unmappedLocalSymbols
, localSymbolsStringPool
);
783 uint32_t x
= symbolIndex
;
784 op
->copyExportedSymbols(newLinkEdit
, stringPool
, offset
, symbolIndex
);
785 sharedSymbolTableExportsCount
+= (symbolIndex
-x
);
786 uint32_t y
= symbolIndex
;
787 op
->copyImportedSymbols(newLinkEdit
, stringPool
, offset
, symbolIndex
);
788 sharedSymbolTableImportsCount
+= (symbolIndex
-y
);
790 uint32_t sharedSymbolTableCount
= symbolIndex
;
791 const uint32_t sharedSymbolTableEndOffset
= offset
;
793 // copy function starts
794 uint32_t startFunctionStartsOffset
= offset
;
795 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
796 op
->copyFunctionStarts(newLinkEdit
, offset
);
798 builder
._diagnostics
.verbose(" function starts size: %5uKB\n", (offset
-startFunctionStartsOffset
)/1024);
800 // copy data-in-code info
801 uint32_t startDataInCodeOffset
= offset
;
802 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
803 op
->copyDataInCode(newLinkEdit
, offset
);
805 builder
._diagnostics
.verbose(" data in code size: %5uKB\n", (offset
-startDataInCodeOffset
)/1024);
807 // copy indirect symbol tables
808 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
809 op
->copyIndirectSymbolTable(newLinkEdit
, offset
);
811 // if indirect table has odd number of entries, end will not be 8-byte aligned
812 if ( (offset
% sizeof(typename
P::uint_t
)) != 0 )
816 uint32_t sharedSymbolStringsOffset
= offset
;
817 uint32_t sharedSymbolStringsSize
= stringPool
.copyPoolAndUpdateOffsets((char*)&newLinkEdit
[sharedSymbolStringsOffset
], (macho_nlist
<P
>*)&newLinkEdit
[sharedSymbolTableStartOffset
]);
818 offset
+= sharedSymbolStringsSize
;
819 uint32_t newLinkeditUnalignedSize
= offset
;
820 uint64_t newLinkeditAlignedSize
= align(offset
, 14);
821 builder
._diagnostics
.verbose(" symbol table size: %5uKB (%d exports, %d imports)\n", (sharedSymbolTableEndOffset
-sharedSymbolTableStartOffset
)/1024, sharedSymbolTableExportsCount
, sharedSymbolTableImportsCount
);
822 builder
._diagnostics
.verbose(" symbol string pool size: %5uKB\n", sharedSymbolStringsSize
/1024);
824 // overwrite mapped LINKEDIT area in cache with new merged LINKEDIT content
825 builder
._diagnostics
.verbose("LINKEDITS optimized from %uMB to %uMB\n", (uint32_t)totalUnoptLinkeditsSize
/(1024*1024), (uint32_t)newLinkeditUnalignedSize
/(1024*1024));
826 ::memcpy(builder
._readOnlyRegion
.buffer
+builder
._nonLinkEditReadOnlySize
, newLinkEdit
, newLinkeditAlignedSize
);
828 builder
._readOnlyRegion
.sizeInUse
= builder
._nonLinkEditReadOnlySize
+ newLinkeditAlignedSize
;
830 // overwrite end of un-opt linkedits to create a new unmapped region for local symbols
832 const uint32_t entriesOffset
= sizeof(dyld_cache_local_symbols_info
);
833 const uint32_t entriesCount
= (uint32_t)localSymbolInfos
.size();
834 const uint32_t nlistOffset
= (uint32_t)align(entriesOffset
+ entriesCount
* sizeof(dyld_cache_local_symbols_info
), 4); // 16-byte align start
835 const uint32_t nlistCount
= (uint32_t)unmappedLocalSymbols
.size();
836 const uint32_t stringsSize
= (uint32_t)localSymbolsStringPool
.size();
837 const uint32_t stringsOffset
= nlistOffset
+ nlistCount
* sizeof(macho_nlist
<P
>);
838 // allocate buffer for local symbols
839 const size_t localsBufferSize
= align(stringsOffset
+ stringsSize
, 14);
840 vm_address_t localsBuffer
;
841 if ( ::vm_allocate(mach_task_self(), &localsBuffer
, localsBufferSize
, VM_FLAGS_ANYWHERE
) == 0 ) {
842 dyld_cache_local_symbols_info
* infoHeader
= (dyld_cache_local_symbols_info
*)localsBuffer
;
843 // fill in header info
844 infoHeader
->nlistOffset
= nlistOffset
;
845 infoHeader
->nlistCount
= nlistCount
;
846 infoHeader
->stringsOffset
= stringsOffset
;
847 infoHeader
->stringsSize
= stringsSize
;
848 infoHeader
->entriesOffset
= entriesOffset
;
849 infoHeader
->entriesCount
= entriesCount
;
850 // copy info for each dylib
851 dyld_cache_local_symbols_entry
* entries
= (dyld_cache_local_symbols_entry
*)(((uint8_t*)infoHeader
)+entriesOffset
);
852 for (uint32_t i
=0; i
< entriesCount
; ++i
) {
853 entries
[i
].dylibOffset
= localSymbolInfos
[i
].dylibOffset
;
854 entries
[i
].nlistStartIndex
= localSymbolInfos
[i
].nlistStartIndex
;
855 entries
[i
].nlistCount
= localSymbolInfos
[i
].nlistCount
;
858 macho_nlist
<P
>* newLocalsSymbolTable
= (macho_nlist
<P
>*)(localsBuffer
+nlistOffset
);
859 ::memcpy(newLocalsSymbolTable
, &unmappedLocalSymbols
[0], nlistCount
*sizeof(macho_nlist
<P
>));
861 localSymbolsStringPool
.copyPoolAndUpdateOffsets(((char*)infoHeader
)+stringsOffset
, newLocalsSymbolTable
);
862 // return buffer of local symbols; it comes from vm_allocate(), so the caller must release it with vm_deallocate(), not free()
863 localSymbolsRegion
->buffer
= (uint8_t*)localsBuffer
;
864 localSymbolsRegion
->bufferSize
= localsBufferSize
;
865 localSymbolsRegion
->sizeInUse
= localsBufferSize
;
868 builder
._diagnostics
.warning("could not allocate local symbols");
872 // update all load commands to new merged layout
873 uint64_t linkeditsUnslidStartAddr
= builder
._readOnlyRegion
.unslidLoadAddress
+ builder
._nonLinkEditReadOnlySize
;
874 uint32_t linkeditsCacheFileOffset
= (uint32_t)(builder
._readOnlyRegion
.cacheFileOffset
+ builder
._nonLinkEditReadOnlySize
);
875 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
876 op
->updateLoadCommands(linkeditsCacheFileOffset
, linkeditsUnslidStartAddr
, newLinkeditUnalignedSize
,
877 sharedSymbolTableStartOffset
, sharedSymbolTableCount
,
878 sharedSymbolStringsOffset
, sharedSymbolStringsSize
);
// Runs the LINKEDIT merge for one pointer layout P.
// Creates a LinkeditOptimizer<P> with `new` for every entry of `images`,
// passes the resulting list to mergeLinkedits(), and then iterates the list
// once more.
//   builder            - supplies _diagnostics to each optimizer; forwarded to mergeLinkedits()
//   containerBuffer    - forwarded unchanged to each LinkeditOptimizer<P> constructor
//   localSymbolsRegion - forwarded unchanged to mergeLinkedits()
//   images             - one (mach_header*, const char*, DylibStripMode) tuple per image
// NOTE(review): this excerpt appears to be missing some original lines
// (braces and a few statements) -- check against the full file before editing.
883 template <typename P
>
884 void LinkeditOptimizer
<P
>::optimizeLinkedit(CacheBuilder
& builder
, const void* containerBuffer
,
885 CacheBuilder::UnmappedRegion
* localSymbolsRegion
,
886 const std::vector
<std::tuple
<const mach_header
*, const char*, DylibStripMode
>>& images
)
888 // construct a LinkeditOptimizer for each image
// NOTE(review): `optimizers` is declared __block, but no block that captures
// it is visible in this excerpt -- confirm whether the qualifier is still needed.
889 __block
std::vector
<LinkeditOptimizer
<P
>*> optimizers
;
// Tuple element 0 is cast to macho_header<P>* and passed to the constructor
// together with element 1 (a C string; presumably the image's path or install
// name -- TODO confirm). Element 2 is applied afterwards via setStripMode().
890 for (std::tuple
<const mach_header
*, const char*, DylibStripMode
> image
: images
) {
891 optimizers
.push_back(new LinkeditOptimizer
<P
>(containerBuffer
, (macho_header
<P
>*)std::get
<0>(image
), std::get
<1>(image
), builder
._diagnostics
));
892 optimizers
.back()->setStripMode(std::get
<2>(image
));
895 // add optimizer for each branch pool
// NOTE(review): branchPoolOffsets, cache and diag are neither parameters nor
// locals of this function as shown, and this constructor call passes three
// arguments where the one above passes four. Confirm whether this loop is
// compiled at all (e.g. excluded by a preprocessor guard not visible here).
896 for (uint64_t poolOffset
: branchPoolOffsets
) {
897 macho_header
<P
>* mh
= (macho_header
<P
>*)((char*)cache
+ poolOffset
);
898 optimizers
.push_back(new LinkeditOptimizer
<P
>(cache
, mh
, diag
));
901 // merge linkedit info
902 mergeLinkedits(builder
, localSymbolsRegion
, optimizers
);
// NOTE(review): the body of the loop below is not visible in this excerpt.
// Every optimizer above was created with `new`, so it presumably deletes
// them -- confirm.
905 for (LinkeditOptimizer
<P
>* op
: optimizers
)
// CacheBuilder entry point for LINKEDIT optimization.
// Uses _fullAllocatedBuffer as the container buffer and forwards to the static
// LinkeditOptimizer<P>::optimizeLinkedit(), passing *this as the builder along
// with the caller's localSymbolsRegion and images unchanged.
// Two instantiations are reachable from here: Pointer64<LittleEndian> and
// Pointer32<LittleEndian>.
// NOTE(review): the condition that chooses between the two returns is not
// visible in this excerpt; presumably it depends on the pointer width of the
// cache being built -- confirm against the full file.
909 void CacheBuilder::optimizeLinkedit(UnmappedRegion
* localSymbolsRegion
,
910 const std::vector
<std::tuple
<const mach_header
*, const char*, DylibStripMode
>>& images
)
912 const void* buffer
= (const void*)_fullAllocatedBuffer
;
// 64-bit pointer layout
914 return LinkeditOptimizer
<Pointer64
<LittleEndian
>>::optimizeLinkedit(*this, buffer
,
915 localSymbolsRegion
, images
);
// 32-bit pointer layout
918 return LinkeditOptimizer
<Pointer32
<LittleEndian
>>::optimizeLinkedit(*this, buffer
,
919 localSymbolsRegion
, images
);