1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*-
3 * Copyright (c) 2014 Apple Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
27 #include <sys/errno.h>
28 #include <sys/fcntl.h>
29 #include <mach-o/loader.h>
30 #include <mach-o/fat.h>
36 #include <unordered_map>
37 #include <unordered_set>
39 #include "MachOFileAbstraction.hpp"
41 #include "DyldSharedCache.h"
42 #include "CacheBuilder.h"
43 #include "MachOLoaded.h"
// Rounds `value` up to the next multiple of alignof(type).
// `value` is parenthesized so that arguments containing operators with lower
// precedence than '+' (e.g. `a | b`) are evaluated as a unit before the addition.
#define ALIGN_AS_TYPE(value, type) \
        (((value) + alignof(type) - 1) & (-alignof(type)))
51 class SortedStringPool
54 // add a string and symbol table entry index to be updated later
55 void add(uint32_t symbolIndex
, const char* symbolName
) {
56 _map
[symbolName
].push_back(symbolIndex
);
59 // copy sorted strings to buffer and update all symbol's string offsets
60 uint32_t copyPoolAndUpdateOffsets(char* dstStringPool
, macho_nlist
<P
>* symbolTable
) {
61 // walk sorted list of strings
62 dstStringPool
[0] = '\0'; // tradition for start of pool to be empty string
63 uint32_t poolOffset
= 1;
64 for (auto& entry
: _map
) {
65 const std::string
& symName
= entry
.first
;
66 // append string to pool
67 strcpy(&dstStringPool
[poolOffset
], symName
.c_str());
68 // set each string offset of each symbol using it
69 for (uint32_t symbolIndex
: entry
.second
) {
70 symbolTable
[symbolIndex
].set_n_strx(poolOffset
);
72 poolOffset
+= symName
.size() + 1;
74 // return size of pool
80 for (auto& entry
: _map
) {
81 size
+= (entry
.first
.size() + 1);
88 std::map
<std::string
, std::vector
<uint32_t>> _map
;
92 } // anonymous namespace
// Describes the run of local symbols that one image contributed to the
// unmapped local-symbols table.
struct LocalSymbolInfo
{
    uint32_t dylibOffset;       // offset of the image's mach_header from the start of the cache buffer
    uint32_t nlistStartIndex;   // index of the image's first entry in the unmapped locals nlist array
    uint32_t nlistCount;        // number of entries the image added to that array
};
103 template <typename P
>
104 class LinkeditOptimizer
{
106 LinkeditOptimizer(void* cacheBuffer
, macho_header
<P
>* mh
, Diagnostics
& diag
);
108 uint32_t linkeditSize() { return _linkeditSize
; }
109 uint64_t linkeditAddr() { return _linkeditAddr
; }
110 const char* installName() { return _installName
; }
111 void copyWeakBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
);
112 void copyLazyBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
);
113 void copyBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
);
114 void copyExportInfo(uint8_t* newLinkEditContent
, uint32_t& offset
);
115 void copyExportedSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
);
116 void copyImportedSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
);
117 void copyLocalSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
,
118 bool redact
, std::vector
<LocalSymbolInfo
>& localSymbolInfos
,
119 std::vector
<macho_nlist
<P
>>& unmappedLocalSymbols
, SortedStringPool
<P
>& localSymbolsStringPool
);
120 void copyFunctionStarts(uint8_t* newLinkEditContent
, uint32_t& offset
);
121 void copyDataInCode(uint8_t* newLinkEditContent
, uint32_t& offset
);
122 void copyIndirectSymbolTable(uint8_t* newLinkEditContent
, uint32_t& offset
);
123 void updateLoadCommands(uint32_t linkeditStartOffset
, uint64_t mergedLinkeditAddr
, uint64_t newLinkeditSize
,
124 uint32_t sharedSymbolTableStartOffset
, uint32_t sharedSymbolTableCount
,
125 uint32_t sharedSymbolStringsOffset
, uint32_t sharedSymbolStringsSize
);
127 macho_header
<P
>* machHeader() { return _mh
; }
128 const std::vector
<const char*> getDownwardDependents() { return _downDependentPaths
; }
129 const std::vector
<const char*> getAllDependents() { return _allDependentPaths
; }
130 const std::vector
<const char*> getReExportPaths() { return _reExportPaths
; }
131 const std::vector
<uint64_t> initializerAddresses() { return _initializerAddresses
; }
132 const std::vector
<macho_section
<P
>*> dofSections() { return _dofSections
; }
133 uint32_t exportsTrieLinkEditOffset() { return _newExportInfoOffset
; }
134 uint32_t exportsTrieLinkEditSize() { return _exportInfoSize
; }
135 uint32_t weakBindingLinkEditOffset() { return _newWeakBindingInfoOffset
; }
136 uint32_t weakBindingLinkEditSize() { return _newWeakBindingSize
; }
137 uint64_t dyldSectionAddress() { return _dyldSectionAddr
; }
138 const std::vector
<macho_segment_command
<P
>*>& segCmds() { return _segCmds
; }
141 static void optimizeLinkedit(CacheBuilder
& builder
);
142 static void mergeLinkedits(CacheBuilder
& builder
, std::vector
<LinkeditOptimizer
<P
>*>& optimizers
);
146 typedef typename
P::uint_t pint_t
;
147 typedef typename
P::E E
;
149 macho_header
<P
>* _mh
;
151 Diagnostics
& _diagnostics
;
152 uint32_t _linkeditSize
= 0;
153 uint64_t _linkeditAddr
= 0;
154 const uint8_t* _linkeditBias
= nullptr;
155 const char* _installName
= nullptr;
156 macho_symtab_command
<P
>* _symTabCmd
= nullptr;
157 macho_dysymtab_command
<P
>* _dynSymTabCmd
= nullptr;
158 macho_dyld_info_command
<P
>* _dyldInfo
= nullptr;
159 macho_linkedit_data_command
<P
>* _exportTrieCmd
= nullptr;
160 macho_linkedit_data_command
<P
>* _functionStartsCmd
= nullptr;
161 macho_linkedit_data_command
<P
>* _dataInCodeCmd
= nullptr;
162 std::vector
<macho_segment_command
<P
>*> _segCmds
;
163 std::unordered_map
<uint32_t,uint32_t> _oldToNewSymbolIndexes
;
164 std::vector
<const char*> _reExportPaths
;
165 std::vector
<const char*> _downDependentPaths
;
166 std::vector
<const char*> _allDependentPaths
;
167 std::vector
<uint64_t> _initializerAddresses
;
168 std::vector
<macho_section
<P
>*> _dofSections
;
169 uint32_t _newWeakBindingInfoOffset
= 0;
170 uint32_t _newLazyBindingInfoOffset
= 0;
171 uint32_t _newBindingInfoOffset
= 0;
172 uint32_t _newExportInfoOffset
= 0;
173 uint32_t _exportInfoSize
= 0;
174 uint32_t _newWeakBindingSize
= 0;
175 uint32_t _newExportedSymbolsStartIndex
= 0;
176 uint32_t _newExportedSymbolCount
= 0;
177 uint32_t _newImportedSymbolsStartIndex
= 0;
178 uint32_t _newImportedSymbolCount
= 0;
179 uint32_t _newLocalSymbolsStartIndex
= 0;
180 uint32_t _newLocalSymbolCount
= 0;
181 uint32_t _newFunctionStartsOffset
= 0;
182 uint32_t _newDataInCodeOffset
= 0;
183 uint32_t _newIndirectSymbolTableOffset
= 0;
184 uint64_t _dyldSectionAddr
= 0;
189 template <typename P
>
190 LinkeditOptimizer
<P
>::LinkeditOptimizer(void* cacheBuffer
, macho_header
<P
>* mh
, Diagnostics
& diag
)
191 : _mh(mh
), _cacheBuffer(cacheBuffer
), _diagnostics(diag
)
193 const unsigned origLoadCommandsSize
= mh
->sizeofcmds();
194 unsigned bytesRemaining
= origLoadCommandsSize
;
195 unsigned removedCount
= 0;
196 uint64_t textSegAddr
= 0;
198 const macho_load_command
<P
>* const cmds
= (macho_load_command
<P
>*)((uint8_t*)mh
+ sizeof(macho_header
<P
>));
199 const uint32_t cmdCount
= mh
->ncmds();
200 const macho_load_command
<P
>* cmd
= cmds
;
201 const macho_dylib_command
<P
>* dylibCmd
;
202 const macho_routines_command
<P
>* routinesCmd
;
203 macho_segment_command
<P
>* segCmd
;
204 for (uint32_t i
= 0; i
< cmdCount
; ++i
) {
206 switch (cmd
->cmd()) {
208 _installName
= ((macho_dylib_command
<P
>*)cmd
)->name();
211 _symTabCmd
= (macho_symtab_command
<P
>*)cmd
;
214 _dynSymTabCmd
= (macho_dysymtab_command
<P
>*)cmd
;
217 case LC_DYLD_INFO_ONLY
:
218 _dyldInfo
= (macho_dyld_info_command
<P
>*)cmd
;
219 _exportInfoSize
= _dyldInfo
->export_size();
221 case LC_FUNCTION_STARTS
:
222 _functionStartsCmd
= (macho_linkedit_data_command
<P
>*)cmd
;
224 case LC_DATA_IN_CODE
:
225 _dataInCodeCmd
= (macho_linkedit_data_command
<P
>*)cmd
;
227 case LC_DYLD_EXPORTS_TRIE
:
228 _exportTrieCmd
= (macho_linkedit_data_command
<P
>*)cmd
;
229 _exportInfoSize
= _exportTrieCmd
->datasize();
233 routinesCmd
= (macho_routines_command
<P
>*)cmd
;
234 _initializerAddresses
.push_back(routinesCmd
->init_address());
236 case LC_REEXPORT_DYLIB
:
238 case LC_LOAD_WEAK_DYLIB
:
239 case LC_LOAD_UPWARD_DYLIB
:
240 dylibCmd
= (macho_dylib_command
<P
>*)cmd
;
241 _allDependentPaths
.push_back(dylibCmd
->name());
242 if ( cmd
->cmd() != LC_LOAD_UPWARD_DYLIB
)
243 _downDependentPaths
.push_back(dylibCmd
->name());
244 if ( cmd
->cmd() == LC_REEXPORT_DYLIB
)
245 _reExportPaths
.push_back(dylibCmd
->name());
247 case macho_segment_command
<P
>::CMD
:
248 segCmd
= (macho_segment_command
<P
>*)cmd
;
249 _segCmds
.push_back(segCmd
);
250 if ( strcmp(segCmd
->segname(), "__TEXT") == 0 ) {
251 textSegAddr
= segCmd
->vmaddr();
252 slide
= (uint64_t)mh
- textSegAddr
;
254 else if ( strcmp(segCmd
->segname(), "__LINKEDIT") == 0 ) {
255 _linkeditAddr
= segCmd
->vmaddr();
256 _linkeditBias
= (uint8_t*)mh
+ (_linkeditAddr
- textSegAddr
) - segCmd
->fileoff();
257 _linkeditSize
= (uint32_t)segCmd
->vmsize();
259 else if ( segCmd
->nsects() > 0 ) {
260 macho_section
<P
>* const sectionsStart
= (macho_section
<P
>*)((uint8_t*)segCmd
+ sizeof(macho_segment_command
<P
>));
261 macho_section
<P
>* const sectionsEnd
= §ionsStart
[segCmd
->nsects()];
262 for (macho_section
<P
>* sect
=sectionsStart
; sect
< sectionsEnd
; ++sect
) {
263 const uint8_t type
= sect
->flags() & SECTION_TYPE
;
264 if ( type
== S_MOD_INIT_FUNC_POINTERS
) {
265 const pint_t
* inits
= (pint_t
*)(sect
->addr()+slide
);
266 const size_t count
= sect
->size() / sizeof(pint_t
);
267 for (size_t j
=0; j
< count
; ++j
) {
268 uint64_t func
= P::getP(inits
[j
]);
269 _initializerAddresses
.push_back(func
);
272 else if ( type
== S_INIT_FUNC_OFFSETS
) {
273 const uint32_t* inits
= (uint32_t*)(sect
->addr()+slide
);
274 const size_t count
= sect
->size() / sizeof(uint32_t);
275 for (size_t j
=0; j
< count
; ++j
) {
276 uint32_t funcOffset
= E::get32(inits
[j
]);
277 _initializerAddresses
.push_back(textSegAddr
+ funcOffset
);
280 else if ( type
== S_DTRACE_DOF
) {
281 _dofSections
.push_back(sect
);
283 else if ( (strcmp(sect
->sectname(), "__dyld") == 0) && (strncmp(sect
->segname(), "__DATA", 6) == 0) ) {
284 _dyldSectionAddr
= sect
->addr();
289 case LC_DYLD_CHAINED_FIXUPS
:
290 case LC_SEGMENT_SPLIT_INFO
:
294 uint32_t cmdSize
= cmd
->cmdsize();
295 macho_load_command
<P
>* nextCmd
= (macho_load_command
<P
>*)(((uint8_t*)cmd
)+cmdSize
);
297 ::memmove((void*)cmd
, (void*)nextCmd
, bytesRemaining
);
301 bytesRemaining
-= cmdSize
;
305 // zero out stuff removed
306 ::bzero((void*)cmd
, bytesRemaining
);
308 mh
->set_ncmds(cmdCount
- removedCount
);
309 mh
->set_sizeofcmds(origLoadCommandsSize
- bytesRemaining
);
313 static void dumpLoadCommands(const uint8_t* mheader)
315 const mach_header* const mh = (mach_header*)mheader;
316 const uint32_t cmd_count = mh->ncmds;
317 bool is64 = (mh->magic == MH_MAGIC_64);
318 const load_command* cmds = (load_command*)(mheader + (is64 ? sizeof(mach_header_64) : sizeof(mach_header)));
319 const load_command* cmd = cmds;
320 const segment_command* segCmd;
321 const segment_command_64* seg64Cmd;
322 const symtab_command* symTab;
323 const linkedit_data_command* leData;
324 const uint8_t* linkEditBias = NULL;
325 for (uint32_t i = 0; i < cmd_count; ++i) {
328 segCmd = (const segment_command*)cmd;
329 printf("LC_SEGMENT\n");
330 printf(" segname = %s\n", segCmd->segname);
331 printf(" vmaddr = 0x%08X\n", segCmd->vmaddr);
332 printf(" vmsize = 0x%08X\n", segCmd->vmsize);
333 printf(" fileoff = 0x%08X\n", segCmd->fileoff);
334 printf(" filesize = 0x%08X\n", segCmd->filesize);
335 if ( strcmp(segCmd->segname, "__TEXT") == 0 ) {
336 linkEditBias = mheader - segCmd->fileoff;
340 seg64Cmd = (const segment_command_64*)cmd;
341 printf("LC_SEGMENT_64\n");
342 printf(" segname = %s\n", seg64Cmd->segname);
343 printf(" vmaddr = 0x%09llX\n", seg64Cmd->vmaddr);
344 printf(" vmsize = 0x%09llX\n", seg64Cmd->vmsize);
345 printf(" fileoff = 0x%09llX\n", seg64Cmd->fileoff);
346 printf(" filesize = 0x%09llX\n", seg64Cmd->filesize);
347 if ( strcmp(seg64Cmd->segname, "__TEXT") == 0 ) {
348 linkEditBias = mheader - seg64Cmd->fileoff;
352 symTab = (const symtab_command*)cmd;
353 printf("LC_SYMTAB\n");
354 printf(" symoff = 0x%08X\n", symTab->symoff);
355 printf(" nsyms = 0x%08X\n", symTab->nsyms);
356 printf(" stroff = 0x%08X\n", symTab->stroff);
357 printf(" strsize = 0x%08X\n", symTab->strsize);
359 const char* strPool = (char*)&linkEditBias[symTab->stroff];
360 const nlist_64* sym0 = (nlist_64*)(&linkEditBias[symTab->symoff]);
361 printf(" sym[0].n_strx = 0x%08X (%s)\n", sym0->n_un.n_strx, &strPool[sym0->n_un.n_strx]);
362 printf(" sym[0].n_type = 0x%02X\n", sym0->n_type);
363 printf(" sym[0].n_sect = 0x%02X\n", sym0->n_sect);
364 printf(" sym[0].n_desc = 0x%04X\n", sym0->n_desc);
365 printf(" sym[0].n_value = 0x%llX\n", sym0->n_value);
366 const nlist_64* sym1 = (nlist_64*)(&linkEditBias[symTab->symoff+16]);
367 printf(" sym[1].n_strx = 0x%08X (%s)\n", sym1->n_un.n_strx, &strPool[sym1->n_un.n_strx]);
368 printf(" sym[1].n_type = 0x%02X\n", sym1->n_type);
369 printf(" sym[1].n_sect = 0x%02X\n", sym1->n_sect);
370 printf(" sym[1].n_desc = 0x%04X\n", sym1->n_desc);
371 printf(" sym[1].n_value = 0x%llX\n", sym1->n_value);
374 case LC_FUNCTION_STARTS:
375 leData = (const linkedit_data_command*)cmd;
376 printf("LC_FUNCTION_STARTS\n");
377 printf(" dataoff = 0x%08X\n", leData->dataoff);
378 printf(" datasize = 0x%08X\n", leData->datasize);
380 //printf("0x%08X\n", cmd->cmd);
383 cmd = (const load_command*)(((uint8_t*)cmd)+cmd->cmdsize);
388 template <typename P
>
389 void LinkeditOptimizer
<P
>::updateLoadCommands(uint32_t mergedLinkeditStartOffset
, uint64_t mergedLinkeditAddr
, uint64_t newLinkeditSize
,
390 uint32_t sharedSymbolTableStartOffset
, uint32_t sharedSymbolTableCount
,
391 uint32_t sharedSymbolStringsOffset
, uint32_t sharedSymbolStringsSize
)
393 // update __LINKEDIT segment in all dylibs to overlap the same shared region
394 for (macho_segment_command
<P
>* segCmd
: _segCmds
) {
395 if ( strcmp(segCmd
->segname(), "__LINKEDIT") == 0 ) {
396 segCmd
->set_vmaddr(mergedLinkeditAddr
);
397 segCmd
->set_vmsize(newLinkeditSize
);
398 segCmd
->set_fileoff(mergedLinkeditStartOffset
);
399 segCmd
->set_filesize(newLinkeditSize
);
401 else if ( strcmp(segCmd
->segname(), "__TEXT") == 0 ) {
402 // HACK until lldb fixed in: <rdar://problem/20357466> DynamicLoaderMacOSXDYLD fixes for Monarch dyld shared cache
403 //segCmd->set_fileoff(0);
408 // update symbol table to point to shared symbol table
409 _symTabCmd
->set_symoff(mergedLinkeditStartOffset
+ sharedSymbolTableStartOffset
+ _newLocalSymbolsStartIndex
*sizeof(macho_nlist
<P
>));
410 _symTabCmd
->set_nsyms(_newLocalSymbolCount
+_newExportedSymbolCount
+_newImportedSymbolCount
);
411 _symTabCmd
->set_stroff(mergedLinkeditStartOffset
+ sharedSymbolStringsOffset
);
412 _symTabCmd
->set_strsize(sharedSymbolStringsSize
);
414 // update dynamic symbol table to have proper offsets into shared symbol table
415 _dynSymTabCmd
->set_ilocalsym(0);
416 _dynSymTabCmd
->set_nlocalsym(_newLocalSymbolCount
);
417 _dynSymTabCmd
->set_iextdefsym(_newExportedSymbolsStartIndex
-_newLocalSymbolsStartIndex
);
418 _dynSymTabCmd
->set_nextdefsym(_newExportedSymbolCount
);
419 _dynSymTabCmd
->set_iundefsym(_newImportedSymbolsStartIndex
-_newLocalSymbolsStartIndex
);
420 _dynSymTabCmd
->set_nundefsym(_newImportedSymbolCount
);
421 _dynSymTabCmd
->set_tocoff(0);
422 _dynSymTabCmd
->set_ntoc(0);
423 _dynSymTabCmd
->set_modtaboff(0);
424 _dynSymTabCmd
->set_nmodtab(0);
425 _dynSymTabCmd
->set_indirectsymoff(mergedLinkeditStartOffset
+ _newIndirectSymbolTableOffset
);
426 _dynSymTabCmd
->set_extreloff(0);
427 _dynSymTabCmd
->set_locreloff(0);
428 _dynSymTabCmd
->set_nlocrel(0);
431 if ( _dyldInfo
!= nullptr ) {
432 _dyldInfo
->set_rebase_off(0);
433 _dyldInfo
->set_rebase_size(0);
434 _dyldInfo
->set_bind_off(_dyldInfo
->bind_size() ? mergedLinkeditStartOffset
+ _newBindingInfoOffset
: 0);
435 _dyldInfo
->set_weak_bind_off(_dyldInfo
->weak_bind_size() ? mergedLinkeditStartOffset
+ _newWeakBindingInfoOffset
: 0 );
436 _dyldInfo
->set_lazy_bind_off(_dyldInfo
->lazy_bind_size() ? mergedLinkeditStartOffset
+ _newLazyBindingInfoOffset
: 0 );
437 _dyldInfo
->set_export_off(mergedLinkeditStartOffset
+ _newExportInfoOffset
);
438 } else if ( _exportTrieCmd
!= nullptr ) {
439 _exportTrieCmd
->set_dataoff(mergedLinkeditStartOffset
+ _newExportInfoOffset
);
442 // update function-starts
443 if ( _functionStartsCmd
!= nullptr )
444 _functionStartsCmd
->set_dataoff(mergedLinkeditStartOffset
+_newFunctionStartsOffset
);
446 // update data-in-code
447 if ( _dataInCodeCmd
!= nullptr )
448 _dataInCodeCmd
->set_dataoff(mergedLinkeditStartOffset
+_newDataInCodeOffset
);
451 template <typename P
>
452 void LinkeditOptimizer
<P
>::copyWeakBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
)
454 if ( _dyldInfo
== nullptr )
456 unsigned size
= _dyldInfo
->weak_bind_size();
458 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[_dyldInfo
->weak_bind_off()], size
);
459 _newWeakBindingInfoOffset
= offset
;
460 _newWeakBindingSize
= size
;
466 template <typename P
>
467 void LinkeditOptimizer
<P
>::copyLazyBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
)
469 if ( _dyldInfo
== nullptr )
471 unsigned size
= _dyldInfo
->lazy_bind_size();
473 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[_dyldInfo
->lazy_bind_off()], size
);
474 _newLazyBindingInfoOffset
= offset
;
479 template <typename P
>
480 void LinkeditOptimizer
<P
>::copyBindingInfo(uint8_t* newLinkEditContent
, uint32_t& offset
)
482 if ( _dyldInfo
== nullptr )
484 unsigned size
= _dyldInfo
->bind_size();
486 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[_dyldInfo
->bind_off()], size
);
487 _newBindingInfoOffset
= offset
;
492 template <typename P
>
493 void LinkeditOptimizer
<P
>::copyExportInfo(uint8_t* newLinkEditContent
, uint32_t& offset
)
495 if ( (_dyldInfo
== nullptr) && (_exportTrieCmd
== nullptr) )
498 uint32_t exportOffset
= _exportTrieCmd
? _exportTrieCmd
->dataoff() : _dyldInfo
->export_off();
499 uint32_t exportSize
= _exportTrieCmd
? _exportTrieCmd
->datasize() : _dyldInfo
->export_size();
500 if ( exportSize
!= 0 ) {
501 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[exportOffset
], exportSize
);
502 _newExportInfoOffset
= offset
;
503 offset
+= exportSize
;
508 template <typename P
>
509 void LinkeditOptimizer
<P
>::copyFunctionStarts(uint8_t* newLinkEditContent
, uint32_t& offset
)
511 if ( _functionStartsCmd
== nullptr )
513 unsigned size
= _functionStartsCmd
->datasize();
514 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[_functionStartsCmd
->dataoff()], size
);
515 _newFunctionStartsOffset
= offset
;
519 template <typename P
>
520 void LinkeditOptimizer
<P
>::copyDataInCode(uint8_t* newLinkEditContent
, uint32_t& offset
)
522 if ( _dataInCodeCmd
== nullptr )
524 unsigned size
= _dataInCodeCmd
->datasize();
525 ::memcpy(&newLinkEditContent
[offset
], &_linkeditBias
[_dataInCodeCmd
->dataoff()], size
);
526 _newDataInCodeOffset
= offset
;
531 template <typename P
>
532 void LinkeditOptimizer
<P
>::copyLocalSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
,
533 bool redact
, std::vector
<LocalSymbolInfo
>& localSymbolInfos
,
534 std::vector
<macho_nlist
<P
>>& unmappedLocalSymbols
, SortedStringPool
<P
>& localSymbolsStringPool
)
536 LocalSymbolInfo localInfo
;
537 localInfo
.dylibOffset
= (uint32_t)(((uint8_t*)_mh
) - (uint8_t*)_cacheBuffer
);
538 localInfo
.nlistStartIndex
= (uint32_t)unmappedLocalSymbols
.size();
539 localInfo
.nlistCount
= 0;
540 _newLocalSymbolsStartIndex
= symbolIndex
;
541 const char* strings
= (char*)&_linkeditBias
[_symTabCmd
->stroff()];
542 const macho_nlist
<P
>* const symbolTable
= (macho_nlist
<P
>*)(&_linkeditBias
[_symTabCmd
->symoff()]);
543 const macho_nlist
<P
>* const firstExport
= &symbolTable
[_dynSymTabCmd
->ilocalsym()];
544 const macho_nlist
<P
>* const lastExport
= &symbolTable
[_dynSymTabCmd
->ilocalsym()+_dynSymTabCmd
->nlocalsym()];
545 for (const macho_nlist
<P
>* entry
= firstExport
; entry
< lastExport
; ++entry
) {
546 if ( (entry
->n_type() & N_TYPE
) != N_SECT
)
548 if ( (entry
->n_type() & N_STAB
) != 0)
550 const char* name
= &strings
[entry
->n_strx()];
551 macho_nlist
<P
>* newSymbolEntry
= (macho_nlist
<P
>*)&newLinkEditContent
[offset
];
552 *newSymbolEntry
= *entry
;
554 // if removing local symbols, change __text symbols to "<redacted>" so backtraces don't have bogus names
555 if ( entry
->n_sect() == 1 ) {
556 stringPool
.add(symbolIndex
, "<redacted>");
558 offset
+= sizeof(macho_nlist
<P
>);
560 // copy local symbol to unmmapped locals area
561 localSymbolsStringPool
.add((uint32_t)unmappedLocalSymbols
.size(), name
);
562 unmappedLocalSymbols
.push_back(*entry
);
563 unmappedLocalSymbols
.back().set_n_strx(0);
566 stringPool
.add(symbolIndex
, name
);
568 offset
+= sizeof(macho_nlist
<P
>);
571 _newLocalSymbolCount
= symbolIndex
- _newLocalSymbolsStartIndex
;
572 localInfo
.nlistCount
= (uint32_t)unmappedLocalSymbols
.size() - localInfo
.nlistStartIndex
;
573 localSymbolInfos
.push_back(localInfo
);
577 template <typename P
>
578 void LinkeditOptimizer
<P
>::copyExportedSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
)
580 _newExportedSymbolsStartIndex
= symbolIndex
;
581 const char* strings
= (char*)&_linkeditBias
[_symTabCmd
->stroff()];
582 const macho_nlist
<P
>* const symbolTable
= (macho_nlist
<P
>*)(&_linkeditBias
[_symTabCmd
->symoff()]);
583 const macho_nlist
<P
>* const firstExport
= &symbolTable
[_dynSymTabCmd
->iextdefsym()];
584 const macho_nlist
<P
>* const lastExport
= &symbolTable
[_dynSymTabCmd
->iextdefsym()+_dynSymTabCmd
->nextdefsym()];
585 uint32_t oldSymbolIndex
= _dynSymTabCmd
->iextdefsym();
586 for (const macho_nlist
<P
>* entry
= firstExport
; entry
< lastExport
; ++entry
, ++oldSymbolIndex
) {
587 if ( (entry
->n_type() & N_TYPE
) != N_SECT
)
589 const char* name
= &strings
[entry
->n_strx()];
590 if ( strncmp(name
, ".objc_", 6) == 0 )
592 if ( strncmp(name
, "$ld$", 4) == 0 )
594 macho_nlist
<P
>* newSymbolEntry
= (macho_nlist
<P
>*)&newLinkEditContent
[offset
];
595 *newSymbolEntry
= *entry
;
596 newSymbolEntry
->set_n_strx(0);
597 stringPool
.add(symbolIndex
, name
);
598 _oldToNewSymbolIndexes
[oldSymbolIndex
] = symbolIndex
- _newLocalSymbolsStartIndex
;
600 offset
+= sizeof(macho_nlist
<P
>);
602 _newExportedSymbolCount
= symbolIndex
- _newExportedSymbolsStartIndex
;
605 template <typename P
>
606 void LinkeditOptimizer
<P
>::copyImportedSymbols(uint8_t* newLinkEditContent
, SortedStringPool
<P
>& stringPool
, uint32_t& offset
, uint32_t& symbolIndex
)
608 _newImportedSymbolsStartIndex
= symbolIndex
;
609 const char* strings
= (char*)&_linkeditBias
[_symTabCmd
->stroff()];
610 const macho_nlist
<P
>* const symbolTable
= (macho_nlist
<P
>*)(&_linkeditBias
[_symTabCmd
->symoff()]);
611 const macho_nlist
<P
>* const firstImport
= &symbolTable
[_dynSymTabCmd
->iundefsym()];
612 const macho_nlist
<P
>* const lastImport
= &symbolTable
[_dynSymTabCmd
->iundefsym()+_dynSymTabCmd
->nundefsym()];
613 uint32_t oldSymbolIndex
= _dynSymTabCmd
->iundefsym();
614 for (const macho_nlist
<P
>* entry
= firstImport
; entry
< lastImport
; ++entry
, ++oldSymbolIndex
) {
615 if ( (entry
->n_type() & N_TYPE
) != N_UNDF
)
617 const char* name
= &strings
[entry
->n_strx()];
618 macho_nlist
<P
>* newSymbolEntry
= (macho_nlist
<P
>*)&newLinkEditContent
[offset
];
619 *newSymbolEntry
= *entry
;
620 newSymbolEntry
->set_n_strx(0);
621 stringPool
.add(symbolIndex
, name
);
622 _oldToNewSymbolIndexes
[oldSymbolIndex
] = symbolIndex
- _newLocalSymbolsStartIndex
;
624 offset
+= sizeof(macho_nlist
<P
>);
626 _newImportedSymbolCount
= symbolIndex
- _newImportedSymbolsStartIndex
;
629 template <typename P
>
630 void LinkeditOptimizer
<P
>::copyIndirectSymbolTable(uint8_t* newLinkEditContent
, uint32_t& offset
)
632 _newIndirectSymbolTableOffset
= offset
;
633 const uint32_t* const indirectTable
= (uint32_t*)&_linkeditBias
[_dynSymTabCmd
->indirectsymoff()];
634 uint32_t* newIndirectTable
= (uint32_t*)&newLinkEditContent
[offset
];
635 for (uint32_t i
=0; i
< _dynSymTabCmd
->nindirectsyms(); ++i
) {
636 uint32_t symbolIndex
= E::get32(indirectTable
[i
]);
637 if ( (symbolIndex
== INDIRECT_SYMBOL_ABS
) || (symbolIndex
== INDIRECT_SYMBOL_LOCAL
) )
638 E::set32(newIndirectTable
[i
], symbolIndex
);
640 E::set32(newIndirectTable
[i
], _oldToNewSymbolIndexes
[symbolIndex
]);
641 offset
+= sizeof(uint32_t);
645 template <typename P
>
646 void LinkeditOptimizer
<P
>::mergeLinkedits(CacheBuilder
& builder
, std::vector
<LinkeditOptimizer
<P
>*>& optimizers
)
648 // allocate space for new linkedit data
649 uint64_t totalUnoptLinkeditsSize
= builder
._readOnlyRegion
.sizeInUse
- builder
._nonLinkEditReadOnlySize
;
650 uint8_t* newLinkEdit
= (uint8_t*)calloc(totalUnoptLinkeditsSize
, 1);
651 SortedStringPool
<P
> stringPool
;
654 builder
._diagnostics
.verbose("Merged LINKEDIT:\n");
656 // copy weak binding info
657 uint32_t startWeakBindInfosOffset
= offset
;
658 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
659 // Skip chained fixups as the in-place linked list isn't valid any more
660 const dyld3::MachOFile
* mf
= (dyld3::MachOFile
*)op
->machHeader();
661 if (!mf
->hasChainedFixups())
662 op
->copyWeakBindingInfo(newLinkEdit
, offset
);
664 builder
._diagnostics
.verbose(" weak bindings size: %5uKB\n", (uint32_t)(offset
-startWeakBindInfosOffset
)/1024);
667 uint32_t startExportInfosOffset
= offset
;
668 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
669 op
->copyExportInfo(newLinkEdit
, offset
);
671 builder
._diagnostics
.verbose(" exports info size: %5uKB\n", (uint32_t)(offset
-startExportInfosOffset
)/1024);
673 // in theory, an optimized cache can drop the binding info
676 uint32_t startBindingsInfosOffset
= offset
;
677 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
678 // Skip chained fixups as the in-place linked list isn't valid any more
679 const dyld3::MachOFile
* mf
= (dyld3::MachOFile
*)op
->machHeader();
680 if (!mf
->hasChainedFixups())
681 op
->copyBindingInfo(newLinkEdit
, offset
);
683 builder
._diagnostics
.verbose(" bindings size: %5uKB\n", (uint32_t)(offset
-startBindingsInfosOffset
)/1024);
685 // copy lazy binding info
686 uint32_t startLazyBindingsInfosOffset
= offset
;
687 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
688 // Skip chained fixups as the in-place linked list isn't valid any more
689 const dyld3::MachOFile
* mf
= (dyld3::MachOFile
*)op
->machHeader();
690 if (!mf
->hasChainedFixups())
691 op
->copyLazyBindingInfo(newLinkEdit
, offset
);
693 builder
._diagnostics
.verbose(" lazy bindings size: %5uKB\n", (offset
-startLazyBindingsInfosOffset
)/1024);
696 // copy symbol table entries
697 std::vector
<macho_nlist
<P
>> unmappedLocalSymbols
;
698 if ( builder
._options
.excludeLocalSymbols
)
699 unmappedLocalSymbols
.reserve(0x01000000);
700 std::vector
<LocalSymbolInfo
> localSymbolInfos
;
701 localSymbolInfos
.reserve(optimizers
.size());
702 SortedStringPool
<P
> localSymbolsStringPool
;
703 uint32_t symbolIndex
= 0;
704 const uint32_t sharedSymbolTableStartOffset
= offset
;
705 uint32_t sharedSymbolTableExportsCount
= 0;
706 uint32_t sharedSymbolTableImportsCount
= 0;
707 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
708 op
->copyLocalSymbols(newLinkEdit
, stringPool
, offset
, symbolIndex
, builder
._options
.excludeLocalSymbols
,
709 localSymbolInfos
, unmappedLocalSymbols
, localSymbolsStringPool
);
710 uint32_t x
= symbolIndex
;
711 op
->copyExportedSymbols(newLinkEdit
, stringPool
, offset
, symbolIndex
);
712 sharedSymbolTableExportsCount
+= (symbolIndex
-x
);
713 uint32_t y
= symbolIndex
;
714 op
->copyImportedSymbols(newLinkEdit
, stringPool
, offset
, symbolIndex
);
715 sharedSymbolTableImportsCount
+= (symbolIndex
-y
);
717 uint32_t sharedSymbolTableCount
= symbolIndex
;
718 const uint32_t sharedSymbolTableEndOffset
= offset
;
720 // copy function starts
721 uint32_t startFunctionStartsOffset
= offset
;
722 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
723 op
->copyFunctionStarts(newLinkEdit
, offset
);
725 builder
._diagnostics
.verbose(" function starts size: %5uKB\n", (offset
-startFunctionStartsOffset
)/1024);
727 // copy data-in-code info
728 uint32_t startDataInCodeOffset
= offset
;
729 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
730 op
->copyDataInCode(newLinkEdit
, offset
);
732 builder
._diagnostics
.verbose(" data in code size: %5uKB\n", (offset
-startDataInCodeOffset
)/1024);
734 // copy indirect symbol tables
735 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
736 op
->copyIndirectSymbolTable(newLinkEdit
, offset
);
738 // if indirect table has odd number of entries, end will not be 8-byte aligned
739 if ( (offset
% sizeof(typename
P::uint_t
)) != 0 )
743 uint32_t sharedSymbolStringsOffset
= offset
;
744 uint32_t sharedSymbolStringsSize
= stringPool
.copyPoolAndUpdateOffsets((char*)&newLinkEdit
[sharedSymbolStringsOffset
], (macho_nlist
<P
>*)&newLinkEdit
[sharedSymbolTableStartOffset
]);
745 offset
+= sharedSymbolStringsSize
;
746 uint32_t newLinkeditUnalignedSize
= offset
;
747 uint64_t newLinkeditAlignedSize
= align(offset
, 14);
748 builder
._diagnostics
.verbose(" symbol table size: %5uKB (%d exports, %d imports)\n", (sharedSymbolTableEndOffset
-sharedSymbolTableStartOffset
)/1024, sharedSymbolTableExportsCount
, sharedSymbolTableImportsCount
);
749 builder
._diagnostics
.verbose(" symbol string pool size: %5uKB\n", sharedSymbolStringsSize
/1024);
750 builder
._sharedStringsPoolVmOffset
= (uint32_t)((builder
._readOnlyRegion
.unslidLoadAddress
- builder
._readExecuteRegion
.unslidLoadAddress
) + builder
._nonLinkEditReadOnlySize
+ sharedSymbolStringsOffset
);
752 // overwrite mapped LINKEDIT area in cache with new merged LINKEDIT content
753 builder
._diagnostics
.verbose("LINKEDITS optimized from %uMB to %uMB\n", (uint32_t)totalUnoptLinkeditsSize
/(1024*1024), (uint32_t)newLinkeditUnalignedSize
/(1024*1024));
754 ::memcpy(builder
._readOnlyRegion
.buffer
+builder
._nonLinkEditReadOnlySize
, newLinkEdit
, newLinkeditAlignedSize
);
756 builder
._readOnlyRegion
.sizeInUse
= builder
._nonLinkEditReadOnlySize
+ newLinkeditAlignedSize
;
758 // overwrite end of un-opt linkedits to create a new unmapped region for local symbols
759 if ( builder
._options
.excludeLocalSymbols
) {
760 const uint32_t entriesOffset
= sizeof(dyld_cache_local_symbols_info
);
761 const uint32_t entriesCount
= (uint32_t)localSymbolInfos
.size();
762 const uint32_t nlistOffset
= (uint32_t)align(entriesOffset
+ entriesCount
* sizeof(dyld_cache_local_symbols_info
), 4); // 16-byte align start
763 const uint32_t nlistCount
= (uint32_t)unmappedLocalSymbols
.size();
764 const uint32_t stringsSize
= (uint32_t)localSymbolsStringPool
.size();
765 const uint32_t stringsOffset
= nlistOffset
+ nlistCount
* sizeof(macho_nlist
<P
>);
766 // allocate buffer for local symbols
767 const size_t localsBufferSize
= align(stringsOffset
+ stringsSize
, 14);
768 vm_address_t localsBuffer
;
769 if ( ::vm_allocate(mach_task_self(), &localsBuffer
, localsBufferSize
, VM_FLAGS_ANYWHERE
) == 0 ) {
770 dyld_cache_local_symbols_info
* infoHeader
= (dyld_cache_local_symbols_info
*)localsBuffer
;
771 // fill in header info
772 infoHeader
->nlistOffset
= nlistOffset
;
773 infoHeader
->nlistCount
= nlistCount
;
774 infoHeader
->stringsOffset
= stringsOffset
;
775 infoHeader
->stringsSize
= stringsSize
;
776 infoHeader
->entriesOffset
= entriesOffset
;
777 infoHeader
->entriesCount
= entriesCount
;
778 // copy info for each dylib
779 dyld_cache_local_symbols_entry
* entries
= (dyld_cache_local_symbols_entry
*)(((uint8_t*)infoHeader
)+entriesOffset
);
780 for (uint32_t i
=0; i
< entriesCount
; ++i
) {
781 entries
[i
].dylibOffset
= localSymbolInfos
[i
].dylibOffset
;
782 entries
[i
].nlistStartIndex
= localSymbolInfos
[i
].nlistStartIndex
;
783 entries
[i
].nlistCount
= localSymbolInfos
[i
].nlistCount
;
786 macho_nlist
<P
>* newLocalsSymbolTable
= (macho_nlist
<P
>*)(localsBuffer
+nlistOffset
);
787 ::memcpy(newLocalsSymbolTable
, &unmappedLocalSymbols
[0], nlistCount
*sizeof(macho_nlist
<P
>));
789 localSymbolsStringPool
.copyPoolAndUpdateOffsets(((char*)infoHeader
)+stringsOffset
, newLocalsSymbolTable
);
790 // update cache header
791 DyldSharedCache
* cacheHeader
= (DyldSharedCache
*)builder
._readExecuteRegion
.buffer
;
792 cacheHeader
->header
.localSymbolsSize
= localsBufferSize
;
793 // return buffer of local symbols, caller to free() it
794 builder
._localSymbolsRegion
.buffer
= (uint8_t*)localsBuffer
;
795 builder
._localSymbolsRegion
.bufferSize
= localsBufferSize
;
796 builder
._localSymbolsRegion
.sizeInUse
= localsBufferSize
;
799 builder
._diagnostics
.warning("could not allocate local symbols");
803 // update all load commands to new merged layout
804 uint64_t linkeditsUnslidStartAddr
= builder
._readOnlyRegion
.unslidLoadAddress
+ builder
._nonLinkEditReadOnlySize
;
805 uint32_t linkeditsCacheFileOffset
= (uint32_t)(builder
._readOnlyRegion
.cacheFileOffset
+ builder
._nonLinkEditReadOnlySize
);
806 for (LinkeditOptimizer
<P
>* op
: optimizers
) {
807 op
->updateLoadCommands(linkeditsCacheFileOffset
, linkeditsUnslidStartAddr
, newLinkeditUnalignedSize
,
808 sharedSymbolTableStartOffset
, sharedSymbolTableCount
,
809 sharedSymbolStringsOffset
, sharedSymbolStringsSize
);
814 template <typename P
>
815 void LinkeditOptimizer
<P
>::optimizeLinkedit(CacheBuilder
& builder
)
817 DyldSharedCache
* cache
= (DyldSharedCache
*)builder
._readExecuteRegion
.buffer
;
818 // construct a LinkeditOptimizer for each image
819 __block
std::vector
<LinkeditOptimizer
<P
>*> optimizers
;
820 cache
->forEachImage(^(const mach_header
* mh
, const char*) {
821 optimizers
.push_back(new LinkeditOptimizer
<P
>(cache
, (macho_header
<P
>*)mh
, builder
._diagnostics
));
824 // add optimizer for each branch pool
825 for (uint64_t poolOffset
: branchPoolOffsets
) {
826 macho_header
<P
>* mh
= (macho_header
<P
>*)((char*)cache
+ poolOffset
);
827 optimizers
.push_back(new LinkeditOptimizer
<P
>(cache
, mh
, diag
));
830 // merge linkedit info
831 mergeLinkedits(builder
, optimizers
);
834 for (LinkeditOptimizer
<P
>* op
: optimizers
)
838 void CacheBuilder::optimizeLinkedit()
841 return LinkeditOptimizer
<Pointer64
<LittleEndian
>>::optimizeLinkedit(*this);
844 return LinkeditOptimizer
<Pointer32
<LittleEndian
>>::optimizeLinkedit(*this);