]> git.saurik.com Git - apple/dyld.git/blob - dyld3/shared-cache/OptimizerLinkedit.cpp
dyld-750.5.tar.gz
[apple/dyld.git] / dyld3 / shared-cache / OptimizerLinkedit.cpp
1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*-
2 *
3 * Copyright (c) 2014 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25
26 #include <dirent.h>
27 #include <sys/errno.h>
28 #include <sys/fcntl.h>
29 #include <mach-o/loader.h>
30 #include <mach-o/fat.h>
31 #include <assert.h>
32
33 #include <fstream>
34 #include <string>
35 #include <algorithm>
36 #include <unordered_map>
37 #include <unordered_set>
38
39 #include "MachOFileAbstraction.hpp"
40 #include "Trie.hpp"
41 #include "DyldSharedCache.h"
42 #include "CacheBuilder.h"
43 #include "MachOLoaded.h"
44
// Rounds `value` up to the next multiple of alignof(type).
// `value` is parenthesized so that an argument containing operators of lower
// precedence than `+` (for example `a | b`) is aligned as a whole expression.
#define ALIGN_AS_TYPE(value, type) \
    (((value) + alignof(type) - 1) & (-alignof(type)))
47
48 namespace {
49
50 template <typename P>
51 class SortedStringPool
52 {
53 public:
54 // add a string and symbol table entry index to be updated later
55 void add(uint32_t symbolIndex, const char* symbolName) {
56 _map[symbolName].push_back(symbolIndex);
57 }
58
59 // copy sorted strings to buffer and update all symbol's string offsets
60 uint32_t copyPoolAndUpdateOffsets(char* dstStringPool, macho_nlist<P>* symbolTable) {
61 // walk sorted list of strings
62 dstStringPool[0] = '\0'; // tradition for start of pool to be empty string
63 uint32_t poolOffset = 1;
64 for (auto& entry : _map) {
65 const std::string& symName = entry.first;
66 // append string to pool
67 strcpy(&dstStringPool[poolOffset], symName.c_str());
68 // set each string offset of each symbol using it
69 for (uint32_t symbolIndex : entry.second) {
70 symbolTable[symbolIndex].set_n_strx(poolOffset);
71 }
72 poolOffset += symName.size() + 1;
73 }
74 // return size of pool
75 return poolOffset;
76 }
77
78 size_t size() {
79 size_t size = 1;
80 for (auto& entry : _map) {
81 size += (entry.first.size() + 1);
82 }
83 return size;
84 }
85
86
87 private:
88 std::map<std::string, std::vector<uint32_t>> _map;
89 };
90
91
92 } // anonymous namespace
93
94
// Per-dylib record describing where that dylib's local symbols live in the
// list of unmapped local symbols. All fields start at zero so a
// default-constructed record never carries indeterminate values.
struct LocalSymbolInfo
{
    uint32_t    dylibOffset     = 0;    // offset of the dylib's mach header from the start of the cache buffer
    uint32_t    nlistStartIndex = 0;    // index of the dylib's first entry in the unmapped local symbols list
    uint32_t    nlistCount      = 0;    // number of entries the dylib contributed to that list
};
101
102
103 template <typename P>
104 class LinkeditOptimizer {
105 public:
106 LinkeditOptimizer(void* cacheBuffer, macho_header<P>* mh, Diagnostics& diag);
107
108 uint32_t linkeditSize() { return _linkeditSize; }
109 uint64_t linkeditAddr() { return _linkeditAddr; }
110 const char* installName() { return _installName; }
111 void copyWeakBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset);
112 void copyLazyBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset);
113 void copyBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset);
114 void copyExportInfo(uint8_t* newLinkEditContent, uint32_t& offset);
115 void copyExportedSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex);
116 void copyImportedSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex);
117 void copyLocalSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex,
118 bool redact, std::vector<LocalSymbolInfo>& localSymbolInfos,
119 std::vector<macho_nlist<P>>& unmappedLocalSymbols, SortedStringPool<P>& localSymbolsStringPool);
120 void copyFunctionStarts(uint8_t* newLinkEditContent, uint32_t& offset);
121 void copyDataInCode(uint8_t* newLinkEditContent, uint32_t& offset);
122 void copyIndirectSymbolTable(uint8_t* newLinkEditContent, uint32_t& offset);
123 void updateLoadCommands(uint32_t linkeditStartOffset, uint64_t mergedLinkeditAddr, uint64_t newLinkeditSize,
124 uint32_t sharedSymbolTableStartOffset, uint32_t sharedSymbolTableCount,
125 uint32_t sharedSymbolStringsOffset, uint32_t sharedSymbolStringsSize);
126
127 macho_header<P>* machHeader() { return _mh; }
128 const std::vector<const char*> getDownwardDependents() { return _downDependentPaths; }
129 const std::vector<const char*> getAllDependents() { return _allDependentPaths; }
130 const std::vector<const char*> getReExportPaths() { return _reExportPaths; }
131 const std::vector<uint64_t> initializerAddresses() { return _initializerAddresses; }
132 const std::vector<macho_section<P>*> dofSections() { return _dofSections; }
133 uint32_t exportsTrieLinkEditOffset() { return _newExportInfoOffset; }
134 uint32_t exportsTrieLinkEditSize() { return _exportInfoSize; }
135 uint32_t weakBindingLinkEditOffset() { return _newWeakBindingInfoOffset; }
136 uint32_t weakBindingLinkEditSize() { return _newWeakBindingSize; }
137 uint64_t dyldSectionAddress() { return _dyldSectionAddr; }
138 const std::vector<macho_segment_command<P>*>& segCmds() { return _segCmds; }
139
140
141 static void optimizeLinkedit(CacheBuilder& builder);
142 static void mergeLinkedits(CacheBuilder& builder, std::vector<LinkeditOptimizer<P>*>& optimizers);
143
144 private:
145
146 typedef typename P::uint_t pint_t;
147 typedef typename P::E E;
148
149 macho_header<P>* _mh;
150 void* _cacheBuffer;
151 Diagnostics& _diagnostics;
152 uint32_t _linkeditSize = 0;
153 uint64_t _linkeditAddr = 0;
154 const uint8_t* _linkeditBias = nullptr;
155 const char* _installName = nullptr;
156 macho_symtab_command<P>* _symTabCmd = nullptr;
157 macho_dysymtab_command<P>* _dynSymTabCmd = nullptr;
158 macho_dyld_info_command<P>* _dyldInfo = nullptr;
159 macho_linkedit_data_command<P>* _exportTrieCmd = nullptr;
160 macho_linkedit_data_command<P>* _functionStartsCmd = nullptr;
161 macho_linkedit_data_command<P>* _dataInCodeCmd = nullptr;
162 std::vector<macho_segment_command<P>*> _segCmds;
163 std::unordered_map<uint32_t,uint32_t> _oldToNewSymbolIndexes;
164 std::vector<const char*> _reExportPaths;
165 std::vector<const char*> _downDependentPaths;
166 std::vector<const char*> _allDependentPaths;
167 std::vector<uint64_t> _initializerAddresses;
168 std::vector<macho_section<P>*> _dofSections;
169 uint32_t _newWeakBindingInfoOffset = 0;
170 uint32_t _newLazyBindingInfoOffset = 0;
171 uint32_t _newBindingInfoOffset = 0;
172 uint32_t _newExportInfoOffset = 0;
173 uint32_t _exportInfoSize = 0;
174 uint32_t _newWeakBindingSize = 0;
175 uint32_t _newExportedSymbolsStartIndex = 0;
176 uint32_t _newExportedSymbolCount = 0;
177 uint32_t _newImportedSymbolsStartIndex = 0;
178 uint32_t _newImportedSymbolCount = 0;
179 uint32_t _newLocalSymbolsStartIndex = 0;
180 uint32_t _newLocalSymbolCount = 0;
181 uint32_t _newFunctionStartsOffset = 0;
182 uint32_t _newDataInCodeOffset = 0;
183 uint32_t _newIndirectSymbolTableOffset = 0;
184 uint64_t _dyldSectionAddr = 0;
185 };
186
187
188
189 template <typename P>
190 LinkeditOptimizer<P>::LinkeditOptimizer(void* cacheBuffer, macho_header<P>* mh, Diagnostics& diag)
191 : _mh(mh), _cacheBuffer(cacheBuffer), _diagnostics(diag)
192 {
193 const unsigned origLoadCommandsSize = mh->sizeofcmds();
194 unsigned bytesRemaining = origLoadCommandsSize;
195 unsigned removedCount = 0;
196 uint64_t textSegAddr = 0;
197 int64_t slide = 0;
198 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
199 const uint32_t cmdCount = mh->ncmds();
200 const macho_load_command<P>* cmd = cmds;
201 const macho_dylib_command<P>* dylibCmd;
202 const macho_routines_command<P>* routinesCmd;
203 macho_segment_command<P>* segCmd;
204 for (uint32_t i = 0; i < cmdCount; ++i) {
205 bool remove = false;
206 switch (cmd->cmd()) {
207 case LC_ID_DYLIB:
208 _installName = ((macho_dylib_command<P>*)cmd)->name();
209 break;
210 case LC_SYMTAB:
211 _symTabCmd = (macho_symtab_command<P>*)cmd;
212 break;
213 case LC_DYSYMTAB:
214 _dynSymTabCmd = (macho_dysymtab_command<P>*)cmd;
215 break;
216 case LC_DYLD_INFO:
217 case LC_DYLD_INFO_ONLY:
218 _dyldInfo = (macho_dyld_info_command<P>*)cmd;
219 _exportInfoSize = _dyldInfo->export_size();
220 break;
221 case LC_FUNCTION_STARTS:
222 _functionStartsCmd = (macho_linkedit_data_command<P>*)cmd;
223 break;
224 case LC_DATA_IN_CODE:
225 _dataInCodeCmd = (macho_linkedit_data_command<P>*)cmd;
226 break;
227 case LC_DYLD_EXPORTS_TRIE:
228 _exportTrieCmd = (macho_linkedit_data_command<P>*)cmd;
229 _exportInfoSize = _exportTrieCmd->datasize();
230 break;
231 case LC_ROUTINES:
232 case LC_ROUTINES_64:
233 routinesCmd = (macho_routines_command<P>*)cmd;
234 _initializerAddresses.push_back(routinesCmd->init_address());
235 break;
236 case LC_REEXPORT_DYLIB:
237 case LC_LOAD_DYLIB:
238 case LC_LOAD_WEAK_DYLIB:
239 case LC_LOAD_UPWARD_DYLIB:
240 dylibCmd = (macho_dylib_command<P>*)cmd;
241 _allDependentPaths.push_back(dylibCmd->name());
242 if ( cmd->cmd() != LC_LOAD_UPWARD_DYLIB )
243 _downDependentPaths.push_back(dylibCmd->name());
244 if ( cmd->cmd() == LC_REEXPORT_DYLIB )
245 _reExportPaths.push_back(dylibCmd->name());
246 break;
247 case macho_segment_command<P>::CMD:
248 segCmd = (macho_segment_command<P>*)cmd;
249 _segCmds.push_back(segCmd);
250 if ( strcmp(segCmd->segname(), "__TEXT") == 0 ) {
251 textSegAddr = segCmd->vmaddr();
252 slide = (uint64_t)mh - textSegAddr;
253 }
254 else if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
255 _linkeditAddr = segCmd->vmaddr();
256 _linkeditBias = (uint8_t*)mh + (_linkeditAddr - textSegAddr) - segCmd->fileoff();
257 _linkeditSize = (uint32_t)segCmd->vmsize();
258 }
259 else if ( segCmd->nsects() > 0 ) {
260 macho_section<P>* const sectionsStart = (macho_section<P>*)((uint8_t*)segCmd + sizeof(macho_segment_command<P>));
261 macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
262 for (macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
263 const uint8_t type = sect->flags() & SECTION_TYPE;
264 if ( type == S_MOD_INIT_FUNC_POINTERS ) {
265 const pint_t* inits = (pint_t*)(sect->addr()+slide);
266 const size_t count = sect->size() / sizeof(pint_t);
267 for (size_t j=0; j < count; ++j) {
268 uint64_t func = P::getP(inits[j]);
269 _initializerAddresses.push_back(func);
270 }
271 }
272 else if ( type == S_INIT_FUNC_OFFSETS ) {
273 const uint32_t* inits = (uint32_t*)(sect->addr()+slide);
274 const size_t count = sect->size() / sizeof(uint32_t);
275 for (size_t j=0; j < count; ++j) {
276 uint32_t funcOffset = E::get32(inits[j]);
277 _initializerAddresses.push_back(textSegAddr + funcOffset);
278 }
279 }
280 else if ( type == S_DTRACE_DOF ) {
281 _dofSections.push_back(sect);
282 }
283 else if ( (strcmp(sect->sectname(), "__dyld") == 0) && (strncmp(sect->segname(), "__DATA", 6) == 0) ) {
284 _dyldSectionAddr = sect->addr();
285 }
286 }
287 }
288 break;
289 case LC_DYLD_CHAINED_FIXUPS:
290 case LC_SEGMENT_SPLIT_INFO:
291 remove = true;
292 break;
293 }
294 uint32_t cmdSize = cmd->cmdsize();
295 macho_load_command<P>* nextCmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmdSize);
296 if ( remove ) {
297 ::memmove((void*)cmd, (void*)nextCmd, bytesRemaining);
298 ++removedCount;
299 }
300 else {
301 bytesRemaining -= cmdSize;
302 cmd = nextCmd;
303 }
304 }
305 // zero out stuff removed
306 ::bzero((void*)cmd, bytesRemaining);
307 // update header
308 mh->set_ncmds(cmdCount - removedCount);
309 mh->set_sizeofcmds(origLoadCommandsSize - bytesRemaining);
310 }
311
312 /*
313 static void dumpLoadCommands(const uint8_t* mheader)
314 {
315 const mach_header* const mh = (mach_header*)mheader;
316 const uint32_t cmd_count = mh->ncmds;
317 bool is64 = (mh->magic == MH_MAGIC_64);
318 const load_command* cmds = (load_command*)(mheader + (is64 ? sizeof(mach_header_64) : sizeof(mach_header)));
319 const load_command* cmd = cmds;
320 const segment_command* segCmd;
321 const segment_command_64* seg64Cmd;
322 const symtab_command* symTab;
323 const linkedit_data_command* leData;
324 const uint8_t* linkEditBias = NULL;
325 for (uint32_t i = 0; i < cmd_count; ++i) {
326 switch (cmd->cmd) {
327 case LC_SEGMENT:
328 segCmd = (const segment_command*)cmd;
329 printf("LC_SEGMENT\n");
330 printf(" segname = %s\n", segCmd->segname);
331 printf(" vmaddr = 0x%08X\n", segCmd->vmaddr);
332 printf(" vmsize = 0x%08X\n", segCmd->vmsize);
333 printf(" fileoff = 0x%08X\n", segCmd->fileoff);
334 printf(" filesize = 0x%08X\n", segCmd->filesize);
335 if ( strcmp(segCmd->segname, "__TEXT") == 0 ) {
336 linkEditBias = mheader - segCmd->fileoff;
337 }
338 break;
339 case LC_SEGMENT_64:
340 seg64Cmd = (const segment_command_64*)cmd;
341 printf("LC_SEGMENT_64\n");
342 printf(" segname = %s\n", seg64Cmd->segname);
343 printf(" vmaddr = 0x%09llX\n", seg64Cmd->vmaddr);
344 printf(" vmsize = 0x%09llX\n", seg64Cmd->vmsize);
345 printf(" fileoff = 0x%09llX\n", seg64Cmd->fileoff);
346 printf(" filesize = 0x%09llX\n", seg64Cmd->filesize);
347 if ( strcmp(seg64Cmd->segname, "__TEXT") == 0 ) {
348 linkEditBias = mheader - seg64Cmd->fileoff;
349 }
350 break;
351 case LC_SYMTAB:
352 symTab = (const symtab_command*)cmd;
353 printf("LC_SYMTAB\n");
354 printf(" symoff = 0x%08X\n", symTab->symoff);
355 printf(" nsyms = 0x%08X\n", symTab->nsyms);
356 printf(" stroff = 0x%08X\n", symTab->stroff);
357 printf(" strsize = 0x%08X\n", symTab->strsize);
358 {
359 const char* strPool = (char*)&linkEditBias[symTab->stroff];
360 const nlist_64* sym0 = (nlist_64*)(&linkEditBias[symTab->symoff]);
361 printf(" sym[0].n_strx = 0x%08X (%s)\n", sym0->n_un.n_strx, &strPool[sym0->n_un.n_strx]);
362 printf(" sym[0].n_type = 0x%02X\n", sym0->n_type);
363 printf(" sym[0].n_sect = 0x%02X\n", sym0->n_sect);
364 printf(" sym[0].n_desc = 0x%04X\n", sym0->n_desc);
365 printf(" sym[0].n_value = 0x%llX\n", sym0->n_value);
366 const nlist_64* sym1 = (nlist_64*)(&linkEditBias[symTab->symoff+16]);
367 printf(" sym[1].n_strx = 0x%08X (%s)\n", sym1->n_un.n_strx, &strPool[sym1->n_un.n_strx]);
368 printf(" sym[1].n_type = 0x%02X\n", sym1->n_type);
369 printf(" sym[1].n_sect = 0x%02X\n", sym1->n_sect);
370 printf(" sym[1].n_desc = 0x%04X\n", sym1->n_desc);
371 printf(" sym[1].n_value = 0x%llX\n", sym1->n_value);
372 }
373 break;
374 case LC_FUNCTION_STARTS:
375 leData = (const linkedit_data_command*)cmd;
376 printf("LC_FUNCTION_STARTS\n");
377 printf(" dataoff = 0x%08X\n", leData->dataoff);
378 printf(" datasize = 0x%08X\n", leData->datasize);
379 default:
380 //printf("0x%08X\n", cmd->cmd);
381 break;
382 }
383 cmd = (const load_command*)(((uint8_t*)cmd)+cmd->cmdsize);
384 }
385 }
386 */
387
388 template <typename P>
389 void LinkeditOptimizer<P>::updateLoadCommands(uint32_t mergedLinkeditStartOffset, uint64_t mergedLinkeditAddr, uint64_t newLinkeditSize,
390 uint32_t sharedSymbolTableStartOffset, uint32_t sharedSymbolTableCount,
391 uint32_t sharedSymbolStringsOffset, uint32_t sharedSymbolStringsSize)
392 {
393 // update __LINKEDIT segment in all dylibs to overlap the same shared region
394 for (macho_segment_command<P>* segCmd : _segCmds) {
395 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
396 segCmd->set_vmaddr(mergedLinkeditAddr);
397 segCmd->set_vmsize(newLinkeditSize);
398 segCmd->set_fileoff(mergedLinkeditStartOffset);
399 segCmd->set_filesize(newLinkeditSize);
400 }
401 else if ( strcmp(segCmd->segname(), "__TEXT") == 0 ) {
402 // HACK until lldb fixed in: <rdar://problem/20357466> DynamicLoaderMacOSXDYLD fixes for Monarch dyld shared cache
403 //segCmd->set_fileoff(0);
404
405 }
406 }
407
408 // update symbol table to point to shared symbol table
409 _symTabCmd->set_symoff(mergedLinkeditStartOffset + sharedSymbolTableStartOffset + _newLocalSymbolsStartIndex*sizeof(macho_nlist<P>));
410 _symTabCmd->set_nsyms(_newLocalSymbolCount+_newExportedSymbolCount+_newImportedSymbolCount);
411 _symTabCmd->set_stroff(mergedLinkeditStartOffset + sharedSymbolStringsOffset);
412 _symTabCmd->set_strsize(sharedSymbolStringsSize);
413
414 // update dynamic symbol table to have proper offsets into shared symbol table
415 _dynSymTabCmd->set_ilocalsym(0);
416 _dynSymTabCmd->set_nlocalsym(_newLocalSymbolCount);
417 _dynSymTabCmd->set_iextdefsym(_newExportedSymbolsStartIndex-_newLocalSymbolsStartIndex);
418 _dynSymTabCmd->set_nextdefsym(_newExportedSymbolCount);
419 _dynSymTabCmd->set_iundefsym(_newImportedSymbolsStartIndex-_newLocalSymbolsStartIndex);
420 _dynSymTabCmd->set_nundefsym(_newImportedSymbolCount);
421 _dynSymTabCmd->set_tocoff(0);
422 _dynSymTabCmd->set_ntoc(0);
423 _dynSymTabCmd->set_modtaboff(0);
424 _dynSymTabCmd->set_nmodtab(0);
425 _dynSymTabCmd->set_indirectsymoff(mergedLinkeditStartOffset + _newIndirectSymbolTableOffset);
426 _dynSymTabCmd->set_extreloff(0);
427 _dynSymTabCmd->set_locreloff(0);
428 _dynSymTabCmd->set_nlocrel(0);
429
430 // update dyld info
431 if ( _dyldInfo != nullptr ) {
432 _dyldInfo->set_rebase_off(0);
433 _dyldInfo->set_rebase_size(0);
434 _dyldInfo->set_bind_off(_dyldInfo->bind_size() ? mergedLinkeditStartOffset + _newBindingInfoOffset : 0);
435 _dyldInfo->set_weak_bind_off(_dyldInfo->weak_bind_size() ? mergedLinkeditStartOffset + _newWeakBindingInfoOffset : 0 );
436 _dyldInfo->set_lazy_bind_off(_dyldInfo->lazy_bind_size() ? mergedLinkeditStartOffset + _newLazyBindingInfoOffset : 0 );
437 _dyldInfo->set_export_off(mergedLinkeditStartOffset + _newExportInfoOffset);
438 } else if ( _exportTrieCmd != nullptr ) {
439 _exportTrieCmd->set_dataoff(mergedLinkeditStartOffset + _newExportInfoOffset);
440 }
441
442 // update function-starts
443 if ( _functionStartsCmd != nullptr )
444 _functionStartsCmd->set_dataoff(mergedLinkeditStartOffset+_newFunctionStartsOffset);
445
446 // update data-in-code
447 if ( _dataInCodeCmd != nullptr )
448 _dataInCodeCmd->set_dataoff(mergedLinkeditStartOffset+_newDataInCodeOffset);
449 }
450
451 template <typename P>
452 void LinkeditOptimizer<P>::copyWeakBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset)
453 {
454 if ( _dyldInfo == nullptr )
455 return;
456 unsigned size = _dyldInfo->weak_bind_size();
457 if ( size != 0 ) {
458 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[_dyldInfo->weak_bind_off()], size);
459 _newWeakBindingInfoOffset = offset;
460 _newWeakBindingSize = size;
461 offset += size;
462 }
463 }
464
465
466 template <typename P>
467 void LinkeditOptimizer<P>::copyLazyBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset)
468 {
469 if ( _dyldInfo == nullptr )
470 return;
471 unsigned size = _dyldInfo->lazy_bind_size();
472 if ( size != 0 ) {
473 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[_dyldInfo->lazy_bind_off()], size);
474 _newLazyBindingInfoOffset = offset;
475 offset += size;
476 }
477 }
478
479 template <typename P>
480 void LinkeditOptimizer<P>::copyBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset)
481 {
482 if ( _dyldInfo == nullptr )
483 return;
484 unsigned size = _dyldInfo->bind_size();
485 if ( size != 0 ) {
486 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[_dyldInfo->bind_off()], size);
487 _newBindingInfoOffset = offset;
488 offset += size;
489 }
490 }
491
492 template <typename P>
493 void LinkeditOptimizer<P>::copyExportInfo(uint8_t* newLinkEditContent, uint32_t& offset)
494 {
495 if ( (_dyldInfo == nullptr) && (_exportTrieCmd == nullptr) )
496 return;
497
498 uint32_t exportOffset = _exportTrieCmd ? _exportTrieCmd->dataoff() : _dyldInfo->export_off();
499 uint32_t exportSize = _exportTrieCmd ? _exportTrieCmd->datasize() : _dyldInfo->export_size();
500 if ( exportSize != 0 ) {
501 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[exportOffset], exportSize);
502 _newExportInfoOffset = offset;
503 offset += exportSize;
504 }
505 }
506
507
508 template <typename P>
509 void LinkeditOptimizer<P>::copyFunctionStarts(uint8_t* newLinkEditContent, uint32_t& offset)
510 {
511 if ( _functionStartsCmd == nullptr )
512 return;
513 unsigned size = _functionStartsCmd->datasize();
514 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[_functionStartsCmd->dataoff()], size);
515 _newFunctionStartsOffset = offset;
516 offset += size;
517 }
518
519 template <typename P>
520 void LinkeditOptimizer<P>::copyDataInCode(uint8_t* newLinkEditContent, uint32_t& offset)
521 {
522 if ( _dataInCodeCmd == nullptr )
523 return;
524 unsigned size = _dataInCodeCmd->datasize();
525 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[_dataInCodeCmd->dataoff()], size);
526 _newDataInCodeOffset = offset;
527 offset += size;
528 }
529
530
531 template <typename P>
532 void LinkeditOptimizer<P>::copyLocalSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex,
533 bool redact, std::vector<LocalSymbolInfo>& localSymbolInfos,
534 std::vector<macho_nlist<P>>& unmappedLocalSymbols, SortedStringPool<P>& localSymbolsStringPool)
535 {
536 LocalSymbolInfo localInfo;
537 localInfo.dylibOffset = (uint32_t)(((uint8_t*)_mh) - (uint8_t*)_cacheBuffer);
538 localInfo.nlistStartIndex = (uint32_t)unmappedLocalSymbols.size();
539 localInfo.nlistCount = 0;
540 _newLocalSymbolsStartIndex = symbolIndex;
541 const char* strings = (char*)&_linkeditBias[_symTabCmd->stroff()];
542 const macho_nlist<P>* const symbolTable = (macho_nlist<P>*)(&_linkeditBias[_symTabCmd->symoff()]);
543 const macho_nlist<P>* const firstExport = &symbolTable[_dynSymTabCmd->ilocalsym()];
544 const macho_nlist<P>* const lastExport = &symbolTable[_dynSymTabCmd->ilocalsym()+_dynSymTabCmd->nlocalsym()];
545 for (const macho_nlist<P>* entry = firstExport; entry < lastExport; ++entry) {
546 if ( (entry->n_type() & N_TYPE) != N_SECT)
547 continue;
548 if ( (entry->n_type() & N_STAB) != 0)
549 continue;
550 const char* name = &strings[entry->n_strx()];
551 macho_nlist<P>* newSymbolEntry = (macho_nlist<P>*)&newLinkEditContent[offset];
552 *newSymbolEntry = *entry;
553 if ( redact ) {
554 // if removing local symbols, change __text symbols to "<redacted>" so backtraces don't have bogus names
555 if ( entry->n_sect() == 1 ) {
556 stringPool.add(symbolIndex, "<redacted>");
557 ++symbolIndex;
558 offset += sizeof(macho_nlist<P>);
559 }
560 // copy local symbol to unmmapped locals area
561 localSymbolsStringPool.add((uint32_t)unmappedLocalSymbols.size(), name);
562 unmappedLocalSymbols.push_back(*entry);
563 unmappedLocalSymbols.back().set_n_strx(0);
564 }
565 else {
566 stringPool.add(symbolIndex, name);
567 ++symbolIndex;
568 offset += sizeof(macho_nlist<P>);
569 }
570 }
571 _newLocalSymbolCount = symbolIndex - _newLocalSymbolsStartIndex;
572 localInfo.nlistCount = (uint32_t)unmappedLocalSymbols.size() - localInfo.nlistStartIndex;
573 localSymbolInfos.push_back(localInfo);
574 }
575
576
577 template <typename P>
578 void LinkeditOptimizer<P>::copyExportedSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex)
579 {
580 _newExportedSymbolsStartIndex = symbolIndex;
581 const char* strings = (char*)&_linkeditBias[_symTabCmd->stroff()];
582 const macho_nlist<P>* const symbolTable = (macho_nlist<P>*)(&_linkeditBias[_symTabCmd->symoff()]);
583 const macho_nlist<P>* const firstExport = &symbolTable[_dynSymTabCmd->iextdefsym()];
584 const macho_nlist<P>* const lastExport = &symbolTable[_dynSymTabCmd->iextdefsym()+_dynSymTabCmd->nextdefsym()];
585 uint32_t oldSymbolIndex = _dynSymTabCmd->iextdefsym();
586 for (const macho_nlist<P>* entry = firstExport; entry < lastExport; ++entry, ++oldSymbolIndex) {
587 if ( (entry->n_type() & N_TYPE) != N_SECT)
588 continue;
589 const char* name = &strings[entry->n_strx()];
590 if ( strncmp(name, ".objc_", 6) == 0 )
591 continue;
592 if ( strncmp(name, "$ld$", 4) == 0 )
593 continue;
594 macho_nlist<P>* newSymbolEntry = (macho_nlist<P>*)&newLinkEditContent[offset];
595 *newSymbolEntry = *entry;
596 newSymbolEntry->set_n_strx(0);
597 stringPool.add(symbolIndex, name);
598 _oldToNewSymbolIndexes[oldSymbolIndex] = symbolIndex - _newLocalSymbolsStartIndex;
599 ++symbolIndex;
600 offset += sizeof(macho_nlist<P>);
601 }
602 _newExportedSymbolCount = symbolIndex - _newExportedSymbolsStartIndex;
603 }
604
605 template <typename P>
606 void LinkeditOptimizer<P>::copyImportedSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex)
607 {
608 _newImportedSymbolsStartIndex = symbolIndex;
609 const char* strings = (char*)&_linkeditBias[_symTabCmd->stroff()];
610 const macho_nlist<P>* const symbolTable = (macho_nlist<P>*)(&_linkeditBias[_symTabCmd->symoff()]);
611 const macho_nlist<P>* const firstImport = &symbolTable[_dynSymTabCmd->iundefsym()];
612 const macho_nlist<P>* const lastImport = &symbolTable[_dynSymTabCmd->iundefsym()+_dynSymTabCmd->nundefsym()];
613 uint32_t oldSymbolIndex = _dynSymTabCmd->iundefsym();
614 for (const macho_nlist<P>* entry = firstImport; entry < lastImport; ++entry, ++oldSymbolIndex) {
615 if ( (entry->n_type() & N_TYPE) != N_UNDF)
616 continue;
617 const char* name = &strings[entry->n_strx()];
618 macho_nlist<P>* newSymbolEntry = (macho_nlist<P>*)&newLinkEditContent[offset];
619 *newSymbolEntry = *entry;
620 newSymbolEntry->set_n_strx(0);
621 stringPool.add(symbolIndex, name);
622 _oldToNewSymbolIndexes[oldSymbolIndex] = symbolIndex - _newLocalSymbolsStartIndex;
623 ++symbolIndex;
624 offset += sizeof(macho_nlist<P>);
625 }
626 _newImportedSymbolCount = symbolIndex - _newImportedSymbolsStartIndex;
627 }
628
629 template <typename P>
630 void LinkeditOptimizer<P>::copyIndirectSymbolTable(uint8_t* newLinkEditContent, uint32_t& offset)
631 {
632 _newIndirectSymbolTableOffset = offset;
633 const uint32_t* const indirectTable = (uint32_t*)&_linkeditBias[_dynSymTabCmd->indirectsymoff()];
634 uint32_t* newIndirectTable = (uint32_t*)&newLinkEditContent[offset];
635 for (uint32_t i=0; i < _dynSymTabCmd->nindirectsyms(); ++i) {
636 uint32_t symbolIndex = E::get32(indirectTable[i]);
637 if ( (symbolIndex == INDIRECT_SYMBOL_ABS) || (symbolIndex == INDIRECT_SYMBOL_LOCAL) )
638 E::set32(newIndirectTable[i], symbolIndex);
639 else
640 E::set32(newIndirectTable[i], _oldToNewSymbolIndexes[symbolIndex]);
641 offset += sizeof(uint32_t);
642 }
643 }
644
645 template <typename P>
646 void LinkeditOptimizer<P>::mergeLinkedits(CacheBuilder& builder, std::vector<LinkeditOptimizer<P>*>& optimizers)
647 {
648 // allocate space for new linkedit data
649 uint64_t totalUnoptLinkeditsSize = builder._readOnlyRegion.sizeInUse - builder._nonLinkEditReadOnlySize;
650 uint8_t* newLinkEdit = (uint8_t*)calloc(totalUnoptLinkeditsSize, 1);
651 SortedStringPool<P> stringPool;
652 uint32_t offset = 0;
653
654 builder._diagnostics.verbose("Merged LINKEDIT:\n");
655
656 // copy weak binding info
657 uint32_t startWeakBindInfosOffset = offset;
658 for (LinkeditOptimizer<P>* op : optimizers) {
659 // Skip chained fixups as the in-place linked list isn't valid any more
660 const dyld3::MachOFile* mf = (dyld3::MachOFile*)op->machHeader();
661 if (!mf->hasChainedFixups())
662 op->copyWeakBindingInfo(newLinkEdit, offset);
663 }
664 builder._diagnostics.verbose(" weak bindings size: %5uKB\n", (uint32_t)(offset-startWeakBindInfosOffset)/1024);
665
666 // copy export info
667 uint32_t startExportInfosOffset = offset;
668 for (LinkeditOptimizer<P>* op : optimizers) {
669 op->copyExportInfo(newLinkEdit, offset);
670 }
671 builder._diagnostics.verbose(" exports info size: %5uKB\n", (uint32_t)(offset-startExportInfosOffset)/1024);
672
673 // in theory, an optimized cache can drop the binding info
674 if ( true ) {
675 // copy binding info
676 uint32_t startBindingsInfosOffset = offset;
677 for (LinkeditOptimizer<P>* op : optimizers) {
678 // Skip chained fixups as the in-place linked list isn't valid any more
679 const dyld3::MachOFile* mf = (dyld3::MachOFile*)op->machHeader();
680 if (!mf->hasChainedFixups())
681 op->copyBindingInfo(newLinkEdit, offset);
682 }
683 builder._diagnostics.verbose(" bindings size: %5uKB\n", (uint32_t)(offset-startBindingsInfosOffset)/1024);
684
685 // copy lazy binding info
686 uint32_t startLazyBindingsInfosOffset = offset;
687 for (LinkeditOptimizer<P>* op : optimizers) {
688 // Skip chained fixups as the in-place linked list isn't valid any more
689 const dyld3::MachOFile* mf = (dyld3::MachOFile*)op->machHeader();
690 if (!mf->hasChainedFixups())
691 op->copyLazyBindingInfo(newLinkEdit, offset);
692 }
693 builder._diagnostics.verbose(" lazy bindings size: %5uKB\n", (offset-startLazyBindingsInfosOffset)/1024);
694 }
695
696 // copy symbol table entries
697 std::vector<macho_nlist<P>> unmappedLocalSymbols;
698 if ( builder._options.excludeLocalSymbols )
699 unmappedLocalSymbols.reserve(0x01000000);
700 std::vector<LocalSymbolInfo> localSymbolInfos;
701 localSymbolInfos.reserve(optimizers.size());
702 SortedStringPool<P> localSymbolsStringPool;
703 uint32_t symbolIndex = 0;
704 const uint32_t sharedSymbolTableStartOffset = offset;
705 uint32_t sharedSymbolTableExportsCount = 0;
706 uint32_t sharedSymbolTableImportsCount = 0;
707 for (LinkeditOptimizer<P>* op : optimizers) {
708 op->copyLocalSymbols(newLinkEdit, stringPool, offset, symbolIndex, builder._options.excludeLocalSymbols,
709 localSymbolInfos, unmappedLocalSymbols, localSymbolsStringPool);
710 uint32_t x = symbolIndex;
711 op->copyExportedSymbols(newLinkEdit, stringPool, offset, symbolIndex);
712 sharedSymbolTableExportsCount += (symbolIndex-x);
713 uint32_t y = symbolIndex;
714 op->copyImportedSymbols(newLinkEdit, stringPool, offset, symbolIndex);
715 sharedSymbolTableImportsCount += (symbolIndex-y);
716 }
717 uint32_t sharedSymbolTableCount = symbolIndex;
718 const uint32_t sharedSymbolTableEndOffset = offset;
719
720 // copy function starts
721 uint32_t startFunctionStartsOffset = offset;
722 for (LinkeditOptimizer<P>* op : optimizers) {
723 op->copyFunctionStarts(newLinkEdit, offset);
724 }
725 builder._diagnostics.verbose(" function starts size: %5uKB\n", (offset-startFunctionStartsOffset)/1024);
726
727 // copy data-in-code info
728 uint32_t startDataInCodeOffset = offset;
729 for (LinkeditOptimizer<P>* op : optimizers) {
730 op->copyDataInCode(newLinkEdit, offset);
731 }
732 builder._diagnostics.verbose(" data in code size: %5uKB\n", (offset-startDataInCodeOffset)/1024);
733
734 // copy indirect symbol tables
735 for (LinkeditOptimizer<P>* op : optimizers) {
736 op->copyIndirectSymbolTable(newLinkEdit, offset);
737 }
738 // if indirect table has odd number of entries, end will not be 8-byte aligned
739 if ( (offset % sizeof(typename P::uint_t)) != 0 )
740 offset += 4;
741
742 // copy string pool
743 uint32_t sharedSymbolStringsOffset = offset;
744 uint32_t sharedSymbolStringsSize = stringPool.copyPoolAndUpdateOffsets((char*)&newLinkEdit[sharedSymbolStringsOffset], (macho_nlist<P>*)&newLinkEdit[sharedSymbolTableStartOffset]);
745 offset += sharedSymbolStringsSize;
746 uint32_t newLinkeditUnalignedSize = offset;
747 uint64_t newLinkeditAlignedSize = align(offset, 14);
748 builder._diagnostics.verbose(" symbol table size: %5uKB (%d exports, %d imports)\n", (sharedSymbolTableEndOffset-sharedSymbolTableStartOffset)/1024, sharedSymbolTableExportsCount, sharedSymbolTableImportsCount);
749 builder._diagnostics.verbose(" symbol string pool size: %5uKB\n", sharedSymbolStringsSize/1024);
750 builder._sharedStringsPoolVmOffset = (uint32_t)((builder._readOnlyRegion.unslidLoadAddress - builder._readExecuteRegion.unslidLoadAddress) + builder._nonLinkEditReadOnlySize + sharedSymbolStringsOffset);
751
752 // overwrite mapped LINKEDIT area in cache with new merged LINKEDIT content
753 builder._diagnostics.verbose("LINKEDITS optimized from %uMB to %uMB\n", (uint32_t)totalUnoptLinkeditsSize/(1024*1024), (uint32_t)newLinkeditUnalignedSize/(1024*1024));
754 ::memcpy(builder._readOnlyRegion.buffer+builder._nonLinkEditReadOnlySize, newLinkEdit, newLinkeditAlignedSize);
755 ::free(newLinkEdit);
756 builder._readOnlyRegion.sizeInUse = builder._nonLinkEditReadOnlySize + newLinkeditAlignedSize;
757
758 // overwrite end of un-opt linkedits to create a new unmapped region for local symbols
759 if ( builder._options.excludeLocalSymbols ) {
760 const uint32_t entriesOffset = sizeof(dyld_cache_local_symbols_info);
761 const uint32_t entriesCount = (uint32_t)localSymbolInfos.size();
762 const uint32_t nlistOffset = (uint32_t)align(entriesOffset + entriesCount * sizeof(dyld_cache_local_symbols_info), 4); // 16-byte align start
763 const uint32_t nlistCount = (uint32_t)unmappedLocalSymbols.size();
764 const uint32_t stringsSize = (uint32_t)localSymbolsStringPool.size();
765 const uint32_t stringsOffset = nlistOffset + nlistCount * sizeof(macho_nlist<P>);
766 // allocate buffer for local symbols
767 const size_t localsBufferSize = align(stringsOffset + stringsSize, 14);
768 vm_address_t localsBuffer;
769 if ( ::vm_allocate(mach_task_self(), &localsBuffer, localsBufferSize, VM_FLAGS_ANYWHERE) == 0 ) {
770 dyld_cache_local_symbols_info* infoHeader = (dyld_cache_local_symbols_info*)localsBuffer;
771 // fill in header info
772 infoHeader->nlistOffset = nlistOffset;
773 infoHeader->nlistCount = nlistCount;
774 infoHeader->stringsOffset = stringsOffset;
775 infoHeader->stringsSize = stringsSize;
776 infoHeader->entriesOffset = entriesOffset;
777 infoHeader->entriesCount = entriesCount;
778 // copy info for each dylib
779 dyld_cache_local_symbols_entry* entries = (dyld_cache_local_symbols_entry*)(((uint8_t*)infoHeader)+entriesOffset);
780 for (uint32_t i=0; i < entriesCount; ++i) {
781 entries[i].dylibOffset = localSymbolInfos[i].dylibOffset;
782 entries[i].nlistStartIndex = localSymbolInfos[i].nlistStartIndex;
783 entries[i].nlistCount = localSymbolInfos[i].nlistCount;
784 }
785 // copy nlists
786 macho_nlist<P>* newLocalsSymbolTable = (macho_nlist<P>*)(localsBuffer+nlistOffset);
787 ::memcpy(newLocalsSymbolTable, &unmappedLocalSymbols[0], nlistCount*sizeof(macho_nlist<P>));
788 // copy string pool
789 localSymbolsStringPool.copyPoolAndUpdateOffsets(((char*)infoHeader)+stringsOffset, newLocalsSymbolTable);
790 // update cache header
791 DyldSharedCache* cacheHeader = (DyldSharedCache*)builder._readExecuteRegion.buffer;
792 cacheHeader->header.localSymbolsSize = localsBufferSize;
793 // return buffer of local symbols, caller to free() it
794 builder._localSymbolsRegion.buffer = (uint8_t*)localsBuffer;
795 builder._localSymbolsRegion.bufferSize = localsBufferSize;
796 builder._localSymbolsRegion.sizeInUse = localsBufferSize;
797 }
798 else {
799 builder._diagnostics.warning("could not allocate local symbols");
800 }
801 }
802
803 // update all load commands to new merged layout
804 uint64_t linkeditsUnslidStartAddr = builder._readOnlyRegion.unslidLoadAddress + builder._nonLinkEditReadOnlySize;
805 uint32_t linkeditsCacheFileOffset = (uint32_t)(builder._readOnlyRegion.cacheFileOffset + builder._nonLinkEditReadOnlySize);
806 for (LinkeditOptimizer<P>* op : optimizers) {
807 op->updateLoadCommands(linkeditsCacheFileOffset, linkeditsUnslidStartAddr, newLinkeditUnalignedSize,
808 sharedSymbolTableStartOffset, sharedSymbolTableCount,
809 sharedSymbolStringsOffset, sharedSymbolStringsSize);
810 }
811 }
812
813
814 template <typename P>
815 void LinkeditOptimizer<P>::optimizeLinkedit(CacheBuilder& builder)
816 {
817 DyldSharedCache* cache = (DyldSharedCache*)builder._readExecuteRegion.buffer;
818 // construct a LinkeditOptimizer for each image
819 __block std::vector<LinkeditOptimizer<P>*> optimizers;
820 cache->forEachImage(^(const mach_header* mh, const char*) {
821 optimizers.push_back(new LinkeditOptimizer<P>(cache, (macho_header<P>*)mh, builder._diagnostics));
822 });
823 #if 0
824 // add optimizer for each branch pool
825 for (uint64_t poolOffset : branchPoolOffsets) {
826 macho_header<P>* mh = (macho_header<P>*)((char*)cache + poolOffset);
827 optimizers.push_back(new LinkeditOptimizer<P>(cache, mh, diag));
828 }
829 #endif
830 // merge linkedit info
831 mergeLinkedits(builder, optimizers);
832
833 // delete optimizers
834 for (LinkeditOptimizer<P>* op : optimizers)
835 delete op;
836 }
837
838 void CacheBuilder::optimizeLinkedit()
839 {
840 if ( _is64 ) {
841 return LinkeditOptimizer<Pointer64<LittleEndian>>::optimizeLinkedit(*this);
842 }
843 else {
844 return LinkeditOptimizer<Pointer32<LittleEndian>>::optimizeLinkedit(*this);
845 }
846 }
847
848
849