dyld-832.7.1.tar.gz
[apple/dyld.git] / dyld3 / shared-cache / OptimizerLinkedit.cpp
1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*-
2 *
3 * Copyright (c) 2014 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25
26 #include <dirent.h>
27 #include <sys/errno.h>
28 #include <sys/fcntl.h>
29 #include <mach-o/loader.h>
30 #include <mach-o/fat.h>
31 #include <assert.h>
32
33 #include <fstream>
34 #include <string>
35 #include <algorithm>
36 #include <unordered_map>
37 #include <unordered_set>
38
39 #include "MachOFileAbstraction.hpp"
40 #include "Trie.hpp"
41 #include "DyldSharedCache.h"
42 #include "CacheBuilder.h"
43 #include "MachOLoaded.h"
44
// Rounds `value` up to the next multiple of alignof(type).
// Both uses of the arguments are parenthesized so that expression arguments
// such as `a ? b : c` or `n << 1` are evaluated as a unit before the addition.
// ~(alignof(type) - 1) is the same mask as -alignof(type) in unsigned arithmetic.
#define ALIGN_AS_TYPE(value, type) \
    (((value) + alignof(type) - 1) & ~(alignof(type) - 1))
47
48 namespace {
49
50 template <typename P>
51 class SortedStringPool
52 {
53 public:
54 // add a string and symbol table entry index to be updated later
55 void add(uint32_t symbolIndex, const char* symbolName) {
56 _map[symbolName].push_back({ symbolIndex, false });
57 }
58
59 // add a string and symbol table entry index to be updated later
60 void addIndirect(uint32_t symbolIndex, const char* symbolName) {
61 _map[symbolName].push_back({ symbolIndex, true });
62 }
63
64 // copy sorted strings to buffer and update all symbol's string offsets
65 uint32_t copyPoolAndUpdateOffsets(char* dstStringPool, macho_nlist<P>* symbolTable) {
66 // walk sorted list of strings
67 dstStringPool[0] = '\0'; // tradition for start of pool to be empty string
68 uint32_t poolOffset = 1;
69 for (auto& entry : _map) {
70 const std::string& symName = entry.first;
71 // append string to pool
72 strcpy(&dstStringPool[poolOffset], symName.c_str());
73 // set each string offset of each symbol using it
74 for (std::pair<uint32_t, bool> symbolIndexAndIndirect : entry.second) {
75 if ( symbolIndexAndIndirect.second ) {
76 // Indirect
77 symbolTable[symbolIndexAndIndirect.first].set_n_value(poolOffset);
78 } else {
79 symbolTable[symbolIndexAndIndirect.first].set_n_strx(poolOffset);
80 }
81 }
82 poolOffset += symName.size() + 1;
83 }
84 // return size of pool
85 return poolOffset;
86 }
87
88 size_t size() {
89 size_t size = 1;
90 for (auto& entry : _map) {
91 size += (entry.first.size() + 1);
92 }
93 return size;
94 }
95
96
97 private:
98 std::map<std::string, std::vector<std::pair<uint32_t, bool>>> _map;
99 };
100
101
102 } // anonymous namespace
103
104
// Describes where one dylib's local symbols were placed in the list of
// unmapped local symbols built by LinkeditOptimizer::copyLocalSymbols().
struct LocalSymbolInfo
{
    uint32_t    dylibOffset;        // offset of the dylib's mach header from the start of the container buffer
    uint32_t    nlistStartIndex;    // index of this dylib's first entry in the unmapped local symbols
    uint32_t    nlistCount;         // number of entries this dylib added to the unmapped local symbols
};
111
112
113 template <typename P>
114 class LinkeditOptimizer {
115 public:
116 LinkeditOptimizer(const void* containerBuffer, macho_header<P>* mh, const char* dylibID,
117 Diagnostics& diag);
118
119 uint32_t linkeditSize() { return _linkeditSize; }
120 uint64_t linkeditAddr() { return _linkeditAddr; }
121 const char* dylibID() { return _dylibID; }
122 void copyWeakBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset);
123 void copyLazyBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset);
124 void copyBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset);
125 void copyExportInfo(uint8_t* newLinkEditContent, uint32_t& offset);
126 void copyExportedSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex);
127 void copyImportedSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex);
128 void copyLocalSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex,
129 bool redact, std::vector<LocalSymbolInfo>& localSymbolInfos,
130 std::vector<macho_nlist<P>>& unmappedLocalSymbols, SortedStringPool<P>& localSymbolsStringPool);
131 void copyFunctionStarts(uint8_t* newLinkEditContent, uint32_t& offset);
132 void copyDataInCode(uint8_t* newLinkEditContent, uint32_t& offset);
133 void copyIndirectSymbolTable(uint8_t* newLinkEditContent, uint32_t& offset);
134 void updateLoadCommands(uint32_t linkeditStartOffset, uint64_t mergedLinkeditAddr, uint64_t newLinkeditSize,
135 uint32_t sharedSymbolTableStartOffset, uint32_t sharedSymbolTableCount,
136 uint32_t sharedSymbolStringsOffset, uint32_t sharedSymbolStringsSize);
137
138 typedef CacheBuilder::DylibStripMode DylibStripMode;
139 void setStripMode(DylibStripMode stripMode);
140
141 macho_header<P>* machHeader() { return _mh; }
142 const std::vector<const char*> getDownwardDependents() { return _downDependentPaths; }
143 const std::vector<const char*> getAllDependents() { return _allDependentPaths; }
144 const std::vector<const char*> getReExportPaths() { return _reExportPaths; }
145 const std::vector<uint64_t> initializerAddresses() { return _initializerAddresses; }
146 const std::vector<macho_section<P>*> dofSections() { return _dofSections; }
147 uint32_t exportsTrieLinkEditOffset() { return _newExportInfoOffset; }
148 uint32_t exportsTrieLinkEditSize() { return _exportInfoSize; }
149 uint32_t weakBindingLinkEditOffset() { return _newWeakBindingInfoOffset; }
150 uint32_t weakBindingLinkEditSize() { return _newWeakBindingSize; }
151 uint64_t dyldSectionAddress() { return _dyldSectionAddr; }
152 const std::vector<macho_segment_command<P>*>& segCmds() { return _segCmds; }
153
154
155 static void optimizeLinkedit(CacheBuilder& builder, const void* containerBuffer,
156 CacheBuilder::UnmappedRegion* localSymbolsRegion,
157 const std::vector<std::tuple<const mach_header*, const char*, DylibStripMode>>& images);
158 static void mergeLinkedits(CacheBuilder& builder, CacheBuilder::UnmappedRegion* localSymbolsRegion,
159 std::vector<LinkeditOptimizer<P>*>& optimizers);
160
161 private:
162
163 typedef typename P::uint_t pint_t;
164 typedef typename P::E E;
165
166 macho_header<P>* _mh;
167 const void* _containerBuffer;
168 Diagnostics& _diagnostics;
169 uint32_t _linkeditSize = 0;
170 uint64_t _linkeditAddr = 0;
171 const uint8_t* _linkeditBias = nullptr;
172 const char* _dylibID = nullptr;
173 macho_symtab_command<P>* _symTabCmd = nullptr;
174 macho_dysymtab_command<P>* _dynSymTabCmd = nullptr;
175 macho_dyld_info_command<P>* _dyldInfo = nullptr;
176 macho_linkedit_data_command<P>* _exportTrieCmd = nullptr;
177 macho_linkedit_data_command<P>* _functionStartsCmd = nullptr;
178 macho_linkedit_data_command<P>* _dataInCodeCmd = nullptr;
179 std::vector<macho_segment_command<P>*> _segCmds;
180 std::unordered_map<uint32_t,uint32_t> _oldToNewSymbolIndexes;
181 std::vector<const char*> _reExportPaths;
182 std::vector<const char*> _downDependentPaths;
183 std::vector<const char*> _allDependentPaths;
184 std::vector<uint64_t> _initializerAddresses;
185 std::vector<macho_section<P>*> _dofSections;
186 uint32_t _newWeakBindingInfoOffset = 0;
187 uint32_t _newLazyBindingInfoOffset = 0;
188 uint32_t _newBindingInfoOffset = 0;
189 uint32_t _newExportInfoOffset = 0;
190 uint32_t _exportInfoSize = 0;
191 uint32_t _newWeakBindingSize = 0;
192 uint32_t _newExportedSymbolsStartIndex = 0;
193 uint32_t _newExportedSymbolCount = 0;
194 uint32_t _newImportedSymbolsStartIndex = 0;
195 uint32_t _newImportedSymbolCount = 0;
196 uint32_t _newLocalSymbolsStartIndex = 0;
197 uint32_t _newLocalSymbolCount = 0;
198 uint32_t _newFunctionStartsOffset = 0;
199 uint32_t _newDataInCodeOffset = 0;
200 uint32_t _newIndirectSymbolTableOffset = 0;
201 uint64_t _dyldSectionAddr = 0;
202 DylibStripMode _stripMode = DylibStripMode::stripAll;
203 };
204
205
206 template <typename P>
207 LinkeditOptimizer<P>::LinkeditOptimizer(const void* containerBuffer, macho_header<P>* mh,
208 const char* dylibID, Diagnostics& diag)
209 : _mh(mh), _dylibID(dylibID), _containerBuffer(containerBuffer), _diagnostics(diag)
210 {
211 const unsigned origLoadCommandsSize = mh->sizeofcmds();
212 unsigned bytesRemaining = origLoadCommandsSize;
213 unsigned removedCount = 0;
214 uint64_t textSegAddr = 0;
215 int64_t slide = 0;
216 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
217 const uint32_t cmdCount = mh->ncmds();
218 const macho_load_command<P>* cmd = cmds;
219 const macho_dylib_command<P>* dylibCmd;
220 const macho_routines_command<P>* routinesCmd;
221 macho_segment_command<P>* segCmd;
222 for (uint32_t i = 0; i < cmdCount; ++i) {
223 bool remove = false;
224 switch (cmd->cmd()) {
225 case LC_SYMTAB:
226 _symTabCmd = (macho_symtab_command<P>*)cmd;
227 break;
228 case LC_DYSYMTAB:
229 _dynSymTabCmd = (macho_dysymtab_command<P>*)cmd;
230 break;
231 case LC_DYLD_INFO:
232 case LC_DYLD_INFO_ONLY:
233 _dyldInfo = (macho_dyld_info_command<P>*)cmd;
234 _exportInfoSize = _dyldInfo->export_size();
235 break;
236 case LC_FUNCTION_STARTS:
237 _functionStartsCmd = (macho_linkedit_data_command<P>*)cmd;
238 break;
239 case LC_DATA_IN_CODE:
240 _dataInCodeCmd = (macho_linkedit_data_command<P>*)cmd;
241 break;
242 case LC_DYLD_EXPORTS_TRIE:
243 _exportTrieCmd = (macho_linkedit_data_command<P>*)cmd;
244 _exportInfoSize = _exportTrieCmd->datasize();
245 break;
246 case LC_ROUTINES:
247 case LC_ROUTINES_64:
248 routinesCmd = (macho_routines_command<P>*)cmd;
249 _initializerAddresses.push_back(routinesCmd->init_address());
250 break;
251 case LC_REEXPORT_DYLIB:
252 case LC_LOAD_DYLIB:
253 case LC_LOAD_WEAK_DYLIB:
254 case LC_LOAD_UPWARD_DYLIB:
255 dylibCmd = (macho_dylib_command<P>*)cmd;
256 _allDependentPaths.push_back(dylibCmd->name());
257 if ( cmd->cmd() != LC_LOAD_UPWARD_DYLIB )
258 _downDependentPaths.push_back(dylibCmd->name());
259 if ( cmd->cmd() == LC_REEXPORT_DYLIB )
260 _reExportPaths.push_back(dylibCmd->name());
261 break;
262 case macho_segment_command<P>::CMD:
263 segCmd = (macho_segment_command<P>*)cmd;
264 _segCmds.push_back(segCmd);
265 if ( strcmp(segCmd->segname(), "__TEXT") == 0 ) {
266 textSegAddr = segCmd->vmaddr();
267 slide = (uint64_t)mh - textSegAddr;
268 }
269 else if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
270 _linkeditAddr = segCmd->vmaddr();
271 _linkeditBias = (uint8_t*)mh + (_linkeditAddr - textSegAddr) - segCmd->fileoff();
272 _linkeditSize = (uint32_t)segCmd->vmsize();
273 }
274 else if ( segCmd->nsects() > 0 ) {
275 macho_section<P>* const sectionsStart = (macho_section<P>*)((uint8_t*)segCmd + sizeof(macho_segment_command<P>));
276 macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
277 for (macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
278 const uint8_t type = sect->flags() & SECTION_TYPE;
279 if ( type == S_MOD_INIT_FUNC_POINTERS ) {
280 const pint_t* inits = (pint_t*)(sect->addr()+slide);
281 const size_t count = sect->size() / sizeof(pint_t);
282 for (size_t j=0; j < count; ++j) {
283 uint64_t func = P::getP(inits[j]);
284 _initializerAddresses.push_back(func);
285 }
286 }
287 else if ( type == S_INIT_FUNC_OFFSETS ) {
288 const uint32_t* inits = (uint32_t*)(sect->addr()+slide);
289 const size_t count = sect->size() / sizeof(uint32_t);
290 for (size_t j=0; j < count; ++j) {
291 uint32_t funcOffset = E::get32(inits[j]);
292 _initializerAddresses.push_back(textSegAddr + funcOffset);
293 }
294 }
295 else if ( type == S_DTRACE_DOF ) {
296 _dofSections.push_back(sect);
297 }
298 else if ( (strcmp(sect->sectname(), "__dyld") == 0) && (strncmp(sect->segname(), "__DATA", 6) == 0) ) {
299 _dyldSectionAddr = sect->addr();
300 }
301 }
302 }
303 break;
304 case LC_DYLD_CHAINED_FIXUPS:
305 case LC_SEGMENT_SPLIT_INFO:
306 remove = true;
307 break;
308 }
309 uint32_t cmdSize = cmd->cmdsize();
310 macho_load_command<P>* nextCmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmdSize);
311 if ( remove ) {
312 ::memmove((void*)cmd, (void*)nextCmd, bytesRemaining);
313 ++removedCount;
314 }
315 else {
316 bytesRemaining -= cmdSize;
317 cmd = nextCmd;
318 }
319 }
320 // zero out stuff removed
321 ::bzero((void*)cmd, bytesRemaining);
322 // update header
323 mh->set_ncmds(cmdCount - removedCount);
324 mh->set_sizeofcmds(origLoadCommandsSize - bytesRemaining);
325 }
326
327 template <typename P>
328 void LinkeditOptimizer<P>::setStripMode(DylibStripMode stripMode) {
329 _stripMode = stripMode;
330 }
331
332 /*
333 static void dumpLoadCommands(const uint8_t* mheader)
334 {
335 const mach_header* const mh = (mach_header*)mheader;
336 const uint32_t cmd_count = mh->ncmds;
337 bool is64 = (mh->magic == MH_MAGIC_64);
338 const load_command* cmds = (load_command*)(mheader + (is64 ? sizeof(mach_header_64) : sizeof(mach_header)));
339 const load_command* cmd = cmds;
340 const segment_command* segCmd;
341 const segment_command_64* seg64Cmd;
342 const symtab_command* symTab;
343 const linkedit_data_command* leData;
344 const uint8_t* linkEditBias = NULL;
345 for (uint32_t i = 0; i < cmd_count; ++i) {
346 switch (cmd->cmd) {
347 case LC_SEGMENT:
348 segCmd = (const segment_command*)cmd;
349 printf("LC_SEGMENT\n");
350 printf(" segname = %s\n", segCmd->segname);
351 printf(" vmaddr = 0x%08X\n", segCmd->vmaddr);
352 printf(" vmsize = 0x%08X\n", segCmd->vmsize);
353 printf(" fileoff = 0x%08X\n", segCmd->fileoff);
354 printf(" filesize = 0x%08X\n", segCmd->filesize);
355 if ( strcmp(segCmd->segname, "__TEXT") == 0 ) {
356 linkEditBias = mheader - segCmd->fileoff;
357 }
358 break;
359 case LC_SEGMENT_64:
360 seg64Cmd = (const segment_command_64*)cmd;
361 printf("LC_SEGMENT_64\n");
362 printf(" segname = %s\n", seg64Cmd->segname);
363 printf(" vmaddr = 0x%09llX\n", seg64Cmd->vmaddr);
364 printf(" vmsize = 0x%09llX\n", seg64Cmd->vmsize);
365 printf(" fileoff = 0x%09llX\n", seg64Cmd->fileoff);
366 printf(" filesize = 0x%09llX\n", seg64Cmd->filesize);
367 if ( strcmp(seg64Cmd->segname, "__TEXT") == 0 ) {
368 linkEditBias = mheader - seg64Cmd->fileoff;
369 }
370 break;
371 case LC_SYMTAB:
372 symTab = (const symtab_command*)cmd;
373 printf("LC_SYMTAB\n");
374 printf(" symoff = 0x%08X\n", symTab->symoff);
375 printf(" nsyms = 0x%08X\n", symTab->nsyms);
376 printf(" stroff = 0x%08X\n", symTab->stroff);
377 printf(" strsize = 0x%08X\n", symTab->strsize);
378 {
379 const char* strPool = (char*)&linkEditBias[symTab->stroff];
380 const nlist_64* sym0 = (nlist_64*)(&linkEditBias[symTab->symoff]);
381 printf(" sym[0].n_strx = 0x%08X (%s)\n", sym0->n_un.n_strx, &strPool[sym0->n_un.n_strx]);
382 printf(" sym[0].n_type = 0x%02X\n", sym0->n_type);
383 printf(" sym[0].n_sect = 0x%02X\n", sym0->n_sect);
384 printf(" sym[0].n_desc = 0x%04X\n", sym0->n_desc);
385 printf(" sym[0].n_value = 0x%llX\n", sym0->n_value);
386 const nlist_64* sym1 = (nlist_64*)(&linkEditBias[symTab->symoff+16]);
387 printf(" sym[1].n_strx = 0x%08X (%s)\n", sym1->n_un.n_strx, &strPool[sym1->n_un.n_strx]);
388 printf(" sym[1].n_type = 0x%02X\n", sym1->n_type);
389 printf(" sym[1].n_sect = 0x%02X\n", sym1->n_sect);
390 printf(" sym[1].n_desc = 0x%04X\n", sym1->n_desc);
391 printf(" sym[1].n_value = 0x%llX\n", sym1->n_value);
392 }
393 break;
394 case LC_FUNCTION_STARTS:
395 leData = (const linkedit_data_command*)cmd;
396 printf("LC_FUNCTION_STARTS\n");
397 printf(" dataoff = 0x%08X\n", leData->dataoff);
398 printf(" datasize = 0x%08X\n", leData->datasize);
399 default:
400 //printf("0x%08X\n", cmd->cmd);
401 break;
402 }
403 cmd = (const load_command*)(((uint8_t*)cmd)+cmd->cmdsize);
404 }
405 }
406 */
407
408 template <typename P>
409 void LinkeditOptimizer<P>::updateLoadCommands(uint32_t mergedLinkeditStartOffset, uint64_t mergedLinkeditAddr, uint64_t newLinkeditSize,
410 uint32_t sharedSymbolTableStartOffset, uint32_t sharedSymbolTableCount,
411 uint32_t sharedSymbolStringsOffset, uint32_t sharedSymbolStringsSize)
412 {
413 // update __LINKEDIT segment in all dylibs to overlap the same shared region
414 for (macho_segment_command<P>* segCmd : _segCmds) {
415 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
416 segCmd->set_vmaddr(mergedLinkeditAddr);
417 segCmd->set_vmsize(newLinkeditSize);
418 segCmd->set_fileoff(mergedLinkeditStartOffset);
419 segCmd->set_filesize(newLinkeditSize);
420 }
421 else if ( strcmp(segCmd->segname(), "__TEXT") == 0 ) {
422 // HACK until lldb fixed in: <rdar://problem/20357466> DynamicLoaderMacOSXDYLD fixes for Monarch dyld shared cache
423 //segCmd->set_fileoff(0);
424
425 }
426 }
427
428 // update symbol table to point to shared symbol table
429 _symTabCmd->set_symoff(mergedLinkeditStartOffset + sharedSymbolTableStartOffset + _newLocalSymbolsStartIndex*sizeof(macho_nlist<P>));
430 _symTabCmd->set_nsyms(_newLocalSymbolCount+_newExportedSymbolCount+_newImportedSymbolCount);
431 _symTabCmd->set_stroff(mergedLinkeditStartOffset + sharedSymbolStringsOffset);
432 _symTabCmd->set_strsize(sharedSymbolStringsSize);
433
434 // update dynamic symbol table to have proper offsets into shared symbol table
435 if ( _dynSymTabCmd != nullptr ) {
436 _dynSymTabCmd->set_ilocalsym(0);
437 _dynSymTabCmd->set_nlocalsym(_newLocalSymbolCount);
438 _dynSymTabCmd->set_iextdefsym(_newExportedSymbolsStartIndex-_newLocalSymbolsStartIndex);
439 _dynSymTabCmd->set_nextdefsym(_newExportedSymbolCount);
440 _dynSymTabCmd->set_iundefsym(_newImportedSymbolsStartIndex-_newLocalSymbolsStartIndex);
441 _dynSymTabCmd->set_nundefsym(_newImportedSymbolCount);
442 _dynSymTabCmd->set_tocoff(0);
443 _dynSymTabCmd->set_ntoc(0);
444 _dynSymTabCmd->set_modtaboff(0);
445 _dynSymTabCmd->set_nmodtab(0);
446 _dynSymTabCmd->set_indirectsymoff(mergedLinkeditStartOffset + _newIndirectSymbolTableOffset);
447 _dynSymTabCmd->set_extreloff(0);
448 _dynSymTabCmd->set_locreloff(0);
449 _dynSymTabCmd->set_nlocrel(0);
450 }
451
452 // update dyld info
453 if ( _dyldInfo != nullptr ) {
454 _dyldInfo->set_rebase_off(0);
455 _dyldInfo->set_rebase_size(0);
456 _dyldInfo->set_bind_off(_dyldInfo->bind_size() ? mergedLinkeditStartOffset + _newBindingInfoOffset : 0);
457 _dyldInfo->set_weak_bind_off(_dyldInfo->weak_bind_size() ? mergedLinkeditStartOffset + _newWeakBindingInfoOffset : 0 );
458 _dyldInfo->set_lazy_bind_off(_dyldInfo->lazy_bind_size() ? mergedLinkeditStartOffset + _newLazyBindingInfoOffset : 0 );
459 _dyldInfo->set_export_off(mergedLinkeditStartOffset + _newExportInfoOffset);
460 } else if ( _exportTrieCmd != nullptr ) {
461 _exportTrieCmd->set_dataoff(mergedLinkeditStartOffset + _newExportInfoOffset);
462 }
463
464 // update function-starts
465 if ( _functionStartsCmd != nullptr )
466 _functionStartsCmd->set_dataoff(mergedLinkeditStartOffset+_newFunctionStartsOffset);
467
468 // update data-in-code
469 if ( _dataInCodeCmd != nullptr )
470 _dataInCodeCmd->set_dataoff(mergedLinkeditStartOffset+_newDataInCodeOffset);
471 }
472
473 template <typename P>
474 void LinkeditOptimizer<P>::copyWeakBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset)
475 {
476 if ( _dyldInfo == nullptr )
477 return;
478 unsigned size = _dyldInfo->weak_bind_size();
479 if ( size != 0 ) {
480 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[_dyldInfo->weak_bind_off()], size);
481 _newWeakBindingInfoOffset = offset;
482 _newWeakBindingSize = size;
483 offset += size;
484 }
485 }
486
487
488 template <typename P>
489 void LinkeditOptimizer<P>::copyLazyBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset)
490 {
491 if ( _dyldInfo == nullptr )
492 return;
493 unsigned size = _dyldInfo->lazy_bind_size();
494 if ( size != 0 ) {
495 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[_dyldInfo->lazy_bind_off()], size);
496 _newLazyBindingInfoOffset = offset;
497 offset += size;
498 }
499 }
500
501 template <typename P>
502 void LinkeditOptimizer<P>::copyBindingInfo(uint8_t* newLinkEditContent, uint32_t& offset)
503 {
504 if ( _dyldInfo == nullptr )
505 return;
506 unsigned size = _dyldInfo->bind_size();
507 if ( size != 0 ) {
508 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[_dyldInfo->bind_off()], size);
509 _newBindingInfoOffset = offset;
510 offset += size;
511 }
512 }
513
514 template <typename P>
515 void LinkeditOptimizer<P>::copyExportInfo(uint8_t* newLinkEditContent, uint32_t& offset)
516 {
517 if ( (_dyldInfo == nullptr) && (_exportTrieCmd == nullptr) )
518 return;
519
520 uint32_t exportOffset = _exportTrieCmd ? _exportTrieCmd->dataoff() : _dyldInfo->export_off();
521 uint32_t exportSize = _exportTrieCmd ? _exportTrieCmd->datasize() : _dyldInfo->export_size();
522 if ( exportSize != 0 ) {
523 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[exportOffset], exportSize);
524 _newExportInfoOffset = offset;
525 offset += exportSize;
526 }
527 }
528
529
530 template <typename P>
531 void LinkeditOptimizer<P>::copyFunctionStarts(uint8_t* newLinkEditContent, uint32_t& offset)
532 {
533 if ( _functionStartsCmd == nullptr )
534 return;
535 unsigned size = _functionStartsCmd->datasize();
536 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[_functionStartsCmd->dataoff()], size);
537 _newFunctionStartsOffset = offset;
538 offset += size;
539 }
540
541 template <typename P>
542 void LinkeditOptimizer<P>::copyDataInCode(uint8_t* newLinkEditContent, uint32_t& offset)
543 {
544 if ( _dataInCodeCmd == nullptr )
545 return;
546 unsigned size = _dataInCodeCmd->datasize();
547 ::memcpy(&newLinkEditContent[offset], &_linkeditBias[_dataInCodeCmd->dataoff()], size);
548 _newDataInCodeOffset = offset;
549 offset += size;
550 }
551
552
553 template <typename P>
554 void LinkeditOptimizer<P>::copyLocalSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex,
555 bool redact, std::vector<LocalSymbolInfo>& localSymbolInfos,
556 std::vector<macho_nlist<P>>& unmappedLocalSymbols, SortedStringPool<P>& localSymbolsStringPool)
557 {
558 localSymbolInfos.push_back(LocalSymbolInfo());
559
560 LocalSymbolInfo& localInfo = localSymbolInfos.back();
561 localInfo.dylibOffset = (uint32_t)(((uint8_t*)_mh) - (uint8_t*)_containerBuffer);
562 localInfo.nlistStartIndex = (uint32_t)unmappedLocalSymbols.size();
563 localInfo.nlistCount = 0;
564 _newLocalSymbolsStartIndex = symbolIndex;
565 _newLocalSymbolCount = 0;
566
567 switch (_stripMode) {
568 case CacheBuilder::DylibStripMode::stripNone:
569 case CacheBuilder::DylibStripMode::stripExports:
570 break;
571 case CacheBuilder::DylibStripMode::stripLocals:
572 case CacheBuilder::DylibStripMode::stripAll:
573 return;
574 }
575
576 if ( _dynSymTabCmd == nullptr )
577 return;
578
579 const char* strings = (char*)&_linkeditBias[_symTabCmd->stroff()];
580 const macho_nlist<P>* const symbolTable = (macho_nlist<P>*)(&_linkeditBias[_symTabCmd->symoff()]);
581 const macho_nlist<P>* const firstExport = &symbolTable[_dynSymTabCmd->ilocalsym()];
582 const macho_nlist<P>* const lastExport = &symbolTable[_dynSymTabCmd->ilocalsym()+_dynSymTabCmd->nlocalsym()];
583 for (const macho_nlist<P>* entry = firstExport; entry < lastExport; ++entry) {
584 if ( (entry->n_type() & N_TYPE) != N_SECT)
585 continue;
586 if ( (entry->n_type() & N_STAB) != 0)
587 continue;
588 const char* name = &strings[entry->n_strx()];
589 macho_nlist<P>* newSymbolEntry = (macho_nlist<P>*)&newLinkEditContent[offset];
590 *newSymbolEntry = *entry;
591 if ( redact ) {
592 // if removing local symbols, change __text symbols to "<redacted>" so backtraces don't have bogus names
593 if ( entry->n_sect() == 1 ) {
594 stringPool.add(symbolIndex, "<redacted>");
595 ++symbolIndex;
596 offset += sizeof(macho_nlist<P>);
597 }
598 // copy local symbol to unmmapped locals area
599 localSymbolsStringPool.add((uint32_t)unmappedLocalSymbols.size(), name);
600 unmappedLocalSymbols.push_back(*entry);
601 unmappedLocalSymbols.back().set_n_strx(0);
602 }
603 else {
604 stringPool.add(symbolIndex, name);
605 ++symbolIndex;
606 offset += sizeof(macho_nlist<P>);
607 }
608 }
609 _newLocalSymbolCount = symbolIndex - _newLocalSymbolsStartIndex;
610 localInfo.nlistCount = (uint32_t)unmappedLocalSymbols.size() - localInfo.nlistStartIndex;
611 }
612
613
614 template <typename P>
615 void LinkeditOptimizer<P>::copyExportedSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex)
616 {
617 _newExportedSymbolsStartIndex = symbolIndex;
618 _newExportedSymbolCount = 0;
619
620 switch (_stripMode) {
621 case CacheBuilder::DylibStripMode::stripNone:
622 case CacheBuilder::DylibStripMode::stripLocals:
623 break;
624 case CacheBuilder::DylibStripMode::stripExports:
625 case CacheBuilder::DylibStripMode::stripAll:
626 return;
627 }
628
629 if ( _dynSymTabCmd == nullptr )
630 return;
631
632 const char* strings = (char*)&_linkeditBias[_symTabCmd->stroff()];
633 const macho_nlist<P>* const symbolTable = (macho_nlist<P>*)(&_linkeditBias[_symTabCmd->symoff()]);
634 const macho_nlist<P>* const firstExport = &symbolTable[_dynSymTabCmd->iextdefsym()];
635 const macho_nlist<P>* const lastExport = &symbolTable[_dynSymTabCmd->iextdefsym()+_dynSymTabCmd->nextdefsym()];
636 uint32_t oldSymbolIndex = _dynSymTabCmd->iextdefsym();
637 for (const macho_nlist<P>* entry = firstExport; entry < lastExport; ++entry, ++oldSymbolIndex) {
638 if ( (entry->n_type() & N_TYPE) != N_SECT)
639 continue;
640 const char* name = &strings[entry->n_strx()];
641 if ( strncmp(name, ".objc_", 6) == 0 )
642 continue;
643 if ( strncmp(name, "$ld$", 4) == 0 )
644 continue;
645 macho_nlist<P>* newSymbolEntry = (macho_nlist<P>*)&newLinkEditContent[offset];
646 *newSymbolEntry = *entry;
647 newSymbolEntry->set_n_strx(0);
648 stringPool.add(symbolIndex, name);
649 _oldToNewSymbolIndexes[oldSymbolIndex] = symbolIndex - _newLocalSymbolsStartIndex;
650 ++symbolIndex;
651 offset += sizeof(macho_nlist<P>);
652 }
653 _newExportedSymbolCount = symbolIndex - _newExportedSymbolsStartIndex;
654 }
655
656 template <typename P>
657 void LinkeditOptimizer<P>::copyImportedSymbols(uint8_t* newLinkEditContent, SortedStringPool<P>& stringPool, uint32_t& offset, uint32_t& symbolIndex)
658 {
659 _newImportedSymbolsStartIndex = symbolIndex;
660 _newImportedSymbolCount = 0;
661
662 if ( _dynSymTabCmd == nullptr )
663 return;
664
665 switch (_stripMode) {
666 case CacheBuilder::DylibStripMode::stripNone:
667 break;
668 case CacheBuilder::DylibStripMode::stripLocals:
669 case CacheBuilder::DylibStripMode::stripExports:
670 case CacheBuilder::DylibStripMode::stripAll:
671 return;
672 }
673
674 const char* strings = (char*)&_linkeditBias[_symTabCmd->stroff()];
675 const macho_nlist<P>* const symbolTable = (macho_nlist<P>*)(&_linkeditBias[_symTabCmd->symoff()]);
676 const macho_nlist<P>* const firstImport = &symbolTable[_dynSymTabCmd->iundefsym()];
677 const macho_nlist<P>* const lastImport = &symbolTable[_dynSymTabCmd->iundefsym()+_dynSymTabCmd->nundefsym()];
678 uint32_t oldSymbolIndex = _dynSymTabCmd->iundefsym();
679 for (const macho_nlist<P>* entry = firstImport; entry < lastImport; ++entry, ++oldSymbolIndex) {
680 if ( (entry->n_type() & N_TYPE) != N_UNDF)
681 continue;
682 const char* name = &strings[entry->n_strx()];
683 macho_nlist<P>* newSymbolEntry = (macho_nlist<P>*)&newLinkEditContent[offset];
684 *newSymbolEntry = *entry;
685 newSymbolEntry->set_n_strx(0);
686 stringPool.add(symbolIndex, name);
687 _oldToNewSymbolIndexes[oldSymbolIndex] = symbolIndex - _newLocalSymbolsStartIndex;
688 ++symbolIndex;
689 offset += sizeof(macho_nlist<P>);
690 }
691 _newImportedSymbolCount = symbolIndex - _newImportedSymbolsStartIndex;
692 }
693
694 template <typename P>
695 void LinkeditOptimizer<P>::copyIndirectSymbolTable(uint8_t* newLinkEditContent, uint32_t& offset)
696 {
697 _newIndirectSymbolTableOffset = offset;
698
699 if ( _dynSymTabCmd == nullptr )
700 return;
701
702 const uint32_t* const indirectTable = (uint32_t*)&_linkeditBias[_dynSymTabCmd->indirectsymoff()];
703 uint32_t* newIndirectTable = (uint32_t*)&newLinkEditContent[offset];
704 for (uint32_t i=0; i < _dynSymTabCmd->nindirectsyms(); ++i) {
705 uint32_t symbolIndex = E::get32(indirectTable[i]);
706 if ( (symbolIndex == INDIRECT_SYMBOL_ABS) || (symbolIndex == INDIRECT_SYMBOL_LOCAL) )
707 E::set32(newIndirectTable[i], symbolIndex);
708 else
709 E::set32(newIndirectTable[i], _oldToNewSymbolIndexes[symbolIndex]);
710 offset += sizeof(uint32_t);
711 }
712 }
713
714 template <typename P>
715 void LinkeditOptimizer<P>::mergeLinkedits(CacheBuilder& builder,
716 CacheBuilder::UnmappedRegion* localSymbolsRegion,
717 std::vector<LinkeditOptimizer<P>*>& optimizers)
718 {
719 // allocate space for new linkedit data
720 uint64_t totalUnoptLinkeditsSize = builder._readOnlyRegion.sizeInUse - builder._nonLinkEditReadOnlySize;
721 uint8_t* newLinkEdit = (uint8_t*)calloc(totalUnoptLinkeditsSize, 1);
722 SortedStringPool<P> stringPool;
723 uint32_t offset = 0;
724
725 builder._diagnostics.verbose("Merged LINKEDIT:\n");
726
727 // copy weak binding info
728 uint32_t startWeakBindInfosOffset = offset;
729 for (LinkeditOptimizer<P>* op : optimizers) {
730 // Skip chained fixups as the in-place linked list isn't valid any more
731 const dyld3::MachOFile* mf = (dyld3::MachOFile*)op->machHeader();
732 if (!mf->hasChainedFixups())
733 op->copyWeakBindingInfo(newLinkEdit, offset);
734 }
735 builder._diagnostics.verbose(" weak bindings size: %5uKB\n", (uint32_t)(offset-startWeakBindInfosOffset)/1024);
736
737 // copy export info
738 uint32_t startExportInfosOffset = offset;
739 for (LinkeditOptimizer<P>* op : optimizers) {
740 op->copyExportInfo(newLinkEdit, offset);
741 }
742 builder._diagnostics.verbose(" exports info size: %5uKB\n", (uint32_t)(offset-startExportInfosOffset)/1024);
743
744 // in theory, an optimized cache can drop the binding info
745 if ( true ) {
746 // copy binding info
747 uint32_t startBindingsInfosOffset = offset;
748 for (LinkeditOptimizer<P>* op : optimizers) {
749 // Skip chained fixups as the in-place linked list isn't valid any more
750 const dyld3::MachOFile* mf = (dyld3::MachOFile*)op->machHeader();
751 if (!mf->hasChainedFixups())
752 op->copyBindingInfo(newLinkEdit, offset);
753 }
754 builder._diagnostics.verbose(" bindings size: %5uKB\n", (uint32_t)(offset-startBindingsInfosOffset)/1024);
755
756 // copy lazy binding info
757 uint32_t startLazyBindingsInfosOffset = offset;
758 for (LinkeditOptimizer<P>* op : optimizers) {
759 // Skip chained fixups as the in-place linked list isn't valid any more
760 const dyld3::MachOFile* mf = (dyld3::MachOFile*)op->machHeader();
761 if (!mf->hasChainedFixups())
762 op->copyLazyBindingInfo(newLinkEdit, offset);
763 }
764 builder._diagnostics.verbose(" lazy bindings size: %5uKB\n", (offset-startLazyBindingsInfosOffset)/1024);
765 }
766
767 bool unmapLocals = ( builder._options.localSymbolMode == DyldSharedCache::LocalSymbolsMode::unmap );
768
769 // copy symbol table entries
770 std::vector<macho_nlist<P>> unmappedLocalSymbols;
771 if ( unmapLocals )
772 unmappedLocalSymbols.reserve(0x01000000);
773 std::vector<LocalSymbolInfo> localSymbolInfos;
774 localSymbolInfos.reserve(optimizers.size());
775 SortedStringPool<P> localSymbolsStringPool;
776 uint32_t symbolIndex = 0;
777 const uint32_t sharedSymbolTableStartOffset = offset;
778 uint32_t sharedSymbolTableExportsCount = 0;
779 uint32_t sharedSymbolTableImportsCount = 0;
780 for (LinkeditOptimizer<P>* op : optimizers) {
781 op->copyLocalSymbols(newLinkEdit, stringPool, offset, symbolIndex, unmapLocals,
782 localSymbolInfos, unmappedLocalSymbols, localSymbolsStringPool);
783 uint32_t x = symbolIndex;
784 op->copyExportedSymbols(newLinkEdit, stringPool, offset, symbolIndex);
785 sharedSymbolTableExportsCount += (symbolIndex-x);
786 uint32_t y = symbolIndex;
787 op->copyImportedSymbols(newLinkEdit, stringPool, offset, symbolIndex);
788 sharedSymbolTableImportsCount += (symbolIndex-y);
789 }
790 uint32_t sharedSymbolTableCount = symbolIndex;
791 const uint32_t sharedSymbolTableEndOffset = offset;
792
793 // copy function starts
794 uint32_t startFunctionStartsOffset = offset;
795 for (LinkeditOptimizer<P>* op : optimizers) {
796 op->copyFunctionStarts(newLinkEdit, offset);
797 }
798 builder._diagnostics.verbose(" function starts size: %5uKB\n", (offset-startFunctionStartsOffset)/1024);
799
800 // copy data-in-code info
801 uint32_t startDataInCodeOffset = offset;
802 for (LinkeditOptimizer<P>* op : optimizers) {
803 op->copyDataInCode(newLinkEdit, offset);
804 }
805 builder._diagnostics.verbose(" data in code size: %5uKB\n", (offset-startDataInCodeOffset)/1024);
806
807 // copy indirect symbol tables
808 for (LinkeditOptimizer<P>* op : optimizers) {
809 op->copyIndirectSymbolTable(newLinkEdit, offset);
810 }
811 // if indirect table has odd number of entries, end will not be 8-byte aligned
812 if ( (offset % sizeof(typename P::uint_t)) != 0 )
813 offset += 4;
814
815 // copy string pool
816 uint32_t sharedSymbolStringsOffset = offset;
817 uint32_t sharedSymbolStringsSize = stringPool.copyPoolAndUpdateOffsets((char*)&newLinkEdit[sharedSymbolStringsOffset], (macho_nlist<P>*)&newLinkEdit[sharedSymbolTableStartOffset]);
818 offset += sharedSymbolStringsSize;
819 uint32_t newLinkeditUnalignedSize = offset;
820 uint64_t newLinkeditAlignedSize = align(offset, 14);
821 builder._diagnostics.verbose(" symbol table size: %5uKB (%d exports, %d imports)\n", (sharedSymbolTableEndOffset-sharedSymbolTableStartOffset)/1024, sharedSymbolTableExportsCount, sharedSymbolTableImportsCount);
822 builder._diagnostics.verbose(" symbol string pool size: %5uKB\n", sharedSymbolStringsSize/1024);
823
824 // overwrite mapped LINKEDIT area in cache with new merged LINKEDIT content
825 builder._diagnostics.verbose("LINKEDITS optimized from %uMB to %uMB\n", (uint32_t)totalUnoptLinkeditsSize/(1024*1024), (uint32_t)newLinkeditUnalignedSize/(1024*1024));
826 ::memcpy(builder._readOnlyRegion.buffer+builder._nonLinkEditReadOnlySize, newLinkEdit, newLinkeditAlignedSize);
827 ::free(newLinkEdit);
828 builder._readOnlyRegion.sizeInUse = builder._nonLinkEditReadOnlySize + newLinkeditAlignedSize;
829
830 // overwrite end of un-opt linkedits to create a new unmapped region for local symbols
831 if ( unmapLocals ) {
832 const uint32_t entriesOffset = sizeof(dyld_cache_local_symbols_info);
833 const uint32_t entriesCount = (uint32_t)localSymbolInfos.size();
834 const uint32_t nlistOffset = (uint32_t)align(entriesOffset + entriesCount * sizeof(dyld_cache_local_symbols_info), 4); // 16-byte align start
835 const uint32_t nlistCount = (uint32_t)unmappedLocalSymbols.size();
836 const uint32_t stringsSize = (uint32_t)localSymbolsStringPool.size();
837 const uint32_t stringsOffset = nlistOffset + nlistCount * sizeof(macho_nlist<P>);
838 // allocate buffer for local symbols
839 const size_t localsBufferSize = align(stringsOffset + stringsSize, 14);
840 vm_address_t localsBuffer;
841 if ( ::vm_allocate(mach_task_self(), &localsBuffer, localsBufferSize, VM_FLAGS_ANYWHERE) == 0 ) {
842 dyld_cache_local_symbols_info* infoHeader = (dyld_cache_local_symbols_info*)localsBuffer;
843 // fill in header info
844 infoHeader->nlistOffset = nlistOffset;
845 infoHeader->nlistCount = nlistCount;
846 infoHeader->stringsOffset = stringsOffset;
847 infoHeader->stringsSize = stringsSize;
848 infoHeader->entriesOffset = entriesOffset;
849 infoHeader->entriesCount = entriesCount;
850 // copy info for each dylib
851 dyld_cache_local_symbols_entry* entries = (dyld_cache_local_symbols_entry*)(((uint8_t*)infoHeader)+entriesOffset);
852 for (uint32_t i=0; i < entriesCount; ++i) {
853 entries[i].dylibOffset = localSymbolInfos[i].dylibOffset;
854 entries[i].nlistStartIndex = localSymbolInfos[i].nlistStartIndex;
855 entries[i].nlistCount = localSymbolInfos[i].nlistCount;
856 }
857 // copy nlists
858 macho_nlist<P>* newLocalsSymbolTable = (macho_nlist<P>*)(localsBuffer+nlistOffset);
859 ::memcpy(newLocalsSymbolTable, &unmappedLocalSymbols[0], nlistCount*sizeof(macho_nlist<P>));
860 // copy string pool
861 localSymbolsStringPool.copyPoolAndUpdateOffsets(((char*)infoHeader)+stringsOffset, newLocalsSymbolTable);
862 // return buffer of local symbols, caller to free() it
863 localSymbolsRegion->buffer = (uint8_t*)localsBuffer;
864 localSymbolsRegion->bufferSize = localsBufferSize;
865 localSymbolsRegion->sizeInUse = localsBufferSize;
866 }
867 else {
868 builder._diagnostics.warning("could not allocate local symbols");
869 }
870 }
871
872 // update all load commands to new merged layout
873 uint64_t linkeditsUnslidStartAddr = builder._readOnlyRegion.unslidLoadAddress + builder._nonLinkEditReadOnlySize;
874 uint32_t linkeditsCacheFileOffset = (uint32_t)(builder._readOnlyRegion.cacheFileOffset + builder._nonLinkEditReadOnlySize);
875 for (LinkeditOptimizer<P>* op : optimizers) {
876 op->updateLoadCommands(linkeditsCacheFileOffset, linkeditsUnslidStartAddr, newLinkeditUnalignedSize,
877 sharedSymbolTableStartOffset, sharedSymbolTableCount,
878 sharedSymbolStringsOffset, sharedSymbolStringsSize);
879 }
880 }
881
882
883 template <typename P>
884 void LinkeditOptimizer<P>::optimizeLinkedit(CacheBuilder& builder, const void* containerBuffer,
885 CacheBuilder::UnmappedRegion* localSymbolsRegion,
886 const std::vector<std::tuple<const mach_header*, const char*, DylibStripMode>>& images)
887 {
888 // construct a LinkeditOptimizer for each image
889 __block std::vector<LinkeditOptimizer<P>*> optimizers;
890 for (std::tuple<const mach_header*, const char*, DylibStripMode> image : images) {
891 optimizers.push_back(new LinkeditOptimizer<P>(containerBuffer, (macho_header<P>*)std::get<0>(image), std::get<1>(image), builder._diagnostics));
892 optimizers.back()->setStripMode(std::get<2>(image));
893 }
894 #if 0
895 // add optimizer for each branch pool
896 for (uint64_t poolOffset : branchPoolOffsets) {
897 macho_header<P>* mh = (macho_header<P>*)((char*)cache + poolOffset);
898 optimizers.push_back(new LinkeditOptimizer<P>(cache, mh, diag));
899 }
900 #endif
901 // merge linkedit info
902 mergeLinkedits(builder, localSymbolsRegion, optimizers);
903
904 // delete optimizers
905 for (LinkeditOptimizer<P>* op : optimizers)
906 delete op;
907 }
908
909 void CacheBuilder::optimizeLinkedit(UnmappedRegion* localSymbolsRegion,
910 const std::vector<std::tuple<const mach_header*, const char*, DylibStripMode>>& images)
911 {
912 const void* buffer = (const void*)_fullAllocatedBuffer;
913 if ( _is64 ) {
914 return LinkeditOptimizer<Pointer64<LittleEndian>>::optimizeLinkedit(*this, buffer,
915 localSymbolsRegion, images);
916 }
917 else {
918 return LinkeditOptimizer<Pointer32<LittleEndian>>::optimizeLinkedit(*this, buffer,
919 localSymbolsRegion, images);
920 }
921 }
922
923
924