dyld-640.2.tar.gz
[apple/dyld.git] / launch-cache / dsc_extractor.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/stat.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/syslimits.h>
#include <arpa/inet.h>
#include <libkern/OSByteOrder.h>
#include <mach-o/fat.h>
#include <mach-o/arch.h>
#include <mach-o/loader.h>
#include <Availability.h>

#include "CodeSigningTypes.h"
#include <CommonCrypto/CommonHMAC.h>
#include <CommonCrypto/CommonDigest.h>
#include <CommonCrypto/CommonDigestSPI.h>

#define NO_ULEB
#include "Architectures.hpp"
#include "MachOFileAbstraction.hpp"
#include "CacheFileAbstraction.hpp"

#include "dsc_iterator.h"
#include "dsc_extractor.h"
#include "MachOTrie.hpp"
#include "SupportedArchs.h"
#include "DyldSharedCache.h"

#include <vector>
#include <set>
#include <map>
#include <unordered_map>
#include <algorithm>
#include <atomic>
#include <utility>
#include <dispatch/dispatch.h>
63
// One segment of a dylib: the segment's name together with an offset and a size.
// The name pointer is stored as given; this struct never copies or frees it.
struct seg_info
{
    const char* segName;    // segment name, e.g. "__TEXT" or "__LINKEDIT"
    uint64_t    offset;     // offset of the segment's bytes within the mapped cache
    uint64_t    sizem;      // number of bytes the segment occupies

    seg_info(const char* name, uint64_t segOffset, uint64_t segSize)
        : segName(name), offset(segOffset), sizem(segSize)
    {
    }
};
72
// Hash functor for NUL-terminated C strings, used so that `const char*` keys
// are hashed by content.  Each character folds in as  hash = 5 * hash + c.
class CStringHash {
public:
    size_t operator()(const char* str) const {
        size_t hash = 0;
        while ( *str != '\0' ) {
            hash = 5 * hash + *str;
            ++str;
        }
        return hash;
    }
};
// Equality functor for NUL-terminated C strings: two keys are equal when
// strcmp() reports identical contents, regardless of pointer identity.
class CStringEquals {
public:
    bool operator()(const char* left, const char* right) const {
        const int order = strcmp(left, right);
        return order == 0;
    }
};
86 typedef std::unordered_map<const char*, std::vector<seg_info>, CStringHash, CStringEquals> NameToSegments;
87
88 // Filter to find individual symbol re-exports in trie
89 class NotReExportSymbol {
90 public:
91 NotReExportSymbol(const std::set<int> &rd) :_reexportDeps(rd) {}
92 bool operator()(const mach_o::trie::Entry &entry) const {
93 bool result = isSymbolReExport(entry);
94 if (result) {
95 // <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
96 ::free((void*)entry.name);
97 const_cast<mach_o::trie::Entry*>(&entry)->name = NULL;
98 }
99 return result;
100 }
101 private:
102 bool isSymbolReExport(const mach_o::trie::Entry &entry) const {
103 if ( (entry.flags & EXPORT_SYMBOL_FLAGS_KIND_MASK) != EXPORT_SYMBOL_FLAGS_KIND_REGULAR )
104 return true;
105 if ( (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) == 0 )
106 return true;
107 // If the symbol comes from a dylib that is re-exported, this is not an individual symbol re-export
108 if ( _reexportDeps.count((int)entry.other) != 0 )
109 return true;
110 return false;
111 }
112 const std::set<int> &_reexportDeps;
113 };
114
// Empty placeholder template; it declares no members.
template <typename P>
struct LoadCommandInfo
{
};
118
119 template <typename A>
120 class LinkeditOptimizer {
121 typedef typename A::P P;
122 typedef typename A::P::E E;
123 typedef typename A::P::uint_t pint_t;
124
125 private:
126 macho_segment_command<P>* linkEditSegCmd = NULL;
127 macho_symtab_command<P>* symtab = NULL;
128 macho_dysymtab_command<P>* dynamicSymTab = NULL;
129 macho_linkedit_data_command<P>* functionStarts = NULL;
130 macho_linkedit_data_command<P>* dataInCode = NULL;
131 uint32_t exportsTrieOffset = 0;
132 uint32_t exportsTrieSize = 0;
133 std::set<int> reexportDeps;
134
135 public:
136
137 void optimize_loadcommands(macho_header<typename A::P>* mh)
138 {
139 typedef typename A::P P;
140 typedef typename A::P::E E;
141 typedef typename A::P::uint_t pint_t;
142
143 // update header flags
144 mh->set_flags(mh->flags() & 0x7FFFFFFF); // remove in-cache bit
145
146 // update load commands
147 uint64_t cumulativeFileSize = 0;
148 const unsigned origLoadCommandsSize = mh->sizeofcmds();
149 unsigned bytesRemaining = origLoadCommandsSize;
150 unsigned removedCount = 0;
151 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
152 const uint32_t cmdCount = mh->ncmds();
153 const macho_load_command<P>* cmd = cmds;
154 int depIndex = 0;
155 for (uint32_t i = 0; i < cmdCount; ++i) {
156 bool remove = false;
157 switch ( cmd->cmd() ) {
158 case macho_segment_command<P>::CMD:
159 {
160 // update segment/section file offsets
161 macho_segment_command<P>* segCmd = (macho_segment_command<P>*)cmd;
162 segCmd->set_fileoff(cumulativeFileSize);
163 segCmd->set_filesize(segCmd->vmsize());
164 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
165 macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
166 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
167 if ( sect->offset() != 0 )
168 sect->set_offset((uint32_t)(cumulativeFileSize+sect->addr()-segCmd->vmaddr()));
169 }
170 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
171 linkEditSegCmd = segCmd;
172 }
173 cumulativeFileSize += segCmd->filesize();
174 break;
175 }
176 case LC_DYLD_INFO_ONLY:
177 {
178 // zero out all dyld info
179 macho_dyld_info_command<P>* dyldInfo = (macho_dyld_info_command<P>*)cmd;
180 exportsTrieOffset = dyldInfo->export_off();
181 exportsTrieSize = dyldInfo->export_size();
182 dyldInfo->set_rebase_off(0);
183 dyldInfo->set_rebase_size(0);
184 dyldInfo->set_bind_off(0);
185 dyldInfo->set_bind_size(0);
186 dyldInfo->set_weak_bind_off(0);
187 dyldInfo->set_weak_bind_size(0);
188 dyldInfo->set_lazy_bind_off(0);
189 dyldInfo->set_lazy_bind_size(0);
190 dyldInfo->set_export_off(0);
191 dyldInfo->set_export_size(0);
192 }
193 break;
194 case LC_SYMTAB:
195 symtab = (macho_symtab_command<P>*)cmd;
196 break;
197 case LC_DYSYMTAB:
198 dynamicSymTab = (macho_dysymtab_command<P>*)cmd;
199 break;
200 case LC_FUNCTION_STARTS:
201 functionStarts = (macho_linkedit_data_command<P>*)cmd;
202 break;
203 case LC_DATA_IN_CODE:
204 dataInCode = (macho_linkedit_data_command<P>*)cmd;
205 break;
206 case LC_LOAD_DYLIB:
207 case LC_LOAD_WEAK_DYLIB:
208 case LC_REEXPORT_DYLIB:
209 case LC_LOAD_UPWARD_DYLIB:
210 ++depIndex;
211 if ( cmd->cmd() == LC_REEXPORT_DYLIB ) {
212 reexportDeps.insert(depIndex);
213 }
214 break;
215 case LC_SEGMENT_SPLIT_INFO:
216 // <rdar://problem/23212513> dylibs iOS 9 dyld caches have bogus LC_SEGMENT_SPLIT_INFO
217 remove = true;
218 break;
219 }
220 uint32_t cmdSize = cmd->cmdsize();
221 macho_load_command<P>* nextCmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmdSize);
222 if ( remove ) {
223 ::memmove((void*)cmd, (void*)nextCmd, bytesRemaining);
224 ++removedCount;
225 }
226 else {
227 bytesRemaining -= cmdSize;
228 cmd = nextCmd;
229 }
230 }
231 // zero out stuff removed
232 ::bzero((void*)cmd, bytesRemaining);
233 // update header
234 mh->set_ncmds(cmdCount - removedCount);
235 mh->set_sizeofcmds(origLoadCommandsSize - bytesRemaining);
236 }
237
238 int optimize_linkedit(std::vector<uint8_t> &new_linkedit_data, uint64_t textOffsetInCache, const void* mapped_cache)
239 {
240 typedef typename A::P P;
241 typedef typename A::P::E E;
242 typedef typename A::P::uint_t pint_t;
243
244 // rebuild symbol table
245 if ( linkEditSegCmd == NULL ) {
246 fprintf(stderr, "__LINKEDIT not found\n");
247 return -1;
248 }
249 if ( symtab == NULL ) {
250 fprintf(stderr, "LC_SYMTAB not found\n");
251 return -1;
252 }
253 if ( dynamicSymTab == NULL ) {
254 fprintf(stderr, "LC_DYSYMTAB not found\n");
255 return -1;
256 }
257
258 const uint64_t newFunctionStartsOffset = new_linkedit_data.size();
259 uint32_t functionStartsSize = 0;
260 if ( functionStarts != NULL ) {
261 // copy function starts from original cache file to new mapped dylib file
262 functionStartsSize = functionStarts->datasize();
263 new_linkedit_data.insert(new_linkedit_data.end(),
264 (char*)mapped_cache + functionStarts->dataoff(),
265 (char*)mapped_cache + functionStarts->dataoff() + functionStartsSize);
266 }
267
268 // pointer align
269 while ((linkEditSegCmd->fileoff() + new_linkedit_data.size()) % sizeof(pint_t))
270 new_linkedit_data.push_back(0);
271
272 const uint64_t newDataInCodeOffset = new_linkedit_data.size();
273 uint32_t dataInCodeSize = 0;
274 if ( dataInCode != NULL ) {
275 // copy data-in-code info from original cache file to new mapped dylib file
276 dataInCodeSize = dataInCode->datasize();
277 new_linkedit_data.insert(new_linkedit_data.end(),
278 (char*)mapped_cache + dataInCode->dataoff(),
279 (char*)mapped_cache + dataInCode->dataoff() + dataInCodeSize);
280 }
281
282 std::vector<mach_o::trie::Entry> exports;
283 if ( exportsTrieSize != 0 ) {
284 const uint8_t* exportsStart = ((uint8_t*)mapped_cache) + exportsTrieOffset;
285 const uint8_t* exportsEnd = &exportsStart[exportsTrieSize];
286 mach_o::trie::parseTrie(exportsStart, exportsEnd, exports);
287 exports.erase(std::remove_if(exports.begin(), exports.end(), NotReExportSymbol(reexportDeps)), exports.end());
288 }
289
290 // look for local symbol info in unmapped part of shared cache
291 dyldCacheHeader<E>* header = (dyldCacheHeader<E>*)mapped_cache;
292 macho_nlist<P>* localNlists = NULL;
293 uint32_t localNlistCount = 0;
294 const char* localStrings = NULL;
295 const char* localStringsEnd = NULL;
296 if ( header->mappingOffset() > offsetof(dyld_cache_header,localSymbolsSize) ) {
297 dyldCacheLocalSymbolsInfo<E>* localInfo = (dyldCacheLocalSymbolsInfo<E>*)(((uint8_t*)mapped_cache) + header->localSymbolsOffset());
298 dyldCacheLocalSymbolEntry<E>* entries = (dyldCacheLocalSymbolEntry<E>*)(((uint8_t*)mapped_cache) + header->localSymbolsOffset() + localInfo->entriesOffset());
299 macho_nlist<P>* allLocalNlists = (macho_nlist<P>*)(((uint8_t*)localInfo) + localInfo->nlistOffset());
300 const uint32_t entriesCount = localInfo->entriesCount();
301 for (uint32_t i=0; i < entriesCount; ++i) {
302 if ( entries[i].dylibOffset() == textOffsetInCache ) {
303 uint32_t localNlistStart = entries[i].nlistStartIndex();
304 localNlistCount = entries[i].nlistCount();
305 localNlists = &allLocalNlists[localNlistStart];
306 localStrings = ((char*)localInfo) + localInfo->stringsOffset();
307 localStringsEnd = &localStrings[localInfo->stringsSize()];
308 break;
309 }
310 }
311 }
312 // compute number of symbols in new symbol table
313 const macho_nlist<P>* const mergedSymTabStart = (macho_nlist<P>*)(((uint8_t*)mapped_cache) + symtab->symoff());
314 const macho_nlist<P>* const mergedSymTabend = &mergedSymTabStart[symtab->nsyms()];
315 uint32_t newSymCount = symtab->nsyms();
316 if ( localNlists != NULL ) {
317 newSymCount = localNlistCount;
318 for (const macho_nlist<P>* s = mergedSymTabStart; s != mergedSymTabend; ++s) {
319 // skip any locals in cache
320 if ( (s->n_type() & (N_TYPE|N_EXT)) == N_SECT )
321 continue;
322 ++newSymCount;
323 }
324 }
325
326 // add room for N_INDR symbols for re-exported symbols
327 newSymCount += exports.size();
328
329 // copy symbol entries and strings from original cache file to new mapped dylib file
330 const char* mergedStringPoolStart = (char*)mapped_cache + symtab->stroff();
331 const char* mergedStringPoolEnd = &mergedStringPoolStart[symtab->strsize()];
332
333 // First count how many entries we need
334 std::vector<macho_nlist<P>> newSymTab;
335 newSymTab.reserve(newSymCount);
336 std::vector<char> newSymNames;
337
338 // first pool entry is always empty string
339 newSymNames.push_back('\0');
340
341 for (const macho_nlist<P>* s = mergedSymTabStart; s != mergedSymTabend; ++s) {
342 // if we have better local symbol info, skip any locals here
343 if ( (localNlists != NULL) && ((s->n_type() & (N_TYPE|N_EXT)) == N_SECT) )
344 continue;
345 macho_nlist<P> t = *s;
346 t.set_n_strx((uint32_t)newSymNames.size());
347 const char* symName = &mergedStringPoolStart[s->n_strx()];
348 if ( symName > mergedStringPoolEnd )
349 symName = "<corrupt symbol name>";
350 newSymNames.insert(newSymNames.end(),
351 symName,
352 symName + (strlen(symName) + 1));
353 newSymTab.push_back(t);
354 }
355 // <rdar://problem/16529213> recreate N_INDR symbols in extracted dylibs for debugger
356 for (std::vector<mach_o::trie::Entry>::iterator it = exports.begin(); it != exports.end(); ++it) {
357 macho_nlist<P> t;
358 memset(&t, 0, sizeof(t));
359 t.set_n_strx((uint32_t)newSymNames.size());
360 t.set_n_type(N_INDR | N_EXT);
361 t.set_n_sect(0);
362 t.set_n_desc(0);
363 newSymNames.insert(newSymNames.end(),
364 it->name,
365 it->name + (strlen(it->name) + 1));
366 const char* importName = it->importName;
367 if ( *importName == '\0' )
368 importName = it->name;
369 t.set_n_value(newSymNames.size());
370 newSymNames.insert(newSymNames.end(),
371 importName,
372 importName + (strlen(importName) + 1));
373 newSymTab.push_back(t);
374 }
375 if ( localNlists != NULL ) {
376 // update load command to reflect new count of locals
377 dynamicSymTab->set_ilocalsym((uint32_t)newSymTab.size());
378 dynamicSymTab->set_nlocalsym(localNlistCount);
379 // copy local symbols
380 for (uint32_t i=0; i < localNlistCount; ++i) {
381 const char* localName = &localStrings[localNlists[i].n_strx()];
382 if ( localName > localStringsEnd )
383 localName = "<corrupt local symbol name>";
384 macho_nlist<P> t = localNlists[i];
385 t.set_n_strx((uint32_t)newSymNames.size());
386 newSymNames.insert(newSymNames.end(),
387 localName,
388 localName + (strlen(localName) + 1));
389 newSymTab.push_back(t);
390 }
391 }
392
393 if ( newSymCount != newSymTab.size() ) {
394 fprintf(stderr, "symbol count miscalculation\n");
395 return -1;
396 }
397
398 //const uint64_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
399 //macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
400 //char* const newStringPoolStart = (char*)mh + newStringPoolOffset;
401
402 // pointer align
403 while ((linkEditSegCmd->fileoff() + new_linkedit_data.size()) % sizeof(pint_t))
404 new_linkedit_data.push_back(0);
405
406 const uint64_t newSymTabOffset = new_linkedit_data.size();
407
408 // Copy sym tab
409 for (macho_nlist<P>& sym : newSymTab) {
410 uint8_t symData[sizeof(macho_nlist<P>)];
411 memcpy(&symData, &sym, sizeof(sym));
412 new_linkedit_data.insert(new_linkedit_data.end(), &symData[0], &symData[sizeof(macho_nlist<P>)]);
413 }
414
415 const uint64_t newIndSymTabOffset = new_linkedit_data.size();
416
417 // Copy indirect symbol table
418 const uint32_t* mergedIndSymTab = (uint32_t*)((char*)mapped_cache + dynamicSymTab->indirectsymoff());
419 new_linkedit_data.insert(new_linkedit_data.end(),
420 (char*)mergedIndSymTab,
421 (char*)(mergedIndSymTab + dynamicSymTab->nindirectsyms()));
422
423 const uint64_t newStringPoolOffset = new_linkedit_data.size();
424
425 // pointer align string pool size
426 while (newSymNames.size() % sizeof(pint_t))
427 newSymNames.push_back('\0');
428
429 new_linkedit_data.insert(new_linkedit_data.end(), newSymNames.begin(), newSymNames.end());
430
431 // update load commands
432 if ( functionStarts != NULL ) {
433 functionStarts->set_dataoff((uint32_t)(newFunctionStartsOffset + linkEditSegCmd->fileoff()));
434 functionStarts->set_datasize(functionStartsSize);
435 }
436 if ( dataInCode != NULL ) {
437 dataInCode->set_dataoff((uint32_t)(newDataInCodeOffset + linkEditSegCmd->fileoff()));
438 dataInCode->set_datasize(dataInCodeSize);
439 }
440
441 symtab->set_nsyms(newSymCount);
442 symtab->set_symoff((uint32_t)(newSymTabOffset + linkEditSegCmd->fileoff()));
443 symtab->set_stroff((uint32_t)(newStringPoolOffset + linkEditSegCmd->fileoff()));
444 symtab->set_strsize((uint32_t)newSymNames.size());
445 dynamicSymTab->set_extreloff(0);
446 dynamicSymTab->set_nextrel(0);
447 dynamicSymTab->set_locreloff(0);
448 dynamicSymTab->set_nlocrel(0);
449 dynamicSymTab->set_indirectsymoff((uint32_t)(newIndSymTabOffset + linkEditSegCmd->fileoff()));
450 linkEditSegCmd->set_filesize(symtab->stroff()+symtab->strsize() - linkEditSegCmd->fileoff());
451 linkEditSegCmd->set_vmsize( (linkEditSegCmd->filesize()+4095) & (-4096) );
452
453 // <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
454 for (std::vector<mach_o::trie::Entry>::iterator it = exports.begin(); it != exports.end(); ++it) {
455 ::free((void*)(it->name));
456 }
457
458
459 return 0;
460 }
461
462 };
463
// Creates every missing directory on the way to `file_path`.
//
// Only the part of the path up to and including its last '/' is considered; a
// path without any '/' is left alone.  Nothing is done when that directory
// prefix already exists.  Otherwise each prefix ending at a '/' is passed to
// mkdir() with mode rwxr-xr-x; mkdir() failures (for example a prefix that
// already exists) are deliberately ignored.
static void make_dirs(const char* file_path)
{
    //printf("make_dirs(%s)\n", file_path);
    // Private, mutable copy of the path.  A heap-backed buffer is used instead
    // of a variable-length array, which is not standard C++ and would place an
    // arbitrarily long path on the stack.
    std::vector<char> pathCopy(file_path, file_path + strlen(file_path) + 1);
    char* const dirs = &pathCopy[0];

    char* lastSlash = strrchr(dirs, '/');
    if ( lastSlash == NULL )
        return;
    // truncate right after the last slash so only the directory part remains
    lastSlash[1] = '\0';

    struct stat stat_buf;
    if ( stat(dirs, &stat_buf) == 0 )
        return;

    // Start searching at index 1 so that a leading '/' is not treated as a
    // directory to create.
    char* afterSlash = &dirs[1];
    char* slash;
    while ( (slash = strchr(afterSlash, '/')) != NULL ) {
        // temporarily terminate the string at this slash to name the prefix
        *slash = '\0';
        ::mkdir(dirs, S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
        //printf("mkdir(%s)\n", dirs);
        *slash = '/';
        afterSlash = slash+1;
    }
}
486
487
488
489 template <typename A>
490 size_t dylib_maker(const void* mapped_cache, std::vector<uint8_t> &dylib_data, const std::vector<seg_info>& segments) {
491 typedef typename A::P P;
492
493 int32_t nfat_archs = 0;
494 uint32_t offsetInFatFile = 4096;
495 uint8_t *base_ptr = &dylib_data.front();
496
497 #define FH reinterpret_cast<fat_header*>(base_ptr)
498 #define FA reinterpret_cast<fat_arch*>(base_ptr + (8 + (nfat_archs - 1) * sizeof(fat_arch)))
499
500 if(dylib_data.size() >= 4096 && OSSwapBigToHostInt32(FH->magic) == FAT_MAGIC) {
501 // have fat header, append new arch to end
502 nfat_archs = OSSwapBigToHostInt32(FH->nfat_arch);
503 offsetInFatFile = OSSwapBigToHostInt32(FA->offset) + OSSwapBigToHostInt32(FA->size);
504 }
505
506 // First see if this slice already exists.
507 for(std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
508 if(strcmp(it->segName, "__TEXT") == 0 ) {
509 const macho_header<P> *textMH = reinterpret_cast<macho_header<P>*>((uint8_t*)mapped_cache+it->offset);
510
511 // if this cputype/subtype already exist in fat header, then return immediately
512 for(int32_t i=0; i < nfat_archs; ++i) {
513 fat_arch *afa = reinterpret_cast<fat_arch*>(base_ptr+8)+i;
514 if (afa->cputype == (cpu_type_t)OSSwapHostToBigInt32(textMH->cputype()) && afa->cpusubtype == (cpu_type_t)OSSwapHostToBigInt32(textMH->cpusubtype())) {
515 //fprintf(stderr, "arch already exists in fat dylib\n");
516 return offsetInFatFile;
517 }
518 }
519 }
520 }
521
522 if (dylib_data.empty()) {
523 // Reserve space for the fat header.
524 dylib_data.resize(4096);
525 base_ptr = &dylib_data.front();
526 FH->magic = OSSwapHostToBigInt32(FAT_MAGIC);
527 }
528
529 FH->nfat_arch = OSSwapHostToBigInt32(++nfat_archs);
530
531 FA->cputype = 0; // filled in later
532 FA->cpusubtype = 0; // filled in later
533 FA->offset = OSSwapHostToBigInt32(offsetInFatFile);
534 FA->size = 0; // filled in later
535 FA->align = OSSwapHostToBigInt32(12);
536
537 size_t additionalSize = 0;
538 for(std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
539 if ( strcmp(it->segName, "__LINKEDIT") != 0 )
540 additionalSize += it->sizem;
541 }
542
543 std::vector<uint8_t> new_dylib_data;
544 new_dylib_data.reserve(additionalSize);
545
546 // Write regular segments into the buffer
547 uint64_t textOffsetInCache = 0;
548 for( std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
549
550 if(strcmp(it->segName, "__TEXT") == 0 ) {
551 textOffsetInCache = it->offset;
552 const macho_header<P> *textMH = reinterpret_cast<macho_header<P>*>((uint8_t*)mapped_cache+textOffsetInCache);
553 FA->cputype = OSSwapHostToBigInt32(textMH->cputype());
554 FA->cpusubtype = OSSwapHostToBigInt32(textMH->cpusubtype());
555 }
556
557 //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
558 // Copy all but the __LINKEDIT. It will be copied later during the optimizer in to a temporary buffer but it would
559 // not be efficient to copy it all now for each dylib.
560 if (strcmp(it->segName, "__LINKEDIT") == 0 )
561 continue;
562 std::copy(((uint8_t*)mapped_cache)+it->offset, ((uint8_t*)mapped_cache)+it->offset+it->sizem, std::back_inserter(new_dylib_data));
563 }
564
565 // optimize linkedit
566 std::vector<uint8_t> new_linkedit_data;
567 new_linkedit_data.reserve(1 << 20);
568
569 LinkeditOptimizer<A> linkeditOptimizer;
570 macho_header<P>* mh = (macho_header<P>*)&new_dylib_data.front();
571 linkeditOptimizer.optimize_loadcommands(mh);
572 linkeditOptimizer.optimize_linkedit(new_linkedit_data, textOffsetInCache, mapped_cache);
573
574 new_dylib_data.insert(new_dylib_data.end(), new_linkedit_data.begin(), new_linkedit_data.end());
575
576 // Page align file
577 while (new_dylib_data.size() % 4096)
578 new_dylib_data.push_back(0);
579
580 // update fat header with new file size
581 FA->size = OSSwapHostToBigInt32(new_dylib_data.size());
582 #undef FH
583 #undef FA
584 dylib_data.insert(dylib_data.end(), new_dylib_data.begin(), new_dylib_data.end());
585 return offsetInFatFile;
586 }
587
588 typedef __typeof(dylib_maker<x86>) dylib_maker_func;
589 typedef void (^progress_block)(unsigned current, unsigned total);
590
591 class SharedCacheExtractor;
592 struct SharedCacheDylibExtractor {
593 SharedCacheDylibExtractor(const char* name, std::vector<seg_info> segInfo)
594 : name(name), segInfo(segInfo) { }
595
596 void extractCache(SharedCacheExtractor& context);
597
598 const char* name;
599 const std::vector<seg_info> segInfo;
600 int result = 0;
601 };
602
603 struct SharedCacheExtractor {
604 SharedCacheExtractor(const NameToSegments& map,
605 const char* extraction_root_path,
606 dylib_maker_func* dylib_create_func,
607 void* mapped_cache,
608 progress_block progress)
609 : map(map), extraction_root_path(extraction_root_path),
610 dylib_create_func(dylib_create_func), mapped_cache(mapped_cache),
611 progress(progress) {
612
613 extractors.reserve(map.size());
614 for (const std::pair<const char*, std::vector<seg_info>>& it : map)
615 extractors.emplace_back(it.first, it.second);
616
617 // Limit the number of open files. 16 seems to give better performance than higher numbers.
618 sema = dispatch_semaphore_create(16);
619 }
620 int extractCaches();
621
622 static void extractCache(void *ctx, size_t i);
623
624 const NameToSegments& map;
625 std::vector<SharedCacheDylibExtractor> extractors;
626 dispatch_semaphore_t sema;
627 const char* extraction_root_path;
628 dylib_maker_func* dylib_create_func;
629 void* mapped_cache;
630 progress_block progress;
631 std::atomic_int count = { 0 };
632 };
633
634 int SharedCacheExtractor::extractCaches() {
635 dispatch_queue_t process_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW, 0);
636 dispatch_apply_f(map.size(), process_queue,
637 this, extractCache);
638
639 int result = 0;
640 for (const SharedCacheDylibExtractor& extractor : extractors) {
641 if (extractor.result != 0) {
642 result = extractor.result;
643 break;
644 }
645 }
646 return result;
647 }
648
649 void SharedCacheExtractor::extractCache(void *ctx, size_t i) {
650 SharedCacheExtractor& context = *(SharedCacheExtractor*)ctx;
651 dispatch_semaphore_wait(context.sema, DISPATCH_TIME_FOREVER);
652 context.extractors[i].extractCache(context);
653 dispatch_semaphore_signal(context.sema);
654 }
655
656 void SharedCacheDylibExtractor::extractCache(SharedCacheExtractor &context) {
657
658 char dylib_path[PATH_MAX];
659 strcpy(dylib_path, context.extraction_root_path);
660 strcat(dylib_path, "/");
661 strcat(dylib_path, name);
662
663 //printf("%s with %lu segments\n", dylib_path, it->second.size());
664 // make sure all directories in this path exist
665 make_dirs(dylib_path);
666
667 // open file, create if does not already exist
668 int fd = ::open(dylib_path, O_CREAT | O_EXLOCK | O_RDWR, 0644);
669 if ( fd == -1 ) {
670 fprintf(stderr, "can't open or create dylib file %s, errnor=%d\n", dylib_path, errno);
671 result = -1;
672 return;
673 }
674
675 struct stat statbuf;
676 if (fstat(fd, &statbuf)) {
677 fprintf(stderr, "Error: stat failed for dyld file %s, errnor=%d\n", dylib_path, errno);
678 close(fd);
679 result = -1;
680 return;
681 }
682
683 std::vector<uint8_t> vec((size_t)statbuf.st_size);
684 if(pread(fd, &vec.front(), vec.size(), 0) != (long)vec.size()) {
685 fprintf(stderr, "can't read dylib file %s, errnor=%d\n", dylib_path, errno);
686 close(fd);
687 result = -1;
688 return;
689 }
690
691 const size_t offset = context.dylib_create_func(context.mapped_cache, vec, segInfo);
692 context.progress(context.count++, (unsigned)context.map.size());
693
694 if(offset != vec.size()) {
695 //Write out the first page, and everything after offset
696 if( pwrite(fd, &vec.front(), 4096, 0) == -1
697 || pwrite(fd, &vec.front() + offset, vec.size() - offset, offset) == -1) {
698 fprintf(stderr, "error writing, errnor=%d\n", errno);
699 result = -1;
700 }
701 }
702
703 close(fd);
704 }
705
706 static int sharedCacheIsValid(const void* mapped_cache, uint64_t size) {
707 // First check that the size is good.
708 // Note the shared cache may not have a codeSignatureSize value set so we need to first make
709 // sure we have space for the CS_SuperBlob, then later crack that to check for the size of the rest.
710 const DyldSharedCache* dyldSharedCache = (DyldSharedCache*)mapped_cache;
711 uint64_t requiredSizeForCSSuperBlob = dyldSharedCache->header.codeSignatureOffset + sizeof(CS_SuperBlob);
712 const dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)((uint8_t*)mapped_cache + dyldSharedCache->header.mappingOffset);
713 if ( requiredSizeForCSSuperBlob > size ) {
714 fprintf(stderr, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size, requiredSizeForCSSuperBlob);
715 return -1;
716 }
717
718 // Now see if the code signatures are valid as that tells us the pages aren't corrupt.
719 // First find all of the regions of the shared cache we computed cd hashes
720 std::vector<std::pair<uint64_t, uint64_t>> sharedCacheRegions;
721 sharedCacheRegions.emplace_back(std::make_pair(mappings[0].fileOffset, mappings[0].fileOffset + mappings[0].size));
722 sharedCacheRegions.emplace_back(std::make_pair(mappings[1].fileOffset, mappings[1].fileOffset + mappings[1].size));
723 sharedCacheRegions.emplace_back(std::make_pair(mappings[2].fileOffset, mappings[2].fileOffset + mappings[2].size));
724 if (dyldSharedCache->header.localSymbolsSize)
725 sharedCacheRegions.emplace_back(std::make_pair(dyldSharedCache->header.localSymbolsOffset, dyldSharedCache->header.localSymbolsOffset + dyldSharedCache->header.localSymbolsSize));
726 size_t inBbufferSize = 0;
727 for (auto& sharedCacheRegion : sharedCacheRegions)
728 inBbufferSize += (sharedCacheRegion.second - sharedCacheRegion.first);
729 uint32_t slotCountFromRegions = (uint32_t)((inBbufferSize + CS_PAGE_SIZE - 1) / CS_PAGE_SIZE);
730
731 // Now take the cd hash from the cache itself and validate the regions we found.
732 uint8_t* codeSignatureRegion = (uint8_t*)mapped_cache + dyldSharedCache->header.codeSignatureOffset;
733 CS_SuperBlob* sb = reinterpret_cast<CS_SuperBlob*>(codeSignatureRegion);
734 if (sb->magic != htonl(CSMAGIC_EMBEDDED_SIGNATURE)) {
735 fprintf(stderr, "Error: dyld shared cache code signature magic is incorrect.\n");
736 return -1;
737 }
738
739 size_t sbSize = ntohl(sb->length);
740 uint64_t requiredSizeForCS = dyldSharedCache->header.codeSignatureOffset + sbSize;
741 if ( requiredSizeForCS > size ) {
742 fprintf(stderr, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size, requiredSizeForCS);
743 return -1;
744 }
745
746 // Find the offset to the code directory.
747 CS_CodeDirectory* cd = nullptr;
748 for (unsigned i =0; i != sb->count; ++i) {
749 if (ntohl(sb->index[i].type) == CSSLOT_CODEDIRECTORY) {
750 cd = (CS_CodeDirectory*)(codeSignatureRegion + ntohl(sb->index[i].offset));
751 break;
752 }
753 }
754
755 if (!cd) {
756 fprintf(stderr, "Error: dyld shared cache code signature directory is missing.\n");
757 return -1;
758 }
759
760 if ( (uint8_t*)cd > (codeSignatureRegion + sbSize) ) {
761 fprintf(stderr, "Error: dyld shared cache code signature directory is out of bounds.\n");
762 return -1;
763 }
764
765 if ( cd->magic != htonl(CSMAGIC_CODEDIRECTORY) ) {
766 fprintf(stderr, "Error: dyld shared cache code signature directory magic is incorrect.\n");
767 return -1;
768 }
769
770 if ( ntohl(cd->nCodeSlots) < slotCountFromRegions ) {
771 fprintf(stderr, "Error: dyld shared cache code signature directory num slots is incorrect.\n");
772 return -1;
773 }
774
775 uint32_t dscDigestFormat = kCCDigestNone;
776 switch (cd->hashType) {
777 case CS_HASHTYPE_SHA1:
778 dscDigestFormat = kCCDigestSHA1;
779 break;
780 case CS_HASHTYPE_SHA256:
781 dscDigestFormat = kCCDigestSHA256;
782 break;
783 default:
784 break;
785 }
786
787 if (dscDigestFormat != kCCDigestNone) {
788 const uint64_t csPageSize = 1 << cd->pageSize;
789 size_t hashOffset = ntohl(cd->hashOffset);
790 uint8_t* hashSlot = (uint8_t*)cd + hashOffset;
791 uint8_t cdHashBuffer[cd->hashSize];
792
793 // Skip local symbols for now as those aren't being codesign correctly right now.
794 size_t inBbufferSize = 0;
795 for (auto& sharedCacheRegion : sharedCacheRegions) {
796 if (sharedCacheRegion.first == dyldSharedCache->header.localSymbolsOffset)
797 continue;
798 inBbufferSize += (sharedCacheRegion.second - sharedCacheRegion.first);
799 }
800 uint32_t slotCountToProcess = (uint32_t)((inBbufferSize + CS_PAGE_SIZE - 1) / CS_PAGE_SIZE);
801
802 for (unsigned i = 0; i != slotCountToProcess; ++i) {
803 // Skip data pages as those may have been slid by ASLR in the extracted file
804 uint64_t fileOffset = i * csPageSize;
805 if ( (fileOffset >= mappings[1].fileOffset) && (fileOffset < (mappings[1].fileOffset + mappings[1].size)) )
806 continue;
807
808 CCDigest(dscDigestFormat, (uint8_t*)mapped_cache + fileOffset, csPageSize, cdHashBuffer);
809 uint8_t* cacheCdHashBuffer = hashSlot + (i * cd->hashSize);
810 if (memcmp(cdHashBuffer, cacheCdHashBuffer, cd->hashSize) != 0) {
811 fprintf(stderr, "Error: dyld shared cache code signature for page %d is incorrect.\n", i);
812 return -1;
813 }
814 }
815 }
816 return 0;
817 }
818
819 int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path, const char* extraction_root_path,
820 progress_block progress)
821 {
822 struct stat statbuf;
823 if (stat(shared_cache_file_path, &statbuf)) {
824 fprintf(stderr, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path);
825 return -1;
826 }
827
828 int cache_fd = open(shared_cache_file_path, O_RDONLY);
829 if (cache_fd < 0) {
830 fprintf(stderr, "Error: failed to open shared cache file at %s\n", shared_cache_file_path);
831 return -1;
832 }
833
834 void* mapped_cache = mmap(NULL, (size_t)statbuf.st_size, PROT_READ, MAP_PRIVATE, cache_fd, 0);
835 if (mapped_cache == MAP_FAILED) {
836 fprintf(stderr, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path, errno);
837 return -1;
838 }
839
840 close(cache_fd);
841
842 // instantiate arch specific dylib maker
843 dylib_maker_func* dylib_create_func = nullptr;
844 if ( strcmp((char*)mapped_cache, "dyld_v1 i386") == 0 )
845 dylib_create_func = dylib_maker<x86>;
846 else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64") == 0 )
847 dylib_create_func = dylib_maker<x86_64>;
848 else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64h") == 0 )
849 dylib_create_func = dylib_maker<x86_64>;
850 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv5") == 0 )
851 dylib_create_func = dylib_maker<arm>;
852 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv6") == 0 )
853 dylib_create_func = dylib_maker<arm>;
854 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv7") == 0 )
855 dylib_create_func = dylib_maker<arm>;
856 else if ( strncmp((char*)mapped_cache, "dyld_v1 armv7", 14) == 0 )
857 dylib_create_func = dylib_maker<arm>;
858 else if ( strcmp((char*)mapped_cache, "dyld_v1 arm64") == 0 )
859 dylib_create_func = dylib_maker<arm64>;
860 #if SUPPORT_ARCH_arm64e
861 else if ( strcmp((char*)mapped_cache, "dyld_v1 arm64e") == 0 )
862 dylib_create_func = dylib_maker<arm64>;
863 #endif
864 #if SUPPORT_ARCH_arm64_32
865 else if ( strcmp((char*)mapped_cache, "dyld_v1arm64_32") == 0 )
866 dylib_create_func = dylib_maker<arm64_32>;
867 #endif
868 else {
869 fprintf(stderr, "Error: unrecognized dyld shared cache magic.\n");
870 munmap(mapped_cache, (size_t)statbuf.st_size);
871 return -1;
872 }
873
874 // Verify that the cache isn't corrupt.
875 if (int result = sharedCacheIsValid(mapped_cache, (uint64_t)statbuf.st_size)) {
876 munmap(mapped_cache, (size_t)statbuf.st_size);
877 return result;
878 }
879
880 // iterate through all images in cache and build map of dylibs and segments
881 __block NameToSegments map;
882 int result = 0;
883
884 result = dyld_shared_cache_iterate(mapped_cache, (uint32_t)statbuf.st_size, ^(const dyld_shared_cache_dylib_info* dylibInfo, const dyld_shared_cache_segment_info* segInfo) {
885 map[dylibInfo->path].push_back(seg_info(segInfo->name, segInfo->fileOffset, segInfo->fileSize));
886 });
887
888 if(result != 0) {
889 fprintf(stderr, "Error: dyld_shared_cache_iterate_segments_with_slide failed.\n");
890 munmap(mapped_cache, (size_t)statbuf.st_size);
891 return result;
892 }
893
894 // for each dylib instantiate a dylib file
895 SharedCacheExtractor extractor(map, extraction_root_path, dylib_create_func, mapped_cache, progress);
896 result = extractor.extractCaches();
897
898 munmap(mapped_cache, (size_t)statbuf.st_size);
899 return result;
900 }
901
902
903
904 int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path, const char* extraction_root_path)
905 {
906 return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path, extraction_root_path,
907 ^(unsigned , unsigned) {} );
908 }
909
910
#if 0
// test program
//
// Stand-alone driver, compiled out by default. It loads dsc_extractor.bundle
// with dlopen(), looks up dyld_shared_cache_extract_dylibs_progress, and runs
// it on the cache file and destination directory given on the command line,
// printing "current/total" to stdout on every progress callback.
//
// Exit status: 0 when the extractor returns 0, 1 on a usage error, a load or
// lookup failure, or a non-zero extractor result.
#include <stdio.h>
#include <stddef.h>
#include <dlfcn.h>


// Signature of dyld_shared_cache_extract_dylibs_progress as looked up via dlsym().
typedef int (*extractor_proc)(const char* shared_cache_file_path, const char* extraction_root_path,
                              void (^progress)(unsigned current, unsigned total));

int main(int argc, const char* argv[])
{
    if ( argc != 3 ) {
        fprintf(stderr, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
        return 1;
    }

    //void* handle = dlopen("/Volumes/my/src/dyld/build/Debug/dsc_extractor.bundle", RTLD_LAZY);
    void* handle = dlopen("/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/usr/lib/dsc_extractor.bundle", RTLD_LAZY);
    if ( handle == NULL ) {
        fprintf(stderr, "dsc_extractor.bundle could not be loaded\n");
        return 1;
    }

    extractor_proc proc = (extractor_proc)dlsym(handle, "dyld_shared_cache_extract_dylibs_progress");
    if ( proc == NULL ) {
        fprintf(stderr, "dsc_extractor.bundle did not have dyld_shared_cache_extract_dylibs_progress symbol\n");
        return 1;
    }

    // Both counters are unsigned, so print them with %u rather than %d.
    int result = (*proc)(argv[1], argv[2], ^(unsigned c, unsigned total) { printf("%u/%u\n", c, total); } );
    fprintf(stderr, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result);

    // Let scripts see a failed extraction through the exit status.
    return (result == 0) ? 0 : 1;
}


#endif
948
949
950
951