// dyld-851.27.tar.gz
// [apple/dyld.git] / dyld3 / shared-cache / dsc_extractor.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <sys/stat.h>
29 #include <string.h>
30 #include <fcntl.h>
31 #include <stdlib.h>
32 #include <errno.h>
33 #include <sys/mman.h>
34 #include <sys/syslimits.h>
35 #include <libkern/OSByteOrder.h>
36 #include <mach-o/arch.h>
37 #include <mach-o/loader.h>
38 #include <Availability.h>
39
40 #include "CodeSigningTypes.h"
41 #include <CommonCrypto/CommonHMAC.h>
42 #include <CommonCrypto/CommonDigest.h>
43 #include <CommonCrypto/CommonDigestSPI.h>
44
45 #define NO_ULEB
46 #include "Architectures.hpp"
47 #include "MachOFileAbstraction.hpp"
48
49 #include "dsc_iterator.h"
50 #include "dsc_extractor.h"
51 #include "DyldSharedCache.h"
52 #include "MachOAnalyzer.h"
53 #include "SupportedArchs.h"
54 #include "Trie.hpp"
55
56 #include <vector>
57 #include <set>
58 #include <map>
59 #include <unordered_map>
60 #include <algorithm>
61 #include <dispatch/dispatch.h>
62
// One segment of a dylib: its name plus the offset/size pair the extractor
// uses to copy the segment's bytes out of the mapped cache.
struct seg_info
{
    const char* segName;    // segment name; the pointer is stored, the text is not copied
    uint64_t    offset;     // offset of the segment's bytes from the start of the mapped cache
    uint64_t    sizem;      // number of bytes the segment occupies

    seg_info(const char* name, uint64_t segOffset, uint64_t segSize)
        : segName(name)
        , offset(segOffset)
        , sizem(segSize)
    {
    }
};
71
// Hash functor for NUL-terminated C strings: folds every character into the
// running value with hash = 5 * hash + character.
class CStringHash {
public:
    size_t operator()(const char* str) const {
        size_t hash = 0;
        while ( *str != '\0' ) {
            hash = 5 * hash + *str;
            ++str;
        }
        return hash;
    }
};
// Equality functor for NUL-terminated C strings: compares the characters,
// not the pointer values.
class CStringEquals {
public:
    bool operator()(const char* left, const char* right) const {
        const int order = strcmp(left, right);
        return order == 0;
    }
};
85 typedef std::unordered_map<const char*, std::vector<seg_info>, CStringHash, CStringEquals> NameToSegments;
86
87 // Filter to find individual symbol re-exports in trie
88 class NotReExportSymbol {
89 public:
90 NotReExportSymbol(const std::set<int> &rd) :_reexportDeps(rd) {}
91 bool operator()(const ExportInfoTrie::Entry &entry) const {
92 return isSymbolReExport(entry);
93 }
94 private:
95 bool isSymbolReExport(const ExportInfoTrie::Entry &entry) const {
96 if ( (entry.info.flags & EXPORT_SYMBOL_FLAGS_KIND_MASK) != EXPORT_SYMBOL_FLAGS_KIND_REGULAR )
97 return true;
98 if ( (entry.info.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) == 0 )
99 return true;
100 // If the symbol comes from a dylib that is re-exported, this is not an individual symbol re-export
101 if ( _reexportDeps.count((int)entry.info.other) != 0 )
102 return true;
103 return false;
104 }
105 const std::set<int> &_reexportDeps;
106 };
107
// Empty class template parameterized on P; it declares no members.
// NOTE(review): nothing in the visible part of this file refers to it.
template <typename P>
struct LoadCommandInfo { };
111
112 template <typename A>
113 class LinkeditOptimizer {
114 typedef typename A::P P;
115 typedef typename A::P::E E;
116 typedef typename A::P::uint_t pint_t;
117
118 private:
119 macho_segment_command<P>* linkEditSegCmd = nullptr;
120 symtab_command* symtab = nullptr;
121 dysymtab_command* dynamicSymTab = nullptr;
122 linkedit_data_command* functionStarts = nullptr;
123 linkedit_data_command* dataInCode = nullptr;
124 uint32_t exportsTrieOffset = 0;
125 uint32_t exportsTrieSize = 0;
126 std::set<int> reexportDeps;
127
128 public:
129
130 void optimize_loadcommands(dyld3::MachOAnalyzer* mh)
131 {
132 // update header flags
133 mh->flags &= 0x7FFFFFFF; // remove in-cache bit
134
135 // update load commands
136 __block uint64_t cumulativeFileSize = 0;
137 __block int depIndex = 0;
138 Diagnostics diag;
139 mh->forEachLoadCommand(diag, ^(const load_command* cmd, bool &stop) {
140 switch ( cmd->cmd ) {
141 case macho_segment_command<P>::CMD: {
142 auto segCmd = (macho_segment_command<P>*)cmd;
143 segCmd->set_fileoff(cumulativeFileSize);
144 segCmd->set_filesize(segCmd->vmsize());
145
146 auto const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
147 auto const sectionsEnd = &sectionsStart[segCmd->nsects()];
148 for (auto sect = sectionsStart; sect < sectionsEnd; ++sect) {
149 if ( sect->offset() != 0 ) {
150 sect->set_offset((uint32_t)(cumulativeFileSize + sect->addr() - segCmd->vmaddr()));
151 }
152 }
153 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 )
154 linkEditSegCmd = segCmd;
155 cumulativeFileSize += segCmd->filesize();
156 } break;
157 case LC_DYLD_INFO_ONLY: {
158 // zero out all dyld info. lldb only uses symbol table
159 auto dyldInfo = (dyld_info_command*)cmd;
160 exportsTrieOffset = dyldInfo->export_off;
161 exportsTrieSize = dyldInfo->export_size;
162 dyldInfo->rebase_off = 0;
163 dyldInfo->rebase_size = 0;
164 dyldInfo->bind_off = 0;
165 dyldInfo->bind_size = 0;
166 dyldInfo->weak_bind_off = 0;
167 dyldInfo->weak_bind_size = 0;
168 dyldInfo->lazy_bind_off = 0;
169 dyldInfo->lazy_bind_size = 0;
170 dyldInfo->export_off = 0;
171 dyldInfo->export_size = 0;
172 } break;
173 case LC_DYLD_EXPORTS_TRIE: {
174 // don't put export trie into extracted dylib. lldb only uses symbol table
175 linkedit_data_command* exportsTrie = (linkedit_data_command*)cmd;
176 exportsTrieOffset = exportsTrie->dataoff;
177 exportsTrieSize = exportsTrie->datasize;
178 exportsTrie->dataoff = 0;
179 exportsTrie->datasize = 0;
180 } break;
181 case LC_SYMTAB:
182 symtab = (symtab_command*)cmd;
183 break;
184 case LC_DYSYMTAB:
185 dynamicSymTab = (dysymtab_command*)cmd;
186 break;
187 case LC_FUNCTION_STARTS:
188 functionStarts = (linkedit_data_command*)cmd;
189 break;
190 case LC_DATA_IN_CODE:
191 dataInCode = (linkedit_data_command*)cmd;
192 break;
193 case LC_LOAD_DYLIB:
194 case LC_LOAD_WEAK_DYLIB:
195 case LC_REEXPORT_DYLIB:
196 case LC_LOAD_UPWARD_DYLIB:
197 depIndex++;
198 if ( cmd->cmd == LC_REEXPORT_DYLIB ) {
199 reexportDeps.insert(depIndex);
200 }
201 break;
202 default:
203 break;
204 }
205 });
206
207 mh->removeLoadCommand(diag, ^(const load_command* cmd, bool& remove, bool &stop) {
208 switch ( cmd->cmd ) {
209 case LC_SEGMENT_SPLIT_INFO:
210 // <rdar://problem/23212513> dylibs iOS 9 dyld caches have bogus LC_SEGMENT_SPLIT_INFO
211 remove = true;
212 stop = true;
213 break;
214 default:
215 break;
216 }
217 });
218 }
219
220 int optimize_linkedit(std::vector<uint8_t> &new_linkedit_data, uint64_t textOffsetInCache, const void* mapped_cache)
221 {
222 // rebuild symbol table
223 if ( linkEditSegCmd == nullptr ) {
224 fprintf(stderr, "__LINKEDIT not found\n");
225 return -1;
226 }
227 if ( symtab == nullptr ) {
228 fprintf(stderr, "LC_SYMTAB not found\n");
229 return -1;
230 }
231 if ( dynamicSymTab == nullptr ) {
232 fprintf(stderr, "LC_DYSYMTAB not found\n");
233 return -1;
234 }
235
236 const uint64_t newFunctionStartsOffset = new_linkedit_data.size();
237 uint32_t functionStartsSize = 0;
238 if ( functionStarts != NULL ) {
239 // copy function starts from original cache file to new mapped dylib file
240 functionStartsSize = functionStarts->datasize;
241 new_linkedit_data.insert(new_linkedit_data.end(),
242 (char*)mapped_cache + functionStarts->dataoff,
243 (char*)mapped_cache + functionStarts->dataoff + functionStartsSize);
244 }
245
246 // pointer align
247 while ((linkEditSegCmd->fileoff() + new_linkedit_data.size()) % sizeof(pint_t))
248 new_linkedit_data.push_back(0);
249
250 const uint64_t newDataInCodeOffset = new_linkedit_data.size();
251 uint32_t dataInCodeSize = 0;
252 if ( dataInCode != NULL ) {
253 // copy data-in-code info from original cache file to new mapped dylib file
254 dataInCodeSize = dataInCode->datasize;
255 new_linkedit_data.insert(new_linkedit_data.end(),
256 (char*)mapped_cache + dataInCode->dataoff,
257 (char*)mapped_cache + dataInCode->dataoff + dataInCodeSize);
258 }
259
260 std::vector<ExportInfoTrie::Entry> exports;
261 if ( exportsTrieSize != 0 ) {
262 const uint8_t* exportsStart = ((uint8_t*)mapped_cache) + exportsTrieOffset;
263 const uint8_t* exportsEnd = &exportsStart[exportsTrieSize];
264 ExportInfoTrie::parseTrie(exportsStart, exportsEnd, exports);
265 exports.erase(std::remove_if(exports.begin(), exports.end(), NotReExportSymbol(reexportDeps)), exports.end());
266 }
267
268 const DyldSharedCache* cache = (DyldSharedCache*)mapped_cache;
269 macho_nlist<P>* allLocalNlists = (macho_nlist<P>*)cache->getLocalNlistEntries();
270 __block macho_nlist<P>* localNlists = nullptr;
271 __block uint32_t localNlistCount = 0;
272 cache->forEachLocalSymbolEntry(^(uint32_t dylibOffset, uint32_t nlistStartIndex, uint32_t nlistCount, bool& stop){
273 if (dylibOffset == textOffsetInCache) {
274 localNlists = &allLocalNlists[nlistStartIndex];
275 localNlistCount = nlistCount;
276 stop = true;
277 }
278 });
279 // compute number of symbols in new symbol table
280 const macho_nlist<P>* mergedSymTabStart = (macho_nlist<P>*)(((uint8_t*)mapped_cache) + symtab->symoff);
281 const macho_nlist<P>* const mergedSymTabend = &mergedSymTabStart[symtab->nsyms];
282 uint32_t newSymCount = symtab->nsyms;
283 if ( localNlistCount != 0 ) {
284 // if we are recombining with unmapped locals, recompute new total size
285 newSymCount = localNlistCount + dynamicSymTab->nextdefsym + dynamicSymTab->nundefsym;
286 }
287
288 // add room for N_INDR symbols for re-exported symbols
289 newSymCount += exports.size();
290
291 // copy symbol entries and strings from original cache file to new mapped dylib file
292 const char* mergedStringPoolStart = (char*)mapped_cache + symtab->stroff;
293 const char* mergedStringPoolEnd = &mergedStringPoolStart[symtab->strsize];
294
295 // First count how many entries we need
296 std::vector<macho_nlist<P>> newSymTab;
297 newSymTab.reserve(newSymCount);
298 std::vector<char> newSymNames;
299
300 // first pool entry is always empty string
301 newSymNames.push_back('\0');
302
303 // local symbols are first in dylibs, if this cache has unmapped locals, insert them all first
304 uint32_t undefSymbolShift = 0;
305 if ( localNlistCount != 0 ) {
306 const char* localStrings = cache->getLocalStrings();
307 undefSymbolShift = localNlistCount - dynamicSymTab->nlocalsym;
308 // update load command to reflect new count of locals
309 dynamicSymTab->ilocalsym = (uint32_t)newSymTab.size();
310 dynamicSymTab->nlocalsym = localNlistCount;
311 // copy local symbols
312 for (uint32_t i=0; i < localNlistCount; ++i) {
313 const char* localName = &localStrings[localNlists[i].n_strx()];
314 if ( localName > localStrings + cache->getLocalStringsSize() )
315 localName = "<corrupt local symbol name>";
316 macho_nlist<P> t = localNlists[i];
317 t.set_n_strx((uint32_t)newSymNames.size());
318 newSymNames.insert(newSymNames.end(),
319 localName,
320 localName + (strlen(localName) + 1));
321 newSymTab.push_back(t);
322 }
323 // now start copying symbol table from start of externs instead of start of locals
324 mergedSymTabStart = &mergedSymTabStart[dynamicSymTab->iextdefsym];
325 }
326 // copy full symbol table from cache (skipping locals if they where elsewhere)
327 for (const macho_nlist<P>* s = mergedSymTabStart; s != mergedSymTabend; ++s) {
328 macho_nlist<P> t = *s;
329 t.set_n_strx((uint32_t)newSymNames.size());
330 const char* symName = &mergedStringPoolStart[s->n_strx()];
331 if ( symName > mergedStringPoolEnd )
332 symName = "<corrupt symbol name>";
333 newSymNames.insert(newSymNames.end(),
334 symName,
335 symName + (strlen(symName) + 1));
336 newSymTab.push_back(t);
337 }
338 // <rdar://problem/16529213> recreate N_INDR symbols in extracted dylibs for debugger
339 for (std::vector<ExportInfoTrie::Entry>::iterator it = exports.begin(); it != exports.end(); ++it) {
340 macho_nlist<P> t;
341 memset(&t, 0, sizeof(t));
342 t.set_n_strx((uint32_t)newSymNames.size());
343 t.set_n_type(N_INDR | N_EXT);
344 t.set_n_sect(0);
345 t.set_n_desc(0);
346 newSymNames.insert(newSymNames.end(),
347 it->name.c_str(),
348 it->name.c_str() + (it->name.size() + 1));
349 const char* importName = it->info.importName.c_str();
350 if ( *importName == '\0' )
351 importName = it->name.c_str();
352 t.set_n_value(newSymNames.size());
353 newSymNames.insert(newSymNames.end(),
354 importName,
355 importName + (strlen(importName) + 1));
356 newSymTab.push_back(t);
357 }
358
359 if ( newSymCount != newSymTab.size() ) {
360 fprintf(stderr, "symbol count miscalculation\n");
361 return -1;
362 }
363
364 //const uint64_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
365 //macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
366 //char* const newStringPoolStart = (char*)mh + newStringPoolOffset;
367
368 // pointer align
369 while ((linkEditSegCmd->fileoff() + new_linkedit_data.size()) % sizeof(pint_t))
370 new_linkedit_data.push_back(0);
371
372 const uint64_t newSymTabOffset = new_linkedit_data.size();
373
374 // Copy sym tab
375 for (macho_nlist<P>& sym : newSymTab) {
376 uint8_t symData[sizeof(macho_nlist<P>)];
377 memcpy(&symData, &sym, sizeof(sym));
378 new_linkedit_data.insert(new_linkedit_data.end(), &symData[0], &symData[sizeof(macho_nlist<P>)]);
379 }
380
381 const uint64_t newIndSymTabOffset = new_linkedit_data.size();
382
383 // Copy (and adjust) indirect symbol table
384 const uint32_t* mergedIndSymTab = (uint32_t*)((char*)mapped_cache + dynamicSymTab->indirectsymoff);
385 new_linkedit_data.insert(new_linkedit_data.end(),
386 (char*)mergedIndSymTab,
387 (char*)(mergedIndSymTab + dynamicSymTab->nindirectsyms));
388 if ( undefSymbolShift != 0 ) {
389 uint32_t* newIndSymTab = (uint32_t*)&new_linkedit_data[newIndSymTabOffset];
390 for (int i=0; i < dynamicSymTab->nindirectsyms; ++i) {
391 newIndSymTab[i] += undefSymbolShift;
392 }
393 }
394 const uint64_t newStringPoolOffset = new_linkedit_data.size();
395
396 // pointer align string pool size
397 while (newSymNames.size() % sizeof(pint_t))
398 newSymNames.push_back('\0');
399
400 new_linkedit_data.insert(new_linkedit_data.end(), newSymNames.begin(), newSymNames.end());
401
402 // update load commands
403 if ( functionStarts != NULL ) {
404 functionStarts->dataoff = (uint32_t)(newFunctionStartsOffset + linkEditSegCmd->fileoff());
405 functionStarts->datasize = functionStartsSize;
406 }
407 if ( dataInCode != NULL ) {
408 dataInCode->dataoff = (uint32_t)(newDataInCodeOffset + linkEditSegCmd->fileoff());
409 dataInCode->datasize = dataInCodeSize;
410 }
411
412 symtab->nsyms = newSymCount;
413 symtab->symoff = (uint32_t)(newSymTabOffset + linkEditSegCmd->fileoff());
414 symtab->stroff = (uint32_t)(newStringPoolOffset + linkEditSegCmd->fileoff());
415 symtab->strsize = (uint32_t)newSymNames.size();
416 dynamicSymTab->extreloff = 0;
417 dynamicSymTab->nextrel = 0;
418 dynamicSymTab->locreloff = 0;
419 dynamicSymTab->nlocrel = 0;
420 dynamicSymTab->indirectsymoff = (uint32_t)(newIndSymTabOffset + linkEditSegCmd->fileoff());
421 linkEditSegCmd->set_filesize(symtab->stroff + symtab->strsize - linkEditSegCmd->fileoff());
422 linkEditSegCmd->set_vmsize((linkEditSegCmd->filesize() + 4095) & (-4096));
423
424 return 0;
425 }
426
427 };
428
// Creates every missing directory leading up to file_path (the part after the
// last '/' is treated as a file name and is not created).
//
// Nothing happens when file_path contains no '/' or when the directory part
// already exists. mkdir() results are deliberately ignored: the leading
// components usually exist already, and a real failure surfaces when the
// caller tries to open the file.
static void make_dirs(const char* file_path)
{
    // Mutable copy of the path including its terminator. Heap backed: the
    // length comes from the caller, and variable-length stack arrays are not
    // standard C++.
    std::vector<char> dirs(file_path, file_path + strlen(file_path) + 1);

    char* const lastSlash = strrchr(dirs.data(), '/');
    if ( lastSlash == NULL )
        return;
    // cut off the file name, keeping the trailing '/'
    lastSlash[1] = '\0';

    struct stat stat_buf;
    if ( stat(dirs.data(), &stat_buf) == 0 )
        return;     // directory part already exists

    // Walk the path left to right, temporarily terminating it at each '/' so
    // that every prefix is handed to mkdir() in turn. Starting at index 1
    // skips a leading '/' (index 1 is valid: the buffer holds at least "/\0").
    char* afterSlash = &dirs[1];
    char* slash;
    while ( (slash = strchr(afterSlash, '/')) != NULL ) {
        *slash = '\0';
        ::mkdir(dirs.data(), S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
        *slash = '/';
        afterSlash = slash+1;
    }
}
451
452
453
454 template <typename A>
455 void dylib_maker(const void* mapped_cache, std::vector<uint8_t> &dylib_data, const std::vector<seg_info>& segments) {
456 typedef typename A::P P;
457
458 size_t additionalSize = 0;
459 for(std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
460 if ( strcmp(it->segName, "__LINKEDIT") != 0 )
461 additionalSize += it->sizem;
462 }
463
464 std::vector<uint8_t> new_dylib_data;
465 new_dylib_data.reserve(additionalSize);
466
467 // Write regular segments into the buffer
468 uint64_t textOffsetInCache = 0;
469 for( std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
470
471 if(strcmp(it->segName, "__TEXT") == 0 )
472 textOffsetInCache = it->offset;
473
474 //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
475 // Copy all but the __LINKEDIT. It will be copied later during the optimizer in to a temporary buffer but it would
476 // not be efficient to copy it all now for each dylib.
477 if (strcmp(it->segName, "__LINKEDIT") == 0 )
478 continue;
479 std::copy(((uint8_t*)mapped_cache)+it->offset, ((uint8_t*)mapped_cache)+it->offset+it->sizem, std::back_inserter(new_dylib_data));
480 }
481
482 // optimize linkedit
483 std::vector<uint8_t> new_linkedit_data;
484 new_linkedit_data.reserve(1 << 20);
485
486 LinkeditOptimizer<A> linkeditOptimizer;
487 dyld3::MachOAnalyzer* mh = (dyld3::MachOAnalyzer*)&new_dylib_data.front();
488 linkeditOptimizer.optimize_loadcommands(mh);
489 linkeditOptimizer.optimize_linkedit(new_linkedit_data, textOffsetInCache, mapped_cache);
490
491 new_dylib_data.insert(new_dylib_data.end(), new_linkedit_data.begin(), new_linkedit_data.end());
492
493 // Page align file
494 while (new_dylib_data.size() % 4096)
495 new_dylib_data.push_back(0);
496
497 dylib_data.insert(dylib_data.end(), new_dylib_data.begin(), new_dylib_data.end());
498 }
499
500 typedef __typeof(dylib_maker<x86>) dylib_maker_func;
501 typedef void (^progress_block)(unsigned current, unsigned total);
502
503 class SharedCacheExtractor;
504 struct SharedCacheDylibExtractor {
505 SharedCacheDylibExtractor(const char* name, std::vector<seg_info> segInfo)
506 : name(name), segInfo(segInfo) { }
507
508 void extractCache(SharedCacheExtractor& context);
509
510 const char* name;
511 const std::vector<seg_info> segInfo;
512 int result = 0;
513 };
514
515 struct SharedCacheExtractor {
516 SharedCacheExtractor(const NameToSegments& map,
517 const char* extraction_root_path,
518 dylib_maker_func* dylib_create_func,
519 void* mapped_cache,
520 progress_block progress)
521 : map(map), extraction_root_path(extraction_root_path),
522 dylib_create_func(dylib_create_func), mapped_cache(mapped_cache),
523 progress(progress) {
524
525 extractors.reserve(map.size());
526 for (auto it : map)
527 extractors.emplace_back(it.first, it.second);
528
529 // Limit the number of open files. 16 seems to give better performance than higher numbers.
530 sema = dispatch_semaphore_create(16);
531 }
532 int extractCaches();
533
534 static void extractCache(void *ctx, size_t i);
535
536 const NameToSegments& map;
537 std::vector<SharedCacheDylibExtractor> extractors;
538 dispatch_semaphore_t sema;
539 const char* extraction_root_path;
540 dylib_maker_func* dylib_create_func;
541 void* mapped_cache;
542 progress_block progress;
543 std::atomic_int count = { 0 };
544 };
545
546 int SharedCacheExtractor::extractCaches() {
547 dispatch_queue_t process_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW, 0);
548 dispatch_apply_f(map.size(), process_queue,
549 this, extractCache);
550
551 int result = 0;
552 for (const SharedCacheDylibExtractor& extractor : extractors) {
553 if (extractor.result != 0) {
554 result = extractor.result;
555 break;
556 }
557 }
558 return result;
559 }
560
561 void SharedCacheExtractor::extractCache(void *ctx, size_t i) {
562 SharedCacheExtractor& context = *(SharedCacheExtractor*)ctx;
563 dispatch_semaphore_wait(context.sema, DISPATCH_TIME_FOREVER);
564 context.extractors[i].extractCache(context);
565 dispatch_semaphore_signal(context.sema);
566 }
567
568 void SharedCacheDylibExtractor::extractCache(SharedCacheExtractor &context) {
569
570 char dylib_path[PATH_MAX];
571 strcpy(dylib_path, context.extraction_root_path);
572 strcat(dylib_path, "/");
573 strcat(dylib_path, name);
574
575 //printf("%s with %lu segments\n", dylib_path, it->second.size());
576 // make sure all directories in this path exist
577 make_dirs(dylib_path);
578
579 // open file, create if does not already exist
580 int fd = ::open(dylib_path, O_CREAT | O_TRUNC | O_EXLOCK | O_RDWR, 0644);
581 if ( fd == -1 ) {
582 fprintf(stderr, "can't open or create dylib file %s, errnor=%d\n", dylib_path, errno);
583 result = -1;
584 return;
585 }
586
587 std::vector<uint8_t> vec;
588 context.dylib_create_func(context.mapped_cache, vec, segInfo);
589 context.progress(context.count++, (unsigned)context.map.size());
590
591 // Write file data
592 if( write(fd, &vec.front(), vec.size()) == -1) {
593 fprintf(stderr, "error writing, errnor=%d\n", errno);
594 result = -1;
595 }
596
597 close(fd);
598 }
599
600 static int sharedCacheIsValid(const void* mapped_cache, uint64_t size) {
601 // First check that the size is good.
602 // Note the shared cache may not have a codeSignatureSize value set so we need to first make
603 // sure we have space for the CS_SuperBlob, then later crack that to check for the size of the rest.
604 const DyldSharedCache* dyldSharedCache = (DyldSharedCache*)mapped_cache;
605 uint64_t requiredSizeForCSSuperBlob = dyldSharedCache->header.codeSignatureOffset + sizeof(CS_SuperBlob);
606 const dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)((uint8_t*)mapped_cache + dyldSharedCache->header.mappingOffset);
607 if ( requiredSizeForCSSuperBlob > size ) {
608 fprintf(stderr, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size, requiredSizeForCSSuperBlob);
609 return -1;
610 }
611
612 // Now see if the code signatures are valid as that tells us the pages aren't corrupt.
613 // First find all of the regions of the shared cache we computed cd hashes
614 std::vector<std::pair<uint64_t, uint64_t>> sharedCacheRegions;
615 for (uint32_t i = 0; i != dyldSharedCache->header.mappingCount; ++i) {
616 sharedCacheRegions.emplace_back(std::make_pair(mappings[i].fileOffset, mappings[i].fileOffset + mappings[i].size));
617 }
618 if (dyldSharedCache->header.localSymbolsSize)
619 sharedCacheRegions.emplace_back(std::make_pair(dyldSharedCache->header.localSymbolsOffset, dyldSharedCache->header.localSymbolsOffset + dyldSharedCache->header.localSymbolsSize));
620 size_t inBbufferSize = 0;
621 for (auto& sharedCacheRegion : sharedCacheRegions)
622 inBbufferSize += (sharedCacheRegion.second - sharedCacheRegion.first);
623
624 // Now take the cd hash from the cache itself and validate the regions we found.
625 uint8_t* codeSignatureRegion = (uint8_t*)mapped_cache + dyldSharedCache->header.codeSignatureOffset;
626 CS_SuperBlob* sb = reinterpret_cast<CS_SuperBlob*>(codeSignatureRegion);
627 if (sb->magic != htonl(CSMAGIC_EMBEDDED_SIGNATURE)) {
628 fprintf(stderr, "Error: dyld shared cache code signature magic is incorrect.\n");
629 return -1;
630 }
631
632 size_t sbSize = ntohl(sb->length);
633 uint64_t requiredSizeForCS = dyldSharedCache->header.codeSignatureOffset + sbSize;
634 if ( requiredSizeForCS > size ) {
635 fprintf(stderr, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size, requiredSizeForCS);
636 return -1;
637 }
638
639 // Find the offset to the code directory.
640 CS_CodeDirectory* cd = nullptr;
641 for (unsigned i =0; i != sb->count; ++i) {
642 if (ntohl(sb->index[i].type) == CSSLOT_CODEDIRECTORY) {
643 cd = (CS_CodeDirectory*)(codeSignatureRegion + ntohl(sb->index[i].offset));
644 break;
645 }
646 }
647
648 if (!cd) {
649 fprintf(stderr, "Error: dyld shared cache code signature directory is missing.\n");
650 return -1;
651 }
652
653 if ( (uint8_t*)cd > (codeSignatureRegion + sbSize) ) {
654 fprintf(stderr, "Error: dyld shared cache code signature directory is out of bounds.\n");
655 return -1;
656 }
657
658 if ( cd->magic != htonl(CSMAGIC_CODEDIRECTORY) ) {
659 fprintf(stderr, "Error: dyld shared cache code signature directory magic is incorrect.\n");
660 return -1;
661 }
662
663 uint32_t pageSize = 1 << cd->pageSize;
664 uint32_t slotCountFromRegions = (uint32_t)((inBbufferSize + pageSize - 1) / pageSize);
665 if ( ntohl(cd->nCodeSlots) < slotCountFromRegions ) {
666 fprintf(stderr, "Error: dyld shared cache code signature directory num slots is incorrect.\n");
667 return -1;
668 }
669
670 uint32_t dscDigestFormat = kCCDigestNone;
671 switch (cd->hashType) {
672 case CS_HASHTYPE_SHA1:
673 #pragma clang diagnostic push
674 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
675 dscDigestFormat = kCCDigestSHA1;
676 #pragma clang diagnostic pop
677 break;
678 case CS_HASHTYPE_SHA256:
679 dscDigestFormat = kCCDigestSHA256;
680 break;
681 default:
682 break;
683 }
684
685 if (dscDigestFormat != kCCDigestNone) {
686 const uint64_t csPageSize = 1 << cd->pageSize;
687 size_t hashOffset = ntohl(cd->hashOffset);
688 uint8_t* hashSlot = (uint8_t*)cd + hashOffset;
689 uint8_t cdHashBuffer[cd->hashSize];
690
691 // Skip local symbols for now as those aren't being codesign correctly right now.
692 size_t inBbufferSize = 0;
693 for (auto& sharedCacheRegion : sharedCacheRegions) {
694 if (sharedCacheRegion.first == dyldSharedCache->header.localSymbolsOffset)
695 continue;
696 inBbufferSize += (sharedCacheRegion.second - sharedCacheRegion.first);
697 }
698 uint32_t slotCountToProcess = (uint32_t)((inBbufferSize + pageSize - 1) / pageSize);
699
700 for (unsigned i = 0; i != slotCountToProcess; ++i) {
701 // Skip data pages as those may have been slid by ASLR in the extracted file
702 uint64_t fileOffset = i * csPageSize;
703 bool isDataPage = false;
704 for (unsigned mappingIndex = 1; mappingIndex != (dyldSharedCache->header.mappingCount - 1); ++mappingIndex) {
705 if ( (fileOffset >= mappings[mappingIndex].fileOffset) && (fileOffset < (mappings[mappingIndex].fileOffset + mappings[mappingIndex].size)) ) {
706 isDataPage = true;
707 break;
708 }
709 }
710 if ( isDataPage )
711 continue;
712
713 CCDigest(dscDigestFormat, (uint8_t*)mapped_cache + fileOffset, (size_t)csPageSize, cdHashBuffer);
714 uint8_t* cacheCdHashBuffer = hashSlot + (i * cd->hashSize);
715 if (memcmp(cdHashBuffer, cacheCdHashBuffer, cd->hashSize) != 0) {
716 fprintf(stderr, "Error: dyld shared cache code signature for page %d is incorrect.\n", i);
717 return -1;
718 }
719 }
720 }
721 return 0;
722 }
723
724 int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path, const char* extraction_root_path,
725 progress_block progress)
726 {
727 struct stat statbuf;
728 if (stat(shared_cache_file_path, &statbuf)) {
729 fprintf(stderr, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path);
730 return -1;
731 }
732
733 int cache_fd = open(shared_cache_file_path, O_RDONLY);
734 if (cache_fd < 0) {
735 fprintf(stderr, "Error: failed to open shared cache file at %s\n", shared_cache_file_path);
736 return -1;
737 }
738
739 void* mapped_cache = mmap(NULL, (size_t)statbuf.st_size, PROT_READ, MAP_PRIVATE, cache_fd, 0);
740 if (mapped_cache == MAP_FAILED) {
741 fprintf(stderr, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path, errno);
742 return -1;
743 }
744
745 close(cache_fd);
746
747 // instantiate arch specific dylib maker
748 dylib_maker_func* dylib_create_func = nullptr;
749 if ( strcmp((char*)mapped_cache, "dyld_v1 i386") == 0 )
750 dylib_create_func = dylib_maker<x86>;
751 else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64") == 0 )
752 dylib_create_func = dylib_maker<x86_64>;
753 else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64h") == 0 )
754 dylib_create_func = dylib_maker<x86_64>;
755 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv5") == 0 )
756 dylib_create_func = dylib_maker<arm>;
757 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv6") == 0 )
758 dylib_create_func = dylib_maker<arm>;
759 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv7") == 0 )
760 dylib_create_func = dylib_maker<arm>;
761 else if ( strncmp((char*)mapped_cache, "dyld_v1 armv7", 14) == 0 )
762 dylib_create_func = dylib_maker<arm>;
763 else if ( strcmp((char*)mapped_cache, "dyld_v1 arm64") == 0 )
764 dylib_create_func = dylib_maker<arm64>;
765 #if SUPPORT_ARCH_arm64e
766 else if ( strcmp((char*)mapped_cache, "dyld_v1 arm64e") == 0 )
767 dylib_create_func = dylib_maker<arm64>;
768 #endif
769 #if SUPPORT_ARCH_arm64_32
770 else if ( strcmp((char*)mapped_cache, "dyld_v1arm64_32") == 0 )
771 dylib_create_func = dylib_maker<arm64_32>;
772 #endif
773 else {
774 fprintf(stderr, "Error: unrecognized dyld shared cache magic.\n");
775 munmap(mapped_cache, (size_t)statbuf.st_size);
776 return -1;
777 }
778
779 // Verify that the cache isn't corrupt.
780 if (int result = sharedCacheIsValid(mapped_cache, (uint64_t)statbuf.st_size)) {
781 munmap(mapped_cache, (size_t)statbuf.st_size);
782 return result;
783 }
784
785 // iterate through all images in cache and build map of dylibs and segments
786 __block NameToSegments map;
787 int result = 0;
788
789 result = dyld_shared_cache_iterate(mapped_cache, (uint32_t)statbuf.st_size, ^(const dyld_shared_cache_dylib_info* dylibInfo, const dyld_shared_cache_segment_info* segInfo) {
790 map[dylibInfo->path].push_back(seg_info(segInfo->name, segInfo->fileOffset, segInfo->fileSize));
791 });
792
793 if(result != 0) {
794 fprintf(stderr, "Error: dyld_shared_cache_iterate_segments_with_slide failed.\n");
795 munmap(mapped_cache, (size_t)statbuf.st_size);
796 return result;
797 }
798
799 // for each dylib instantiate a dylib file
800 SharedCacheExtractor extractor(map, extraction_root_path, dylib_create_func, mapped_cache, progress);
801 result = extractor.extractCaches();
802
803 munmap(mapped_cache, (size_t)statbuf.st_size);
804 return result;
805 }
806
807
808
809 int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path, const char* extraction_root_path)
810 {
811 return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path, extraction_root_path,
812 ^(unsigned , unsigned) {} );
813 }
814
815
#if 0
// test program (compiled out): loads the extractor bundle at run time and
// runs it on the cache file / output directory given on the command line
#include <stdio.h>
#include <stddef.h>
#include <dlfcn.h>


// signature of dyld_shared_cache_extract_dylibs_progress as looked up through dlsym()
typedef int (*extractor_proc)(const char* shared_cache_file_path, const char* extraction_root_path,
                              void (^progress)(unsigned current, unsigned total));

int main(int argc, const char* argv[])
{
    if ( argc != 3 ) {
        fprintf(stderr, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
        return 1;
    }
    const char* const cachePath = argv[1];
    const char* const outputDir = argv[2];

    //void* bundle = dlopen("/Volumes/my/src/dyld/build/Debug/dsc_extractor.bundle", RTLD_LAZY);
    void* bundle = dlopen("/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/usr/lib/dsc_extractor.bundle", RTLD_LAZY);
    if ( bundle == NULL ) {
        fprintf(stderr, "dsc_extractor.bundle could not be loaded\n");
        return 1;
    }

    extractor_proc extract = (extractor_proc)dlsym(bundle, "dyld_shared_cache_extract_dylibs_progress");
    if ( extract == NULL ) {
        fprintf(stderr, "dsc_extractor.bundle did not have dyld_shared_cache_extract_dylibs_progress symbol\n");
        return 1;
    }

    // print "current/total" for every dylib, then the overall result
    int result = (*extract)(cachePath, outputDir, ^(unsigned c, unsigned total) { printf("%d/%d\n", c, total); } );
    fprintf(stderr, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result);
    return 0;
}


#endif
853
854
855
856