dyld-750.6.tar.gz
[apple/dyld.git] / launch-cache / dsc_extractor.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <sys/stat.h>
29 #include <string.h>
30 #include <fcntl.h>
31 #include <stdlib.h>
32 #include <errno.h>
33 #include <sys/mman.h>
34 #include <sys/syslimits.h>
35 #include <libkern/OSByteOrder.h>
36 #include <mach-o/arch.h>
37 #include <mach-o/loader.h>
38 #include <Availability.h>
39
40 #include "CodeSigningTypes.h"
41 #include <CommonCrypto/CommonHMAC.h>
42 #include <CommonCrypto/CommonDigest.h>
43 #include <CommonCrypto/CommonDigestSPI.h>
44
45 #define NO_ULEB
46 #include "Architectures.hpp"
47 #include "MachOFileAbstraction.hpp"
48 #include "CacheFileAbstraction.hpp"
49
50 #include "dsc_iterator.h"
51 #include "dsc_extractor.h"
52 #include "MachOTrie.hpp"
53 #include "SupportedArchs.h"
54 #include "DyldSharedCache.h"
55
56 #include <vector>
57 #include <set>
58 #include <map>
59 #include <unordered_map>
60 #include <algorithm>
61 #include <dispatch/dispatch.h>
62
// One segment of a dylib as recorded while iterating the shared cache:
// the segment name plus the offset and size values reported for it.
struct seg_info
{
    const char* segName;
    uint64_t    offset;
    uint64_t    sizem;

    seg_info(const char* n, uint64_t o, uint64_t s)
        : segName(n)
        , offset(o)
        , sizem(s)
    {
    }
};
71
// Hash functor for NUL-terminated C strings; used as the hasher of NameToSegments.
class CStringHash {
public:
    // Folds each character of `str` into the running value as hash = 5*hash + c.
    // An empty string hashes to 0.
    size_t operator()(const char* str) const {
        size_t hash = 0;
        while ( *str != '\0' ) {
            hash = 5 * hash + *str;
            ++str;
        }
        return hash;
    }
};
// Equality functor for NUL-terminated C strings: compares the characters,
// not the pointer values.
class CStringEquals {
public:
    bool operator()(const char* left, const char* right) const {
        const int order = strcmp(left, right);
        return order == 0;
    }
};
85 typedef std::unordered_map<const char*, std::vector<seg_info>, CStringHash, CStringEquals> NameToSegments;
86
87 // Filter to find individual symbol re-exports in trie
88 class NotReExportSymbol {
89 public:
90 NotReExportSymbol(const std::set<int> &rd) :_reexportDeps(rd) {}
91 bool operator()(const mach_o::trie::Entry &entry) const {
92 bool result = isSymbolReExport(entry);
93 if (result) {
94 // <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
95 ::free((void*)entry.name);
96 const_cast<mach_o::trie::Entry*>(&entry)->name = NULL;
97 }
98 return result;
99 }
100 private:
101 bool isSymbolReExport(const mach_o::trie::Entry &entry) const {
102 if ( (entry.flags & EXPORT_SYMBOL_FLAGS_KIND_MASK) != EXPORT_SYMBOL_FLAGS_KIND_REGULAR )
103 return true;
104 if ( (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) == 0 )
105 return true;
106 // If the symbol comes from a dylib that is re-exported, this is not an individual symbol re-export
107 if ( _reexportDeps.count((int)entry.other) != 0 )
108 return true;
109 return false;
110 }
111 const std::set<int> &_reexportDeps;
112 };
113
// Empty placeholder template parameterised on P; it declares no members.
template <typename P>
struct LoadCommandInfo
{
};
117
118 template <typename A>
119 class LinkeditOptimizer {
120 typedef typename A::P P;
121 typedef typename A::P::E E;
122 typedef typename A::P::uint_t pint_t;
123
124 private:
125 macho_segment_command<P>* linkEditSegCmd = NULL;
126 macho_symtab_command<P>* symtab = NULL;
127 macho_dysymtab_command<P>* dynamicSymTab = NULL;
128 macho_linkedit_data_command<P>* functionStarts = NULL;
129 macho_linkedit_data_command<P>* dataInCode = NULL;
130 uint32_t exportsTrieOffset = 0;
131 uint32_t exportsTrieSize = 0;
132 std::set<int> reexportDeps;
133
134 public:
135
136 void optimize_loadcommands(macho_header<typename A::P>* mh)
137 {
138 typedef typename A::P P;
139 typedef typename A::P::E E;
140 typedef typename A::P::uint_t pint_t;
141
142 // update header flags
143 mh->set_flags(mh->flags() & 0x7FFFFFFF); // remove in-cache bit
144
145 // update load commands
146 uint64_t cumulativeFileSize = 0;
147 const unsigned origLoadCommandsSize = mh->sizeofcmds();
148 unsigned bytesRemaining = origLoadCommandsSize;
149 unsigned removedCount = 0;
150 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
151 const uint32_t cmdCount = mh->ncmds();
152 const macho_load_command<P>* cmd = cmds;
153 int depIndex = 0;
154 for (uint32_t i = 0; i < cmdCount; ++i) {
155 bool remove = false;
156 switch ( cmd->cmd() ) {
157 case macho_segment_command<P>::CMD:
158 {
159 // update segment/section file offsets
160 macho_segment_command<P>* segCmd = (macho_segment_command<P>*)cmd;
161 segCmd->set_fileoff(cumulativeFileSize);
162 segCmd->set_filesize(segCmd->vmsize());
163 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
164 macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
165 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
166 if ( sect->offset() != 0 )
167 sect->set_offset((uint32_t)(cumulativeFileSize+sect->addr()-segCmd->vmaddr()));
168 }
169 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
170 linkEditSegCmd = segCmd;
171 }
172 cumulativeFileSize += segCmd->filesize();
173 break;
174 }
175 case LC_DYLD_INFO_ONLY:
176 {
177 // zero out all dyld info
178 macho_dyld_info_command<P>* dyldInfo = (macho_dyld_info_command<P>*)cmd;
179 exportsTrieOffset = dyldInfo->export_off();
180 exportsTrieSize = dyldInfo->export_size();
181 dyldInfo->set_rebase_off(0);
182 dyldInfo->set_rebase_size(0);
183 dyldInfo->set_bind_off(0);
184 dyldInfo->set_bind_size(0);
185 dyldInfo->set_weak_bind_off(0);
186 dyldInfo->set_weak_bind_size(0);
187 dyldInfo->set_lazy_bind_off(0);
188 dyldInfo->set_lazy_bind_size(0);
189 dyldInfo->set_export_off(0);
190 dyldInfo->set_export_size(0);
191 }
192 break;
193 case LC_SYMTAB:
194 symtab = (macho_symtab_command<P>*)cmd;
195 break;
196 case LC_DYSYMTAB:
197 dynamicSymTab = (macho_dysymtab_command<P>*)cmd;
198 break;
199 case LC_FUNCTION_STARTS:
200 functionStarts = (macho_linkedit_data_command<P>*)cmd;
201 break;
202 case LC_DATA_IN_CODE:
203 dataInCode = (macho_linkedit_data_command<P>*)cmd;
204 break;
205 case LC_LOAD_DYLIB:
206 case LC_LOAD_WEAK_DYLIB:
207 case LC_REEXPORT_DYLIB:
208 case LC_LOAD_UPWARD_DYLIB:
209 ++depIndex;
210 if ( cmd->cmd() == LC_REEXPORT_DYLIB ) {
211 reexportDeps.insert(depIndex);
212 }
213 break;
214 case LC_SEGMENT_SPLIT_INFO:
215 // <rdar://problem/23212513> dylibs iOS 9 dyld caches have bogus LC_SEGMENT_SPLIT_INFO
216 remove = true;
217 break;
218 }
219 uint32_t cmdSize = cmd->cmdsize();
220 macho_load_command<P>* nextCmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmdSize);
221 if ( remove ) {
222 ::memmove((void*)cmd, (void*)nextCmd, bytesRemaining);
223 ++removedCount;
224 }
225 else {
226 bytesRemaining -= cmdSize;
227 cmd = nextCmd;
228 }
229 }
230 // zero out stuff removed
231 ::bzero((void*)cmd, bytesRemaining);
232 // update header
233 mh->set_ncmds(cmdCount - removedCount);
234 mh->set_sizeofcmds(origLoadCommandsSize - bytesRemaining);
235 }
236
237 int optimize_linkedit(std::vector<uint8_t> &new_linkedit_data, uint64_t textOffsetInCache, const void* mapped_cache)
238 {
239 typedef typename A::P P;
240 typedef typename A::P::E E;
241 typedef typename A::P::uint_t pint_t;
242
243 // rebuild symbol table
244 if ( linkEditSegCmd == NULL ) {
245 fprintf(stderr, "__LINKEDIT not found\n");
246 return -1;
247 }
248 if ( symtab == NULL ) {
249 fprintf(stderr, "LC_SYMTAB not found\n");
250 return -1;
251 }
252 if ( dynamicSymTab == NULL ) {
253 fprintf(stderr, "LC_DYSYMTAB not found\n");
254 return -1;
255 }
256
257 const uint64_t newFunctionStartsOffset = new_linkedit_data.size();
258 uint32_t functionStartsSize = 0;
259 if ( functionStarts != NULL ) {
260 // copy function starts from original cache file to new mapped dylib file
261 functionStartsSize = functionStarts->datasize();
262 new_linkedit_data.insert(new_linkedit_data.end(),
263 (char*)mapped_cache + functionStarts->dataoff(),
264 (char*)mapped_cache + functionStarts->dataoff() + functionStartsSize);
265 }
266
267 // pointer align
268 while ((linkEditSegCmd->fileoff() + new_linkedit_data.size()) % sizeof(pint_t))
269 new_linkedit_data.push_back(0);
270
271 const uint64_t newDataInCodeOffset = new_linkedit_data.size();
272 uint32_t dataInCodeSize = 0;
273 if ( dataInCode != NULL ) {
274 // copy data-in-code info from original cache file to new mapped dylib file
275 dataInCodeSize = dataInCode->datasize();
276 new_linkedit_data.insert(new_linkedit_data.end(),
277 (char*)mapped_cache + dataInCode->dataoff(),
278 (char*)mapped_cache + dataInCode->dataoff() + dataInCodeSize);
279 }
280
281 std::vector<mach_o::trie::Entry> exports;
282 if ( exportsTrieSize != 0 ) {
283 const uint8_t* exportsStart = ((uint8_t*)mapped_cache) + exportsTrieOffset;
284 const uint8_t* exportsEnd = &exportsStart[exportsTrieSize];
285 mach_o::trie::parseTrie(exportsStart, exportsEnd, exports);
286 exports.erase(std::remove_if(exports.begin(), exports.end(), NotReExportSymbol(reexportDeps)), exports.end());
287 }
288
289 // look for local symbol info in unmapped part of shared cache
290 dyldCacheHeader<E>* header = (dyldCacheHeader<E>*)mapped_cache;
291 macho_nlist<P>* localNlists = NULL;
292 uint32_t localNlistCount = 0;
293 const char* localStrings = NULL;
294 const char* localStringsEnd = NULL;
295 if ( header->mappingOffset() > offsetof(dyld_cache_header,localSymbolsSize) ) {
296 dyldCacheLocalSymbolsInfo<E>* localInfo = (dyldCacheLocalSymbolsInfo<E>*)(((uint8_t*)mapped_cache) + header->localSymbolsOffset());
297 dyldCacheLocalSymbolEntry<E>* entries = (dyldCacheLocalSymbolEntry<E>*)(((uint8_t*)mapped_cache) + header->localSymbolsOffset() + localInfo->entriesOffset());
298 macho_nlist<P>* allLocalNlists = (macho_nlist<P>*)(((uint8_t*)localInfo) + localInfo->nlistOffset());
299 const uint32_t entriesCount = localInfo->entriesCount();
300 for (uint32_t i=0; i < entriesCount; ++i) {
301 if ( entries[i].dylibOffset() == textOffsetInCache ) {
302 uint32_t localNlistStart = entries[i].nlistStartIndex();
303 localNlistCount = entries[i].nlistCount();
304 localNlists = &allLocalNlists[localNlistStart];
305 localStrings = ((char*)localInfo) + localInfo->stringsOffset();
306 localStringsEnd = &localStrings[localInfo->stringsSize()];
307 break;
308 }
309 }
310 }
311 // compute number of symbols in new symbol table
312 const macho_nlist<P>* const mergedSymTabStart = (macho_nlist<P>*)(((uint8_t*)mapped_cache) + symtab->symoff());
313 const macho_nlist<P>* const mergedSymTabend = &mergedSymTabStart[symtab->nsyms()];
314 uint32_t newSymCount = symtab->nsyms();
315 if ( localNlists != NULL ) {
316 newSymCount = localNlistCount;
317 for (const macho_nlist<P>* s = mergedSymTabStart; s != mergedSymTabend; ++s) {
318 // skip any locals in cache
319 if ( (s->n_type() & (N_TYPE|N_EXT)) == N_SECT )
320 continue;
321 ++newSymCount;
322 }
323 }
324
325 // add room for N_INDR symbols for re-exported symbols
326 newSymCount += exports.size();
327
328 // copy symbol entries and strings from original cache file to new mapped dylib file
329 const char* mergedStringPoolStart = (char*)mapped_cache + symtab->stroff();
330 const char* mergedStringPoolEnd = &mergedStringPoolStart[symtab->strsize()];
331
332 // First count how many entries we need
333 std::vector<macho_nlist<P>> newSymTab;
334 newSymTab.reserve(newSymCount);
335 std::vector<char> newSymNames;
336
337 // first pool entry is always empty string
338 newSymNames.push_back('\0');
339
340 for (const macho_nlist<P>* s = mergedSymTabStart; s != mergedSymTabend; ++s) {
341 // if we have better local symbol info, skip any locals here
342 if ( (localNlists != NULL) && ((s->n_type() & (N_TYPE|N_EXT)) == N_SECT) )
343 continue;
344 macho_nlist<P> t = *s;
345 t.set_n_strx((uint32_t)newSymNames.size());
346 const char* symName = &mergedStringPoolStart[s->n_strx()];
347 if ( symName > mergedStringPoolEnd )
348 symName = "<corrupt symbol name>";
349 newSymNames.insert(newSymNames.end(),
350 symName,
351 symName + (strlen(symName) + 1));
352 newSymTab.push_back(t);
353 }
354 // <rdar://problem/16529213> recreate N_INDR symbols in extracted dylibs for debugger
355 for (std::vector<mach_o::trie::Entry>::iterator it = exports.begin(); it != exports.end(); ++it) {
356 macho_nlist<P> t;
357 memset(&t, 0, sizeof(t));
358 t.set_n_strx((uint32_t)newSymNames.size());
359 t.set_n_type(N_INDR | N_EXT);
360 t.set_n_sect(0);
361 t.set_n_desc(0);
362 newSymNames.insert(newSymNames.end(),
363 it->name,
364 it->name + (strlen(it->name) + 1));
365 const char* importName = it->importName;
366 if ( *importName == '\0' )
367 importName = it->name;
368 t.set_n_value(newSymNames.size());
369 newSymNames.insert(newSymNames.end(),
370 importName,
371 importName + (strlen(importName) + 1));
372 newSymTab.push_back(t);
373 }
374 if ( localNlists != NULL ) {
375 // update load command to reflect new count of locals
376 dynamicSymTab->set_ilocalsym((uint32_t)newSymTab.size());
377 dynamicSymTab->set_nlocalsym(localNlistCount);
378 // copy local symbols
379 for (uint32_t i=0; i < localNlistCount; ++i) {
380 const char* localName = &localStrings[localNlists[i].n_strx()];
381 if ( localName > localStringsEnd )
382 localName = "<corrupt local symbol name>";
383 macho_nlist<P> t = localNlists[i];
384 t.set_n_strx((uint32_t)newSymNames.size());
385 newSymNames.insert(newSymNames.end(),
386 localName,
387 localName + (strlen(localName) + 1));
388 newSymTab.push_back(t);
389 }
390 }
391
392 if ( newSymCount != newSymTab.size() ) {
393 fprintf(stderr, "symbol count miscalculation\n");
394 return -1;
395 }
396
397 //const uint64_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
398 //macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
399 //char* const newStringPoolStart = (char*)mh + newStringPoolOffset;
400
401 // pointer align
402 while ((linkEditSegCmd->fileoff() + new_linkedit_data.size()) % sizeof(pint_t))
403 new_linkedit_data.push_back(0);
404
405 const uint64_t newSymTabOffset = new_linkedit_data.size();
406
407 // Copy sym tab
408 for (macho_nlist<P>& sym : newSymTab) {
409 uint8_t symData[sizeof(macho_nlist<P>)];
410 memcpy(&symData, &sym, sizeof(sym));
411 new_linkedit_data.insert(new_linkedit_data.end(), &symData[0], &symData[sizeof(macho_nlist<P>)]);
412 }
413
414 const uint64_t newIndSymTabOffset = new_linkedit_data.size();
415
416 // Copy indirect symbol table
417 const uint32_t* mergedIndSymTab = (uint32_t*)((char*)mapped_cache + dynamicSymTab->indirectsymoff());
418 new_linkedit_data.insert(new_linkedit_data.end(),
419 (char*)mergedIndSymTab,
420 (char*)(mergedIndSymTab + dynamicSymTab->nindirectsyms()));
421
422 const uint64_t newStringPoolOffset = new_linkedit_data.size();
423
424 // pointer align string pool size
425 while (newSymNames.size() % sizeof(pint_t))
426 newSymNames.push_back('\0');
427
428 new_linkedit_data.insert(new_linkedit_data.end(), newSymNames.begin(), newSymNames.end());
429
430 // update load commands
431 if ( functionStarts != NULL ) {
432 functionStarts->set_dataoff((uint32_t)(newFunctionStartsOffset + linkEditSegCmd->fileoff()));
433 functionStarts->set_datasize(functionStartsSize);
434 }
435 if ( dataInCode != NULL ) {
436 dataInCode->set_dataoff((uint32_t)(newDataInCodeOffset + linkEditSegCmd->fileoff()));
437 dataInCode->set_datasize(dataInCodeSize);
438 }
439
440 symtab->set_nsyms(newSymCount);
441 symtab->set_symoff((uint32_t)(newSymTabOffset + linkEditSegCmd->fileoff()));
442 symtab->set_stroff((uint32_t)(newStringPoolOffset + linkEditSegCmd->fileoff()));
443 symtab->set_strsize((uint32_t)newSymNames.size());
444 dynamicSymTab->set_extreloff(0);
445 dynamicSymTab->set_nextrel(0);
446 dynamicSymTab->set_locreloff(0);
447 dynamicSymTab->set_nlocrel(0);
448 dynamicSymTab->set_indirectsymoff((uint32_t)(newIndSymTabOffset + linkEditSegCmd->fileoff()));
449 linkEditSegCmd->set_filesize(symtab->stroff()+symtab->strsize() - linkEditSegCmd->fileoff());
450 linkEditSegCmd->set_vmsize( (linkEditSegCmd->filesize()+4095) & (-4096) );
451
452 // <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
453 for (std::vector<mach_o::trie::Entry>::iterator it = exports.begin(); it != exports.end(); ++it) {
454 ::free((void*)(it->name));
455 }
456
457
458 return 0;
459 }
460
461 };
462
// Creates every missing directory on the way to `file_path`.
//
// Only the part of the path up to and including its last '/' is considered;
// a path that contains no '/' is ignored.  Directory creation is best effort:
// the result of each mkdir() is deliberately not checked (a component may
// already exist), so a later open() of the file is what reports real failures.
static void make_dirs(const char* file_path)
{
    //printf("make_dirs(%s)\n", file_path);
    // Work on a private writable copy.  A heap-backed buffer is used instead of
    // a variable-length array, which is not standard C++.
    std::vector<char> pathCopy(file_path, file_path + strlen(file_path) + 1);
    char* const dirs = &pathCopy[0];

    char* lastSlash = strrchr(dirs, '/');
    if ( lastSlash == NULL )
        return;
    // cut the file name off, keeping the trailing '/'
    lastSlash[1] = '\0';

    struct stat stat_buf;
    if ( stat(dirs, &stat_buf) == 0 )
        return;     // the directory is already there

    // Walk the path one '/' at a time, temporarily terminating the string at
    // each separator to create that prefix.  The scan starts at index 1 so a
    // leading '/' is never turned into an empty path.
    char* afterSlash = &dirs[1];
    char* slash;
    while ( (slash = strchr(afterSlash, '/')) != NULL ) {
        *slash = '\0';
        ::mkdir(dirs, S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
        //printf("mkdir(%s)\n", dirs);
        *slash = '/';
        afterSlash = slash+1;
    }
}
485
486
487
488 template <typename A>
489 void dylib_maker(const void* mapped_cache, std::vector<uint8_t> &dylib_data, const std::vector<seg_info>& segments) {
490 typedef typename A::P P;
491
492 size_t additionalSize = 0;
493 for(std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
494 if ( strcmp(it->segName, "__LINKEDIT") != 0 )
495 additionalSize += it->sizem;
496 }
497
498 std::vector<uint8_t> new_dylib_data;
499 new_dylib_data.reserve(additionalSize);
500
501 // Write regular segments into the buffer
502 uint64_t textOffsetInCache = 0;
503 for( std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
504
505 if(strcmp(it->segName, "__TEXT") == 0 )
506 textOffsetInCache = it->offset;
507
508 //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
509 // Copy all but the __LINKEDIT. It will be copied later during the optimizer in to a temporary buffer but it would
510 // not be efficient to copy it all now for each dylib.
511 if (strcmp(it->segName, "__LINKEDIT") == 0 )
512 continue;
513 std::copy(((uint8_t*)mapped_cache)+it->offset, ((uint8_t*)mapped_cache)+it->offset+it->sizem, std::back_inserter(new_dylib_data));
514 }
515
516 // optimize linkedit
517 std::vector<uint8_t> new_linkedit_data;
518 new_linkedit_data.reserve(1 << 20);
519
520 LinkeditOptimizer<A> linkeditOptimizer;
521 macho_header<P>* mh = (macho_header<P>*)&new_dylib_data.front();
522 linkeditOptimizer.optimize_loadcommands(mh);
523 linkeditOptimizer.optimize_linkedit(new_linkedit_data, textOffsetInCache, mapped_cache);
524
525 new_dylib_data.insert(new_dylib_data.end(), new_linkedit_data.begin(), new_linkedit_data.end());
526
527 // Page align file
528 while (new_dylib_data.size() % 4096)
529 new_dylib_data.push_back(0);
530
531 dylib_data.insert(dylib_data.end(), new_dylib_data.begin(), new_dylib_data.end());
532 }
533
534 typedef __typeof(dylib_maker<x86>) dylib_maker_func;
535 typedef void (^progress_block)(unsigned current, unsigned total);
536
537 class SharedCacheExtractor;
538 struct SharedCacheDylibExtractor {
539 SharedCacheDylibExtractor(const char* name, std::vector<seg_info> segInfo)
540 : name(name), segInfo(segInfo) { }
541
542 void extractCache(SharedCacheExtractor& context);
543
544 const char* name;
545 const std::vector<seg_info> segInfo;
546 int result = 0;
547 };
548
549 struct SharedCacheExtractor {
550 SharedCacheExtractor(const NameToSegments& map,
551 const char* extraction_root_path,
552 dylib_maker_func* dylib_create_func,
553 void* mapped_cache,
554 progress_block progress)
555 : map(map), extraction_root_path(extraction_root_path),
556 dylib_create_func(dylib_create_func), mapped_cache(mapped_cache),
557 progress(progress) {
558
559 extractors.reserve(map.size());
560 for (auto it : map)
561 extractors.emplace_back(it.first, it.second);
562
563 // Limit the number of open files. 16 seems to give better performance than higher numbers.
564 sema = dispatch_semaphore_create(16);
565 }
566 int extractCaches();
567
568 static void extractCache(void *ctx, size_t i);
569
570 const NameToSegments& map;
571 std::vector<SharedCacheDylibExtractor> extractors;
572 dispatch_semaphore_t sema;
573 const char* extraction_root_path;
574 dylib_maker_func* dylib_create_func;
575 void* mapped_cache;
576 progress_block progress;
577 std::atomic_int count = { 0 };
578 };
579
580 int SharedCacheExtractor::extractCaches() {
581 dispatch_queue_t process_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW, 0);
582 dispatch_apply_f(map.size(), process_queue,
583 this, extractCache);
584
585 int result = 0;
586 for (const SharedCacheDylibExtractor& extractor : extractors) {
587 if (extractor.result != 0) {
588 result = extractor.result;
589 break;
590 }
591 }
592 return result;
593 }
594
595 void SharedCacheExtractor::extractCache(void *ctx, size_t i) {
596 SharedCacheExtractor& context = *(SharedCacheExtractor*)ctx;
597 dispatch_semaphore_wait(context.sema, DISPATCH_TIME_FOREVER);
598 context.extractors[i].extractCache(context);
599 dispatch_semaphore_signal(context.sema);
600 }
601
602 void SharedCacheDylibExtractor::extractCache(SharedCacheExtractor &context) {
603
604 char dylib_path[PATH_MAX];
605 strcpy(dylib_path, context.extraction_root_path);
606 strcat(dylib_path, "/");
607 strcat(dylib_path, name);
608
609 //printf("%s with %lu segments\n", dylib_path, it->second.size());
610 // make sure all directories in this path exist
611 make_dirs(dylib_path);
612
613 // open file, create if does not already exist
614 int fd = ::open(dylib_path, O_CREAT | O_TRUNC | O_EXLOCK | O_RDWR, 0644);
615 if ( fd == -1 ) {
616 fprintf(stderr, "can't open or create dylib file %s, errnor=%d\n", dylib_path, errno);
617 result = -1;
618 return;
619 }
620
621 std::vector<uint8_t> vec;
622 context.dylib_create_func(context.mapped_cache, vec, segInfo);
623 context.progress(context.count++, (unsigned)context.map.size());
624
625 // Write file data
626 if( write(fd, &vec.front(), vec.size()) == -1) {
627 fprintf(stderr, "error writing, errnor=%d\n", errno);
628 result = -1;
629 }
630
631 close(fd);
632 }
633
634 static int sharedCacheIsValid(const void* mapped_cache, uint64_t size) {
635 // First check that the size is good.
636 // Note the shared cache may not have a codeSignatureSize value set so we need to first make
637 // sure we have space for the CS_SuperBlob, then later crack that to check for the size of the rest.
638 const DyldSharedCache* dyldSharedCache = (DyldSharedCache*)mapped_cache;
639 uint64_t requiredSizeForCSSuperBlob = dyldSharedCache->header.codeSignatureOffset + sizeof(CS_SuperBlob);
640 const dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)((uint8_t*)mapped_cache + dyldSharedCache->header.mappingOffset);
641 if ( requiredSizeForCSSuperBlob > size ) {
642 fprintf(stderr, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size, requiredSizeForCSSuperBlob);
643 return -1;
644 }
645
646 // Now see if the code signatures are valid as that tells us the pages aren't corrupt.
647 // First find all of the regions of the shared cache we computed cd hashes
648 std::vector<std::pair<uint64_t, uint64_t>> sharedCacheRegions;
649 sharedCacheRegions.emplace_back(std::make_pair(mappings[0].fileOffset, mappings[0].fileOffset + mappings[0].size));
650 sharedCacheRegions.emplace_back(std::make_pair(mappings[1].fileOffset, mappings[1].fileOffset + mappings[1].size));
651 sharedCacheRegions.emplace_back(std::make_pair(mappings[2].fileOffset, mappings[2].fileOffset + mappings[2].size));
652 if (dyldSharedCache->header.localSymbolsSize)
653 sharedCacheRegions.emplace_back(std::make_pair(dyldSharedCache->header.localSymbolsOffset, dyldSharedCache->header.localSymbolsOffset + dyldSharedCache->header.localSymbolsSize));
654 size_t inBbufferSize = 0;
655 for (auto& sharedCacheRegion : sharedCacheRegions)
656 inBbufferSize += (sharedCacheRegion.second - sharedCacheRegion.first);
657
658 // Now take the cd hash from the cache itself and validate the regions we found.
659 uint8_t* codeSignatureRegion = (uint8_t*)mapped_cache + dyldSharedCache->header.codeSignatureOffset;
660 CS_SuperBlob* sb = reinterpret_cast<CS_SuperBlob*>(codeSignatureRegion);
661 if (sb->magic != htonl(CSMAGIC_EMBEDDED_SIGNATURE)) {
662 fprintf(stderr, "Error: dyld shared cache code signature magic is incorrect.\n");
663 return -1;
664 }
665
666 size_t sbSize = ntohl(sb->length);
667 uint64_t requiredSizeForCS = dyldSharedCache->header.codeSignatureOffset + sbSize;
668 if ( requiredSizeForCS > size ) {
669 fprintf(stderr, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size, requiredSizeForCS);
670 return -1;
671 }
672
673 // Find the offset to the code directory.
674 CS_CodeDirectory* cd = nullptr;
675 for (unsigned i =0; i != sb->count; ++i) {
676 if (ntohl(sb->index[i].type) == CSSLOT_CODEDIRECTORY) {
677 cd = (CS_CodeDirectory*)(codeSignatureRegion + ntohl(sb->index[i].offset));
678 break;
679 }
680 }
681
682 if (!cd) {
683 fprintf(stderr, "Error: dyld shared cache code signature directory is missing.\n");
684 return -1;
685 }
686
687 if ( (uint8_t*)cd > (codeSignatureRegion + sbSize) ) {
688 fprintf(stderr, "Error: dyld shared cache code signature directory is out of bounds.\n");
689 return -1;
690 }
691
692 if ( cd->magic != htonl(CSMAGIC_CODEDIRECTORY) ) {
693 fprintf(stderr, "Error: dyld shared cache code signature directory magic is incorrect.\n");
694 return -1;
695 }
696
697 uint32_t pageSize = 1 << cd->pageSize;
698 uint32_t slotCountFromRegions = (uint32_t)((inBbufferSize + pageSize - 1) / pageSize);
699 if ( ntohl(cd->nCodeSlots) < slotCountFromRegions ) {
700 fprintf(stderr, "Error: dyld shared cache code signature directory num slots is incorrect.\n");
701 return -1;
702 }
703
704 uint32_t dscDigestFormat = kCCDigestNone;
705 switch (cd->hashType) {
706 case CS_HASHTYPE_SHA1:
707 #pragma clang diagnostic push
708 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
709 dscDigestFormat = kCCDigestSHA1;
710 #pragma clang diagnostic pop
711 break;
712 case CS_HASHTYPE_SHA256:
713 dscDigestFormat = kCCDigestSHA256;
714 break;
715 default:
716 break;
717 }
718
719 if (dscDigestFormat != kCCDigestNone) {
720 const uint64_t csPageSize = 1 << cd->pageSize;
721 size_t hashOffset = ntohl(cd->hashOffset);
722 uint8_t* hashSlot = (uint8_t*)cd + hashOffset;
723 uint8_t cdHashBuffer[cd->hashSize];
724
725 // Skip local symbols for now as those aren't being codesign correctly right now.
726 size_t inBbufferSize = 0;
727 for (auto& sharedCacheRegion : sharedCacheRegions) {
728 if (sharedCacheRegion.first == dyldSharedCache->header.localSymbolsOffset)
729 continue;
730 inBbufferSize += (sharedCacheRegion.second - sharedCacheRegion.first);
731 }
732 uint32_t slotCountToProcess = (uint32_t)((inBbufferSize + pageSize - 1) / pageSize);
733
734 for (unsigned i = 0; i != slotCountToProcess; ++i) {
735 // Skip data pages as those may have been slid by ASLR in the extracted file
736 uint64_t fileOffset = i * csPageSize;
737 if ( (fileOffset >= mappings[1].fileOffset) && (fileOffset < (mappings[1].fileOffset + mappings[1].size)) )
738 continue;
739
740 CCDigest(dscDigestFormat, (uint8_t*)mapped_cache + fileOffset, (size_t)csPageSize, cdHashBuffer);
741 uint8_t* cacheCdHashBuffer = hashSlot + (i * cd->hashSize);
742 if (memcmp(cdHashBuffer, cacheCdHashBuffer, cd->hashSize) != 0) {
743 fprintf(stderr, "Error: dyld shared cache code signature for page %d is incorrect.\n", i);
744 return -1;
745 }
746 }
747 }
748 return 0;
749 }
750
751 int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path, const char* extraction_root_path,
752 progress_block progress)
753 {
754 struct stat statbuf;
755 if (stat(shared_cache_file_path, &statbuf)) {
756 fprintf(stderr, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path);
757 return -1;
758 }
759
760 int cache_fd = open(shared_cache_file_path, O_RDONLY);
761 if (cache_fd < 0) {
762 fprintf(stderr, "Error: failed to open shared cache file at %s\n", shared_cache_file_path);
763 return -1;
764 }
765
766 void* mapped_cache = mmap(NULL, (size_t)statbuf.st_size, PROT_READ, MAP_PRIVATE, cache_fd, 0);
767 if (mapped_cache == MAP_FAILED) {
768 fprintf(stderr, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path, errno);
769 return -1;
770 }
771
772 close(cache_fd);
773
774 // instantiate arch specific dylib maker
775 dylib_maker_func* dylib_create_func = nullptr;
776 if ( strcmp((char*)mapped_cache, "dyld_v1 i386") == 0 )
777 dylib_create_func = dylib_maker<x86>;
778 else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64") == 0 )
779 dylib_create_func = dylib_maker<x86_64>;
780 else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64h") == 0 )
781 dylib_create_func = dylib_maker<x86_64>;
782 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv5") == 0 )
783 dylib_create_func = dylib_maker<arm>;
784 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv6") == 0 )
785 dylib_create_func = dylib_maker<arm>;
786 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv7") == 0 )
787 dylib_create_func = dylib_maker<arm>;
788 else if ( strncmp((char*)mapped_cache, "dyld_v1 armv7", 14) == 0 )
789 dylib_create_func = dylib_maker<arm>;
790 else if ( strcmp((char*)mapped_cache, "dyld_v1 arm64") == 0 )
791 dylib_create_func = dylib_maker<arm64>;
792 #if SUPPORT_ARCH_arm64e
793 else if ( strcmp((char*)mapped_cache, "dyld_v1 arm64e") == 0 )
794 dylib_create_func = dylib_maker<arm64>;
795 #endif
796 #if SUPPORT_ARCH_arm64_32
797 else if ( strcmp((char*)mapped_cache, "dyld_v1arm64_32") == 0 )
798 dylib_create_func = dylib_maker<arm64_32>;
799 #endif
800 else {
801 fprintf(stderr, "Error: unrecognized dyld shared cache magic.\n");
802 munmap(mapped_cache, (size_t)statbuf.st_size);
803 return -1;
804 }
805
806 // Verify that the cache isn't corrupt.
807 if (int result = sharedCacheIsValid(mapped_cache, (uint64_t)statbuf.st_size)) {
808 munmap(mapped_cache, (size_t)statbuf.st_size);
809 return result;
810 }
811
812 // iterate through all images in cache and build map of dylibs and segments
813 __block NameToSegments map;
814 int result = 0;
815
816 result = dyld_shared_cache_iterate(mapped_cache, (uint32_t)statbuf.st_size, ^(const dyld_shared_cache_dylib_info* dylibInfo, const dyld_shared_cache_segment_info* segInfo) {
817 map[dylibInfo->path].push_back(seg_info(segInfo->name, segInfo->fileOffset, segInfo->fileSize));
818 });
819
820 if(result != 0) {
821 fprintf(stderr, "Error: dyld_shared_cache_iterate_segments_with_slide failed.\n");
822 munmap(mapped_cache, (size_t)statbuf.st_size);
823 return result;
824 }
825
826 // for each dylib instantiate a dylib file
827 SharedCacheExtractor extractor(map, extraction_root_path, dylib_create_func, mapped_cache, progress);
828 result = extractor.extractCaches();
829
830 munmap(mapped_cache, (size_t)statbuf.st_size);
831 return result;
832 }
833
834
835
836 int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path, const char* extraction_root_path)
837 {
838 return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path, extraction_root_path,
839 ^(unsigned , unsigned) {} );
840 }
841
842
843 #if 0
844 // test program
845 #include <stdio.h>
846 #include <stddef.h>
847 #include <dlfcn.h>
848
849
850 typedef int (*extractor_proc)(const char* shared_cache_file_path, const char* extraction_root_path,
851 void (^progress)(unsigned current, unsigned total));
852
853 int main(int argc, const char* argv[])
854 {
855 if ( argc != 3 ) {
856 fprintf(stderr, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
857 return 1;
858 }
859
860 //void* handle = dlopen("/Volumes/my/src/dyld/build/Debug/dsc_extractor.bundle", RTLD_LAZY);
861 void* handle = dlopen("/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/usr/lib/dsc_extractor.bundle", RTLD_LAZY);
862 if ( handle == NULL ) {
863 fprintf(stderr, "dsc_extractor.bundle could not be loaded\n");
864 return 1;
865 }
866
867 extractor_proc proc = (extractor_proc)dlsym(handle, "dyld_shared_cache_extract_dylibs_progress");
868 if ( proc == NULL ) {
869 fprintf(stderr, "dsc_extractor.bundle did not have dyld_shared_cache_extract_dylibs_progress symbol\n");
870 return 1;
871 }
872
873 int result = (*proc)(argv[1], argv[2], ^(unsigned c, unsigned total) { printf("%d/%d\n", c, total); } );
874 fprintf(stderr, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result);
875 return 0;
876 }
877
878
879 #endif
880
881
882
883