1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
3 * Copyright (c) 2011 Apple Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
#include <sys/syslimits.h>
#include <libkern/OSByteOrder.h>
#include <mach-o/arch.h>
#include <mach-o/loader.h>
#include <Availability.h>

#include "CodeSigningTypes.h"
#include <CommonCrypto/CommonHMAC.h>
#include <CommonCrypto/CommonDigest.h>
#include <CommonCrypto/CommonDigestSPI.h>

#include "Architectures.hpp"
#include "MachOFileAbstraction.hpp"

#include "dsc_iterator.h"
#include "dsc_extractor.h"
#include "DyldSharedCache.h"
#include "MachOAnalyzer.h"
#include "SupportedArchs.h"

#include <unordered_map>
#include <utility>

#include <dispatch/dispatch.h>
65 seg_info(const char* n
, uint64_t o
, uint64_t s
)
66 : segName(n
), offset(o
), sizem(s
) { }
74 size_t operator()(const char* __s
) const {
83 bool operator()(const char* left
, const char* right
) const { return (strcmp(left
, right
) == 0); }
85 typedef std::unordered_map
<const char*, std::vector
<seg_info
>, CStringHash
, CStringEquals
> NameToSegments
;
87 // Filter to find individual symbol re-exports in trie
// Instances of this functor are passed to std::remove_if over the parsed
// export-trie entries in optimize_linkedit().
88 class NotReExportSymbol
{
// Stores a reference (not a copy) to rd, so the caller's set must outlive this object.
90 NotReExportSymbol(const std::set
<int> &rd
) :_reexportDeps(rd
) {}
// Predicate entry point: forwards to isSymbolReExport() and returns its result unchanged.
91 bool operator()(const ExportInfoTrie::Entry
&entry
) const {
92 return isSymbolReExport(entry
);
// Classifies one entry with three checks: export kind is not REGULAR, the
// REEXPORT flag is clear, or entry.info.other is in _reexportDeps.
// NOTE(review): the value returned by each branch is not visible in this view;
// confirm the polarity against the class name before relying on it.
95 bool isSymbolReExport(const ExportInfoTrie::Entry
&entry
) const {
96 if ( (entry
.info
.flags
& EXPORT_SYMBOL_FLAGS_KIND_MASK
) != EXPORT_SYMBOL_FLAGS_KIND_REGULAR
)
98 if ( (entry
.info
.flags
& EXPORT_SYMBOL_FLAGS_REEXPORT
) == 0 )
100 // If the symbol comes from a dylib that is re-exported, this is not an individual symbol re-export
101 if ( _reexportDeps
.count((int)entry
.info
.other
) != 0 )
// Set consulted by isSymbolReExport(); held by reference.
105 const std::set
<int> &_reexportDeps
;
108 template <typename P
>
109 struct LoadCommandInfo
{
// Helper templated on an architecture type A (which supplies P, E and the
// pointer-sized integer type). It caches pointers to selected load commands of
// one image plus the location of its export trie, all filled in by
// optimize_loadcommands() and consumed by optimize_linkedit().
112 template <typename A
>
113 class LinkeditOptimizer
{
114 typedef typename
A::P P
;
115 typedef typename
A::P::E E
;
116 typedef typename
A::P::uint_t pint_t
;
// Segment command whose name is "__LINKEDIT"; nullptr until found.
119 macho_segment_command
<P
>* linkEditSegCmd
= nullptr;
// Load commands remembered for the linkedit rebuild; each stays nullptr if absent.
120 symtab_command
* symtab
= nullptr;
121 dysymtab_command
* dynamicSymTab
= nullptr;
122 linkedit_data_command
* functionStarts
= nullptr;
123 linkedit_data_command
* dataInCode
= nullptr;
// Export trie offset/size captured before the load command fields are zeroed.
124 uint32_t exportsTrieOffset
= 0;
125 uint32_t exportsTrieSize
= 0;
// Values of depIndex recorded for LC_REEXPORT_DYLIB load commands.
126 std::set
<int> reexportDeps
;
// Rewrites the load commands of the image at mh in place:
//   - clears the top bit of the header flags,
//   - gives every segment a file offset equal to the running total of the
//     preceding segments' sizes and sets filesize to vmsize,
//   - re-bases non-zero section offsets onto the new segment file offset,
//   - remembers the export trie location, then zeroes the dyld-info and
//     export-trie ranges,
//   - records the symtab / dysymtab / function-starts / data-in-code commands
//     and the __LINKEDIT segment in member pointers,
//   - collects depIndex for LC_REEXPORT_DYLIB commands into reexportDeps.
// A second pass calls removeLoadCommand(); its visible case is LC_SEGMENT_SPLIT_INFO.
// NOTE(review): several case labels, break statements and the increment of
// depIndex are not visible in this view.
130 void optimize_loadcommands(dyld3::MachOAnalyzer
* mh
)
132 // update header flags
133 mh
->flags
&= 0x7FFFFFFF; // remove in-cache bit
135 // update load commands
// Running total of segment file sizes; becomes the next segment's file offset.
136 __block
uint64_t cumulativeFileSize
= 0;
137 __block
int depIndex
= 0;
139 mh
->forEachLoadCommand(diag
, ^(const load_command
* cmd
, bool &stop
) {
140 switch ( cmd
->cmd
) {
141 case macho_segment_command
<P
>::CMD
: {
142 auto segCmd
= (macho_segment_command
<P
>*)cmd
;
143 segCmd
->set_fileoff(cumulativeFileSize
);
144 segCmd
->set_filesize(segCmd
->vmsize());
// The section array is taken to start immediately after the segment command struct.
146 auto const sectionsStart
= (macho_section
<P
>*)((char*)segCmd
+ sizeof(macho_segment_command
<P
>));
// NOTE(review): "§ionsStart" below looks like a mis-encoded "&sectionsStart"
// (address of one-past-the-last section); verify against the real file.
147 auto const sectionsEnd
= §ionsStart
[segCmd
->nsects()];
148 for (auto sect
= sectionsStart
; sect
< sectionsEnd
; ++sect
) {
// Sections whose offset is 0 are left untouched; others keep their position
// relative to the segment start.
149 if ( sect
->offset() != 0 ) {
150 sect
->set_offset((uint32_t)(cumulativeFileSize
+ sect
->addr() - segCmd
->vmaddr()));
153 if ( strcmp(segCmd
->segname(), "__LINKEDIT") == 0 )
154 linkEditSegCmd
= segCmd
;
155 cumulativeFileSize
+= segCmd
->filesize();
157 case LC_DYLD_INFO_ONLY
: {
158 // zero out all dyld info. lldb only uses symbol table
159 auto dyldInfo
= (dyld_info_command
*)cmd
;
// Save the export trie range first; optimize_linkedit() reads it from the cache.
160 exportsTrieOffset
= dyldInfo
->export_off
;
161 exportsTrieSize
= dyldInfo
->export_size
;
162 dyldInfo
->rebase_off
= 0;
163 dyldInfo
->rebase_size
= 0;
164 dyldInfo
->bind_off
= 0;
165 dyldInfo
->bind_size
= 0;
166 dyldInfo
->weak_bind_off
= 0;
167 dyldInfo
->weak_bind_size
= 0;
168 dyldInfo
->lazy_bind_off
= 0;
169 dyldInfo
->lazy_bind_size
= 0;
170 dyldInfo
->export_off
= 0;
171 dyldInfo
->export_size
= 0;
173 case LC_DYLD_EXPORTS_TRIE
: {
174 // don't put export trie into extracted dylib. lldb only uses symbol table
175 linkedit_data_command
* exportsTrie
= (linkedit_data_command
*)cmd
;
176 exportsTrieOffset
= exportsTrie
->dataoff
;
177 exportsTrieSize
= exportsTrie
->datasize
;
178 exportsTrie
->dataoff
= 0;
179 exportsTrie
->datasize
= 0;
// The case labels for the next two assignments are not visible in this view.
182 symtab
= (symtab_command
*)cmd
;
185 dynamicSymTab
= (dysymtab_command
*)cmd
;
187 case LC_FUNCTION_STARTS
:
188 functionStarts
= (linkedit_data_command
*)cmd
;
190 case LC_DATA_IN_CODE
:
191 dataInCode
= (linkedit_data_command
*)cmd
;
// Dependent-dylib commands: only LC_REEXPORT_DYLIB adds depIndex to reexportDeps.
194 case LC_LOAD_WEAK_DYLIB
:
195 case LC_REEXPORT_DYLIB
:
196 case LC_LOAD_UPWARD_DYLIB
:
198 if ( cmd
->cmd
== LC_REEXPORT_DYLIB
) {
199 reexportDeps
.insert(depIndex
);
// Second pass: the callback receives a `remove` out-parameter for each command.
207 mh
->removeLoadCommand(diag
, ^(const load_command
* cmd
, bool& remove
, bool &stop
) {
208 switch ( cmd
->cmd
) {
209 case LC_SEGMENT_SPLIT_INFO
:
210 // <rdar://problem/23212513> dylibs iOS 9 dyld caches have bogus LC_SEGMENT_SPLIT_INFO
// Builds the replacement __LINKEDIT payload for one extracted dylib and patches
// the load commands recorded by optimize_loadcommands() to point into it.
//
// new_linkedit_data  receives, in order: function starts, padding, data-in-code,
//                    padding, the rebuilt symbol table, the indirect symbol
//                    table and the new string pool.
// textOffsetInCache  compared against each local-symbol entry's dylibOffset to
//                    find this dylib's cache-local symbols.
// mapped_cache       base of the mapped cache; all source offsets are added to it.
//
// Prints to stderr when __LINKEDIT, LC_SYMTAB or LC_DYSYMTAB was not found, or
// when the predicted symbol count differs from the number actually emitted.
// NOTE(review): the return statements are not visible in this view.
220 int optimize_linkedit(std::vector
<uint8_t> &new_linkedit_data
, uint64_t textOffsetInCache
, const void* mapped_cache
)
222 // rebuild symbol table
223 if ( linkEditSegCmd
== nullptr ) {
224 fprintf(stderr
, "__LINKEDIT not found\n");
227 if ( symtab
== nullptr ) {
228 fprintf(stderr
, "LC_SYMTAB not found\n");
231 if ( dynamicSymTab
== nullptr ) {
232 fprintf(stderr
, "LC_DYSYMTAB not found\n");
// Offsets recorded below are relative to the start of new_linkedit_data; the
// segment's fileoff is added when the load commands are updated at the end.
236 const uint64_t newFunctionStartsOffset
= new_linkedit_data
.size();
237 uint32_t functionStartsSize
= 0;
238 if ( functionStarts
!= NULL
) {
239 // copy function starts from original cache file to new mapped dylib file
240 functionStartsSize
= functionStarts
->datasize
;
241 new_linkedit_data
.insert(new_linkedit_data
.end(),
242 (char*)mapped_cache
+ functionStarts
->dataoff
,
243 (char*)mapped_cache
+ functionStarts
->dataoff
+ functionStartsSize
);
// Pad with zero bytes until the absolute file position is a multiple of sizeof(pint_t).
247 while ((linkEditSegCmd
->fileoff() + new_linkedit_data
.size()) % sizeof(pint_t
))
248 new_linkedit_data
.push_back(0);
250 const uint64_t newDataInCodeOffset
= new_linkedit_data
.size();
251 uint32_t dataInCodeSize
= 0;
252 if ( dataInCode
!= NULL
) {
253 // copy data-in-code info from original cache file to new mapped dylib file
254 dataInCodeSize
= dataInCode
->datasize
;
255 new_linkedit_data
.insert(new_linkedit_data
.end(),
256 (char*)mapped_cache
+ dataInCode
->dataoff
,
257 (char*)mapped_cache
+ dataInCode
->dataoff
+ dataInCodeSize
);
// Parse the export trie (if any) and drop every entry for which
// NotReExportSymbol returns true; the survivors become N_INDR symbols below.
260 std::vector
<ExportInfoTrie::Entry
> exports
;
261 if ( exportsTrieSize
!= 0 ) {
262 const uint8_t* exportsStart
= ((uint8_t*)mapped_cache
) + exportsTrieOffset
;
263 const uint8_t* exportsEnd
= &exportsStart
[exportsTrieSize
];
264 ExportInfoTrie::parseTrie(exportsStart
, exportsEnd
, exports
);
265 exports
.erase(std::remove_if(exports
.begin(), exports
.end(), NotReExportSymbol(reexportDeps
)), exports
.end());
// Look up this dylib's slice of the cache-wide local symbol table by matching
// dylibOffset against textOffsetInCache; both outputs stay null/0 if no match.
268 const DyldSharedCache
* cache
= (DyldSharedCache
*)mapped_cache
;
269 macho_nlist
<P
>* allLocalNlists
= (macho_nlist
<P
>*)cache
->getLocalNlistEntries();
270 __block macho_nlist
<P
>* localNlists
= nullptr;
271 __block
uint32_t localNlistCount
= 0;
272 cache
->forEachLocalSymbolEntry(^(uint32_t dylibOffset
, uint32_t nlistStartIndex
, uint32_t nlistCount
, bool& stop
){
273 if (dylibOffset
== textOffsetInCache
) {
274 localNlists
= &allLocalNlists
[nlistStartIndex
];
275 localNlistCount
= nlistCount
;
279 // compute number of symbols in new symbol table
280 const macho_nlist
<P
>* const mergedSymTabStart
= (macho_nlist
<P
>*)(((uint8_t*)mapped_cache
) + symtab
->symoff
);
281 const macho_nlist
<P
>* const mergedSymTabend
= &mergedSymTabStart
[symtab
->nsyms
];
// Default: keep every existing symbol. When cache-local symbols exist, start
// from their count and walk the merged table (loop body partly not visible).
282 uint32_t newSymCount
= symtab
->nsyms
;
283 if ( localNlistCount
!= 0 ) {
284 newSymCount
= localNlistCount
;
285 for (const macho_nlist
<P
>* s
= mergedSymTabStart
; s
!= mergedSymTabend
; ++s
) {
286 // skip any locals in cache
287 if ( (s
->n_type() & (N_TYPE
|N_EXT
)) == N_SECT
)
293 // add room for N_INDR symbols for re-exported symbols
294 newSymCount
+= exports
.size();
296 // copy symbol entries and strings from original cache file to new mapped dylib file
297 const char* mergedStringPoolStart
= (char*)mapped_cache
+ symtab
->stroff
;
298 const char* mergedStringPoolEnd
= &mergedStringPoolStart
[symtab
->strsize
];
300 // First count how many entries we need
301 std::vector
<macho_nlist
<P
>> newSymTab
;
302 newSymTab
.reserve(newSymCount
);
303 std::vector
<char> newSymNames
;
305 // first pool entry is always empty string
306 newSymNames
.push_back('\0');
// Pass 1: copy symbols from the merged table, re-pointing n_strx at the name's
// position in the new string pool.
308 for (const macho_nlist
<P
>* s
= mergedSymTabStart
; s
!= mergedSymTabend
; ++s
) {
309 // if we have better local symbol info, skip any locals here
310 if ( (localNlists
!= NULL
) && ((s
->n_type() & (N_TYPE
|N_EXT
)) == N_SECT
) )
312 macho_nlist
<P
> t
= *s
;
313 t
.set_n_strx((uint32_t)newSymNames
.size());
314 const char* symName
= &mergedStringPoolStart
[s
->n_strx()];
// NOTE(review): only the upper bound is checked, and with '>' rather than '>=';
// confirm a name starting exactly at the pool end is intended to pass.
315 if ( symName
> mergedStringPoolEnd
)
316 symName
= "<corrupt symbol name>";
317 newSymNames
.insert(newSymNames
.end(),
319 symName
+ (strlen(symName
) + 1));
320 newSymTab
.push_back(t
);
322 // <rdar://problem/16529213> recreate N_INDR symbols in extracted dylibs for debugger
// Pass 2: one N_INDR|N_EXT symbol per surviving export. n_strx points at the
// exported name and n_value at the import name (or the exported name again
// when importName is empty), both appended to the new string pool.
323 for (std::vector
<ExportInfoTrie::Entry
>::iterator it
= exports
.begin(); it
!= exports
.end(); ++it
) {
325 memset(&t
, 0, sizeof(t
));
326 t
.set_n_strx((uint32_t)newSymNames
.size());
327 t
.set_n_type(N_INDR
| N_EXT
);
330 newSymNames
.insert(newSymNames
.end(),
332 it
->name
.c_str() + (it
->name
.size() + 1));
333 const char* importName
= it
->info
.importName
.c_str();
334 if ( *importName
== '\0' )
335 importName
= it
->name
.c_str();
336 t
.set_n_value(newSymNames
.size());
337 newSymNames
.insert(newSymNames
.end(),
339 importName
+ (strlen(importName
) + 1));
340 newSymTab
.push_back(t
);
// Pass 3: append the cache-local symbols, which therefore sit at the end of the
// new table; ilocalsym/nlocalsym are updated to describe that range.
342 if ( localNlistCount
!= 0 ) {
343 const char* localStrings
= cache
->getLocalStrings();
344 // update load command to reflect new count of locals
345 dynamicSymTab
->ilocalsym
= (uint32_t)newSymTab
.size();
346 dynamicSymTab
->nlocalsym
= localNlistCount
;
347 // copy local symbols
348 for (uint32_t i
=0; i
< localNlistCount
; ++i
) {
349 const char* localName
= &localStrings
[localNlists
[i
].n_strx()];
350 if ( localName
> localStrings
+ cache
->getLocalStringsSize() )
351 localName
= "<corrupt local symbol name>";
352 macho_nlist
<P
> t
= localNlists
[i
];
353 t
.set_n_strx((uint32_t)newSymNames
.size());
354 newSymNames
.insert(newSymNames
.end(),
356 localName
+ (strlen(localName
) + 1));
357 newSymTab
.push_back(t
);
// Sanity check: the count predicted above must match what was emitted.
361 if ( newSymCount
!= newSymTab
.size() ) {
362 fprintf(stderr
, "symbol count miscalculation\n");
366 //const uint64_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
367 //macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
368 //char* const newStringPoolStart = (char*)mh + newStringPoolOffset;
// Pointer-align the file position again before writing the symbol table.
371 while ((linkEditSegCmd
->fileoff() + new_linkedit_data
.size()) % sizeof(pint_t
))
372 new_linkedit_data
.push_back(0);
374 const uint64_t newSymTabOffset
= new_linkedit_data
.size();
// Serialize each nlist entry as raw bytes.
377 for (macho_nlist
<P
>& sym
: newSymTab
) {
378 uint8_t symData
[sizeof(macho_nlist
<P
>)];
379 memcpy(&symData
, &sym
, sizeof(sym
));
380 new_linkedit_data
.insert(new_linkedit_data
.end(), &symData
[0], &symData
[sizeof(macho_nlist
<P
>)]);
383 const uint64_t newIndSymTabOffset
= new_linkedit_data
.size();
385 // Copy indirect symbol table
386 const uint32_t* mergedIndSymTab
= (uint32_t*)((char*)mapped_cache
+ dynamicSymTab
->indirectsymoff
);
387 new_linkedit_data
.insert(new_linkedit_data
.end(),
388 (char*)mergedIndSymTab
,
389 (char*)(mergedIndSymTab
+ dynamicSymTab
->nindirectsyms
));
391 const uint64_t newStringPoolOffset
= new_linkedit_data
.size();
393 // pointer align string pool size
394 while (newSymNames
.size() % sizeof(pint_t
))
395 newSymNames
.push_back('\0');
397 new_linkedit_data
.insert(new_linkedit_data
.end(), newSymNames
.begin(), newSymNames
.end());
399 // update load commands
400 if ( functionStarts
!= NULL
) {
401 functionStarts
->dataoff
= (uint32_t)(newFunctionStartsOffset
+ linkEditSegCmd
->fileoff());
402 functionStarts
->datasize
= functionStartsSize
;
404 if ( dataInCode
!= NULL
) {
405 dataInCode
->dataoff
= (uint32_t)(newDataInCodeOffset
+ linkEditSegCmd
->fileoff());
406 dataInCode
->datasize
= dataInCodeSize
;
409 symtab
->nsyms
= newSymCount
;
410 symtab
->symoff
= (uint32_t)(newSymTabOffset
+ linkEditSegCmd
->fileoff());
411 symtab
->stroff
= (uint32_t)(newStringPoolOffset
+ linkEditSegCmd
->fileoff());
412 symtab
->strsize
= (uint32_t)newSymNames
.size();
// Relocation ranges are cleared; only the indirect symbol table offset is kept.
413 dynamicSymTab
->extreloff
= 0;
414 dynamicSymTab
->nextrel
= 0;
415 dynamicSymTab
->locreloff
= 0;
416 dynamicSymTab
->nlocrel
= 0;
417 dynamicSymTab
->indirectsymoff
= (uint32_t)(newIndSymTabOffset
+ linkEditSegCmd
->fileoff());
// __LINKEDIT now ends where the string pool ends; vmsize is filesize rounded up
// to a multiple of 4096.
418 linkEditSegCmd
->set_filesize(symtab
->stroff
+ symtab
->strsize
- linkEditSegCmd
->fileoff());
419 linkEditSegCmd
->set_vmsize((linkEditSegCmd
->filesize() + 4095) & (-4096));
// Creates missing directories for file_path. The path is copied into a local
// buffer; if stat() on that buffer fails, the buffer is scanned from its second
// character for each '/' and mkdir() is called with mode rwxr-xr-x.
// mkdir() return values are not checked in the visible code.
// NOTE(review): the statements that truncate the buffer at lastSlash and at
// each intermediate slash are not visible in this view; presumably they
// NUL-terminate and later restore the separator. Confirm against the real file.
426 static void make_dirs(const char* file_path
)
428 //printf("make_dirs(%s)\n", file_path);
// Variable-length array sized to the input, including the terminator.
429 char dirs
[strlen(file_path
)+1];
430 strcpy(dirs
, file_path
);
431 char* lastSlash
= strrchr(dirs
, '/');
432 if ( lastSlash
== NULL
)
435 struct stat stat_buf
;
436 if ( stat(dirs
, &stat_buf
) != 0 ) {
// Start after the first character so a leading '/' is not treated as a component boundary.
437 char* afterSlash
= &dirs
[1];
439 while ( (slash
= strchr(afterSlash
, '/')) != NULL
) {
441 ::mkdir(dirs
, S_IRWXU
| S_IRGRP
|S_IXGRP
| S_IROTH
|S_IXOTH
);
442 //printf("mkdir(%s)\n", dirs);
444 afterSlash
= slash
+1;
// Assembles one dylib image for architecture A and appends it to dylib_data.
//
// mapped_cache  base of the mapped shared cache; segment offsets are added to it.
// dylib_data    output; the finished image is appended to whatever it already holds.
// segments      the dylib's segments (name, cache file offset, size).
//
// Steps: copy the bytes of the segments into a scratch buffer, handling
// "__LINKEDIT" separately; run LinkeditOptimizer over the scratch buffer's load
// commands and build a fresh linkedit payload; append that payload; pad the
// result to a multiple of 4096 bytes; append to dylib_data.
// The int returned by optimize_linkedit() is ignored here.
451 template <typename A
>
452 void dylib_maker(const void* mapped_cache
, std::vector
<uint8_t> &dylib_data
, const std::vector
<seg_info
>& segments
) {
453 typedef typename
A::P P
;
// Total size of all segments except __LINKEDIT, used only to pre-reserve the buffer.
455 size_t additionalSize
= 0;
456 for(std::vector
<seg_info
>::const_iterator it
=segments
.begin(); it
!= segments
.end(); ++it
) {
457 if ( strcmp(it
->segName
, "__LINKEDIT") != 0 )
458 additionalSize
+= it
->sizem
;
461 std::vector
<uint8_t> new_dylib_data
;
462 new_dylib_data
.reserve(additionalSize
);
464 // Write regular segments into the buffer
465 uint64_t textOffsetInCache
= 0;
466 for( std::vector
<seg_info
>::const_iterator it
=segments
.begin(); it
!= segments
.end(); ++it
) {
// Remember where __TEXT lives in the cache; it is passed to optimize_linkedit().
468 if(strcmp(it
->segName
, "__TEXT") == 0 )
469 textOffsetInCache
= it
->offset
;
471 //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
472 // Copy all but the __LINKEDIT. It will be copied later during the optimizer in to a temporary buffer but it would
473 // not be efficient to copy it all now for each dylib.
// NOTE(review): the statement guarded by this test is not visible; per the
// comment above it presumably skips the copy for __LINKEDIT.
474 if (strcmp(it
->segName
, "__LINKEDIT") == 0 )
476 std::copy(((uint8_t*)mapped_cache
)+it
->offset
, ((uint8_t*)mapped_cache
)+it
->offset
+it
->sizem
, std::back_inserter(new_dylib_data
));
480 std::vector
<uint8_t> new_linkedit_data
;
481 new_linkedit_data
.reserve(1 << 20);
// The scratch buffer's first byte is treated as the Mach-O header.
// NOTE(review): front() on an empty vector is undefined; this relies on at
// least one non-__LINKEDIT segment having been copied.
483 LinkeditOptimizer
<A
> linkeditOptimizer
;
484 dyld3::MachOAnalyzer
* mh
= (dyld3::MachOAnalyzer
*)&new_dylib_data
.front();
485 linkeditOptimizer
.optimize_loadcommands(mh
);
486 linkeditOptimizer
.optimize_linkedit(new_linkedit_data
, textOffsetInCache
, mapped_cache
);
488 new_dylib_data
.insert(new_dylib_data
.end(), new_linkedit_data
.begin(), new_linkedit_data
.end());
// Zero-pad to a 4096-byte boundary.
491 while (new_dylib_data
.size() % 4096)
492 new_dylib_data
.push_back(0);
494 dylib_data
.insert(dylib_data
.end(), new_dylib_data
.begin(), new_dylib_data
.end());
// Function type of a dylib_maker instantiation, named via the x86 one; pointers
// to other instantiations are assigned to dylib_maker_func* elsewhere in this file.
497 typedef __typeof(dylib_maker
<x86
>) dylib_maker_func
;
// Block type used to report extraction progress as (current, total).
498 typedef void (^progress_block
)(unsigned current
, unsigned total
);
// Forward declaration so SharedCacheDylibExtractor::extractCache() can take a
// reference before the type is defined. Declared with `struct` to match the tag
// used by the definition later in this file (avoids a mismatched-tags warning).
struct SharedCacheExtractor;
501 struct SharedCacheDylibExtractor
{
502 SharedCacheDylibExtractor(const char* name
, std::vector
<seg_info
> segInfo
)
503 : name(name
), segInfo(segInfo
) { }
505 void extractCache(SharedCacheExtractor
& context
);
508 const std::vector
<seg_info
> segInfo
;
// Shared state for extracting every dylib in a cache: the name-to-segments map,
// one SharedCacheDylibExtractor per map entry, the destination root, the
// architecture-specific dylib builder, a progress block and a running counter.
512 struct SharedCacheExtractor
{
// Stores the arguments and builds one extractor per map entry.
// NOTE(review): the mapped_cache parameter used in the initializer list and the
// loop header around emplace_back are not visible in this view.
513 SharedCacheExtractor(const NameToSegments
& map
,
514 const char* extraction_root_path
,
515 dylib_maker_func
* dylib_create_func
,
517 progress_block progress
)
518 : map(map
), extraction_root_path(extraction_root_path
),
519 dylib_create_func(dylib_create_func
), mapped_cache(mapped_cache
),
// Reserve up front so the emplace_back calls below do not reallocate.
522 extractors
.reserve(map
.size());
524 extractors
.emplace_back(it
.first
, it
.second
);
526 // Limit the number of open files. 16 seems to give better performance than higher numbers.
527 sema
= dispatch_semaphore_create(16);
// Worker entry point with the (context, index) signature passed to dispatch_apply_f.
531 static void extractCache(void *ctx
, size_t i
);
// Held by reference: the caller's map must outlive this object.
533 const NameToSegments
& map
;
534 std::vector
<SharedCacheDylibExtractor
> extractors
;
535 dispatch_semaphore_t sema
;
536 const char* extraction_root_path
;
537 dylib_maker_func
* dylib_create_func
;
539 progress_block progress
;
// Post-incremented by each worker when reporting progress.
540 std::atomic_int count
= { 0 };
// Runs one extraction per map entry via dispatch_apply_f on the low-priority
// global queue, then scans the per-dylib extractors and picks up a non-zero result.
// NOTE(review): the remaining dispatch_apply_f arguments, the declaration of
// `result` and the return statement are not visible in this view.
543 int SharedCacheExtractor::extractCaches() {
544 dispatch_queue_t process_queue
= dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW
, 0);
545 dispatch_apply_f(map
.size(), process_queue
,
549 for (const SharedCacheDylibExtractor
& extractor
: extractors
) {
550 if (extractor
.result
!= 0) {
551 result
= extractor
.result
;
// Worker callback: ctx is the SharedCacheExtractor and i indexes its extractors
// vector. The semaphore is waited on before and signalled after the per-dylib
// extraction, so at most as many extractions run at once as the semaphore's
// initial value allows.
558 void SharedCacheExtractor::extractCache(void *ctx
, size_t i
) {
559 SharedCacheExtractor
& context
= *(SharedCacheExtractor
*)ctx
;
560 dispatch_semaphore_wait(context
.sema
, DISPATCH_TIME_FOREVER
);
561 context
.extractors
[i
].extractCache(context
);
562 dispatch_semaphore_signal(context
.sema
);
// Extracts this dylib to "<extraction_root_path>/<name>": builds the path,
// creates parent directories, opens the file (create, truncate, exclusive lock,
// mode 0644), builds the image through context.dylib_create_func, reports
// progress, and writes the image with a single write() call.
// NOTE(review): the failure test after open(), the assignments to `result` and
// any close(fd) are not visible in this view.
565 void SharedCacheDylibExtractor::extractCache(SharedCacheExtractor
&context
) {
// NOTE(review): strcpy/strcat into a PATH_MAX buffer with no length check; a
// long root path plus dylib name would overflow it.
567 char dylib_path
[PATH_MAX
];
568 strcpy(dylib_path
, context
.extraction_root_path
);
569 strcat(dylib_path
, "/");
570 strcat(dylib_path
, name
);
572 //printf("%s with %lu segments\n", dylib_path, it->second.size());
573 // make sure all directories in this path exist
574 make_dirs(dylib_path
);
576 // open file, create if does not already exist
577 int fd
= ::open(dylib_path
, O_CREAT
| O_TRUNC
| O_EXLOCK
| O_RDWR
, 0644);
579 fprintf(stderr
, "can't open or create dylib file %s, errnor=%d\n", dylib_path
, errno
);
584 std::vector
<uint8_t> vec
;
585 context
.dylib_create_func(context
.mapped_cache
, vec
, segInfo
);
// count is post-incremented, so the first report carries 0; total is the map size.
586 context
.progress(context
.count
++, (unsigned)context
.map
.size());
// Only -1 is treated as failure; a short write is not detected here.
589 if( write(fd
, &vec
.front(), vec
.size()) == -1) {
590 fprintf(stderr
, "error writing, errnor=%d\n", errno
);
// Checks a mapped shared cache of `size` bytes for truncation and for page
// hashes that disagree with its embedded code signature.
//
// Visible checks, in order:
//   1. the file is large enough to hold a CS_SuperBlob at codeSignatureOffset;
//   2. the super blob magic is CSMAGIC_EMBEDDED_SIGNATURE;
//   3. the file is large enough for the super blob's declared length;
//   4. a CSSLOT_CODEDIRECTORY entry exists, lies within the blob, and has the
//      CSMAGIC_CODEDIRECTORY magic;
//   5. the directory has at least as many code slots as the mapped regions need;
//   6. for SHA1 / SHA256 directories, each processed page's digest matches its slot.
// Each failure prints a message to stderr.
// NOTE(review): the return statements are not visible in this view; the caller
// treats a non-zero result as failure.
597 static int sharedCacheIsValid(const void* mapped_cache
, uint64_t size
) {
598 // First check that the size is good.
599 // Note the shared cache may not have a codeSignatureSize value set so we need to first make
600 // sure we have space for the CS_SuperBlob, then later crack that to check for the size of the rest.
601 const DyldSharedCache
* dyldSharedCache
= (DyldSharedCache
*)mapped_cache
;
602 uint64_t requiredSizeForCSSuperBlob
= dyldSharedCache
->header
.codeSignatureOffset
+ sizeof(CS_SuperBlob
);
603 const dyld_cache_mapping_info
* mappings
= (dyld_cache_mapping_info
*)((uint8_t*)mapped_cache
+ dyldSharedCache
->header
.mappingOffset
);
604 if ( requiredSizeForCSSuperBlob
> size
) {
605 fprintf(stderr
, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size
, requiredSizeForCSSuperBlob
);
609 // Now see if the code signatures are valid as that tells us the pages aren't corrupt.
610 // First find all of the regions of the shared cache we computed cd hashes
// Each region is a [start, end) pair of file offsets.
611 std::vector
<std::pair
<uint64_t, uint64_t>> sharedCacheRegions
;
612 for (uint32_t i
= 0; i
!= dyldSharedCache
->header
.mappingCount
; ++i
) {
613 sharedCacheRegions
.emplace_back(std::make_pair(mappings
[i
].fileOffset
, mappings
[i
].fileOffset
+ mappings
[i
].size
));
// The local symbols area counts as one more region when it is non-empty.
615 if (dyldSharedCache
->header
.localSymbolsSize
)
616 sharedCacheRegions
.emplace_back(std::make_pair(dyldSharedCache
->header
.localSymbolsOffset
, dyldSharedCache
->header
.localSymbolsOffset
+ dyldSharedCache
->header
.localSymbolsSize
));
// Total number of bytes covered by all regions.
617 size_t inBbufferSize
= 0;
618 for (auto& sharedCacheRegion
: sharedCacheRegions
)
619 inBbufferSize
+= (sharedCacheRegion
.second
- sharedCacheRegion
.first
);
621 // Now take the cd hash from the cache itself and validate the regions we found.
622 uint8_t* codeSignatureRegion
= (uint8_t*)mapped_cache
+ dyldSharedCache
->header
.codeSignatureOffset
;
623 CS_SuperBlob
* sb
= reinterpret_cast<CS_SuperBlob
*>(codeSignatureRegion
);
624 if (sb
->magic
!= htonl(CSMAGIC_EMBEDDED_SIGNATURE
)) {
625 fprintf(stderr
, "Error: dyld shared cache code signature magic is incorrect.\n");
629 size_t sbSize
= ntohl(sb
->length
);
630 uint64_t requiredSizeForCS
= dyldSharedCache
->header
.codeSignatureOffset
+ sbSize
;
631 if ( requiredSizeForCS
> size
) {
632 fprintf(stderr
, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size
, requiredSizeForCS
);
636 // Find the offset to the code directory.
637 CS_CodeDirectory
* cd
= nullptr;
// NOTE(review): sb->count is compared without ntohl(), unlike sb->length and
// the index[i].type / index[i].offset fields; confirm the intended byte order.
638 for (unsigned i
=0; i
!= sb
->count
; ++i
) {
639 if (ntohl(sb
->index
[i
].type
) == CSSLOT_CODEDIRECTORY
) {
640 cd
= (CS_CodeDirectory
*)(codeSignatureRegion
+ ntohl(sb
->index
[i
].offset
));
646 fprintf(stderr
, "Error: dyld shared cache code signature directory is missing.\n");
// Only the start of the directory is compared against the end of the super blob.
650 if ( (uint8_t*)cd
> (codeSignatureRegion
+ sbSize
) ) {
651 fprintf(stderr
, "Error: dyld shared cache code signature directory is out of bounds.\n");
655 if ( cd
->magic
!= htonl(CSMAGIC_CODEDIRECTORY
) ) {
656 fprintf(stderr
, "Error: dyld shared cache code signature directory magic is incorrect.\n");
// cd->pageSize is used as a shift count, i.e. a log2 page size.
660 uint32_t pageSize
= 1 << cd
->pageSize
;
// Pages needed to cover all regions, rounded up.
661 uint32_t slotCountFromRegions
= (uint32_t)((inBbufferSize
+ pageSize
- 1) / pageSize
);
662 if ( ntohl(cd
->nCodeSlots
) < slotCountFromRegions
) {
663 fprintf(stderr
, "Error: dyld shared cache code signature directory num slots is incorrect.\n");
// Map the directory's hash type to a CommonCrypto digest; stays kCCDigestNone
// for types not handled by the switch.
667 uint32_t dscDigestFormat
= kCCDigestNone
;
668 switch (cd
->hashType
) {
669 case CS_HASHTYPE_SHA1
:
670 #pragma clang diagnostic push
671 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
672 dscDigestFormat
= kCCDigestSHA1
;
673 #pragma clang diagnostic pop
675 case CS_HASHTYPE_SHA256
:
676 dscDigestFormat
= kCCDigestSHA256
;
// Page hashes are verified only when the digest format was recognized.
682 if (dscDigestFormat
!= kCCDigestNone
) {
683 const uint64_t csPageSize
= 1 << cd
->pageSize
;
684 size_t hashOffset
= ntohl(cd
->hashOffset
);
685 uint8_t* hashSlot
= (uint8_t*)cd
+ hashOffset
;
// Variable-length array sized by the directory's per-slot hash size.
686 uint8_t cdHashBuffer
[cd
->hashSize
];
688 // Skip local symbols for now as those aren't being codesign correctly right now.
// This inner inBbufferSize shadows the outer one and sums regions again; the
// statement guarded by the localSymbolsOffset test is not visible in this view.
689 size_t inBbufferSize
= 0;
690 for (auto& sharedCacheRegion
: sharedCacheRegions
) {
691 if (sharedCacheRegion
.first
== dyldSharedCache
->header
.localSymbolsOffset
)
693 inBbufferSize
+= (sharedCacheRegion
.second
- sharedCacheRegion
.first
);
695 uint32_t slotCountToProcess
= (uint32_t)((inBbufferSize
+ pageSize
- 1) / pageSize
);
697 for (unsigned i
= 0; i
!= slotCountToProcess
; ++i
) {
698 // Skip data pages as those may have been slid by ASLR in the extracted file
699 uint64_t fileOffset
= i
* csPageSize
;
700 bool isDataPage
= false;
// Scans mappings from index 1 up to, but not including, the last one.
// NOTE(review): the loop ends on `!= mappingCount - 1`; with a mappingCount of
// 0 or 1 that condition is not met at the start. Confirm such caches cannot occur.
701 for (unsigned mappingIndex
= 1; mappingIndex
!= (dyldSharedCache
->header
.mappingCount
- 1); ++mappingIndex
) {
702 if ( (fileOffset
>= mappings
[mappingIndex
].fileOffset
) && (fileOffset
< (mappings
[mappingIndex
].fileOffset
+ mappings
[mappingIndex
].size
)) ) {
// Hash one page of the mapped file and compare it with the directory's slot i.
710 CCDigest(dscDigestFormat
, (uint8_t*)mapped_cache
+ fileOffset
, (size_t)csPageSize
, cdHashBuffer
);
711 uint8_t* cacheCdHashBuffer
= hashSlot
+ (i
* cd
->hashSize
);
712 if (memcmp(cdHashBuffer
, cacheCdHashBuffer
, cd
->hashSize
) != 0) {
713 fprintf(stderr
, "Error: dyld shared cache code signature for page %d is incorrect.\n", i
);
// Extracts every dylib in the shared cache file at shared_cache_file_path into
// the directory tree rooted at extraction_root_path, calling `progress` as
// dylibs are produced.
//
// Steps: stat and open the file, mmap it read-only, pick the dylib_maker
// instantiation from the magic string at the start of the mapping, validate the
// cache with sharedCacheIsValid(), collect each dylib's segments through
// dyld_shared_cache_iterate(), then run SharedCacheExtractor over the result.
// Each failure prints a message to stderr; the mapping is released with munmap
// on the visible error paths and at the end.
// NOTE(review): the return statements, the failure test after open() and any
// close(cache_fd) are not visible in this view.
721 int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path
, const char* extraction_root_path
,
722 progress_block progress
)
725 if (stat(shared_cache_file_path
, &statbuf
)) {
726 fprintf(stderr
, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path
);
730 int cache_fd
= open(shared_cache_file_path
, O_RDONLY
);
732 fprintf(stderr
, "Error: failed to open shared cache file at %s\n", shared_cache_file_path
);
736 void* mapped_cache
= mmap(NULL
, (size_t)statbuf
.st_size
, PROT_READ
, MAP_PRIVATE
, cache_fd
, 0);
737 if (mapped_cache
== MAP_FAILED
) {
738 fprintf(stderr
, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path
, errno
);
744 // instantiate arch specific dylib maker
// The start of the mapping is compared as a C string against known magics.
// NOTE(review): with the literals as shown, the strncmp(..., 14) branch matches
// exactly what the preceding armv7 strcmp matches, and "dyld_v1arm64_32" has no
// space. The literals may have lost padding spaces in this view; verify the
// exact strings against the real file.
745 dylib_maker_func
* dylib_create_func
= nullptr;
746 if ( strcmp((char*)mapped_cache
, "dyld_v1 i386") == 0 )
747 dylib_create_func
= dylib_maker
<x86
>;
748 else if ( strcmp((char*)mapped_cache
, "dyld_v1 x86_64") == 0 )
749 dylib_create_func
= dylib_maker
<x86_64
>;
// x86_64h caches use the x86_64 instantiation.
750 else if ( strcmp((char*)mapped_cache
, "dyld_v1 x86_64h") == 0 )
751 dylib_create_func
= dylib_maker
<x86_64
>;
752 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv5") == 0 )
753 dylib_create_func
= dylib_maker
<arm
>;
754 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv6") == 0 )
755 dylib_create_func
= dylib_maker
<arm
>;
756 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv7") == 0 )
757 dylib_create_func
= dylib_maker
<arm
>;
758 else if ( strncmp((char*)mapped_cache
, "dyld_v1 armv7", 14) == 0 )
759 dylib_create_func
= dylib_maker
<arm
>;
760 else if ( strcmp((char*)mapped_cache
, "dyld_v1 arm64") == 0 )
761 dylib_create_func
= dylib_maker
<arm64
>;
// arm64e caches use the arm64 instantiation.
762 #if SUPPORT_ARCH_arm64e
763 else if ( strcmp((char*)mapped_cache
, "dyld_v1 arm64e") == 0 )
764 dylib_create_func
= dylib_maker
<arm64
>;
766 #if SUPPORT_ARCH_arm64_32
767 else if ( strcmp((char*)mapped_cache
, "dyld_v1arm64_32") == 0 )
768 dylib_create_func
= dylib_maker
<arm64_32
>;
771 fprintf(stderr
, "Error: unrecognized dyld shared cache magic.\n");
772 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
776 // Verify that the cache isn't corrupt.
// A non-zero result from sharedCacheIsValid() is treated as failure.
777 if (int result
= sharedCacheIsValid(mapped_cache
, (uint64_t)statbuf
.st_size
)) {
778 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
782 // iterate through all images in cache and build map of dylibs and segments
// The map is keyed by dylibInfo->path; one seg_info is appended per callback.
783 __block NameToSegments map
;
// NOTE(review): the file size is cast to uint32_t here; confirm whether caches
// of 4 GiB or more need to be supported.
786 result
= dyld_shared_cache_iterate(mapped_cache
, (uint32_t)statbuf
.st_size
, ^(const dyld_shared_cache_dylib_info
* dylibInfo
, const dyld_shared_cache_segment_info
* segInfo
) {
787 map
[dylibInfo
->path
].push_back(seg_info(segInfo
->name
, segInfo
->fileOffset
, segInfo
->fileSize
));
// NOTE(review): this message names dyld_shared_cache_iterate_segments_with_slide,
// but the call above is dyld_shared_cache_iterate.
791 fprintf(stderr
, "Error: dyld_shared_cache_iterate_segments_with_slide failed.\n");
792 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
796 // for each dylib instantiate a dylib file
797 SharedCacheExtractor
extractor(map
, extraction_root_path
, dylib_create_func
, mapped_cache
, progress
);
798 result
= extractor
.extractCaches();
800 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
// Convenience wrapper: forwards both paths to
// dyld_shared_cache_extract_dylibs_progress() with a progress block that does
// nothing, and returns that call's result.
806 int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path
, const char* extraction_root_path
)
808 return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path
, extraction_root_path
,
809 ^(unsigned , unsigned) {} );
// Pointer type for the extraction entry point that main() resolves with dlsym():
// takes the cache path, the output root and a (current, total) progress block,
// and returns an int.
820 typedef int (*extractor_proc
)(const char* shared_cache_file_path
, const char* extraction_root_path
,
821 void (^progress
)(unsigned current
, unsigned total
));
// Test driver: loads dsc_extractor.bundle from a fixed Xcode path, looks up
// dyld_shared_cache_extract_dylibs_progress in it, calls it with argv[1] (cache
// file) and argv[2] (output directory) while printing "current/total" progress
// to stdout, then prints the returned result to stderr.
// NOTE(review): the argc check guarding the usage message and the return
// statements are not visible in this view.
823 int main(int argc
, const char* argv
[])
826 fprintf(stderr
, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
830 //void* handle = dlopen("/Volumes/my/src/dyld/build/Debug/dsc_extractor.bundle", RTLD_LAZY);
831 void* handle
= dlopen("/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/usr/lib/dsc_extractor.bundle", RTLD_LAZY
);
832 if ( handle
== NULL
) {
833 fprintf(stderr
, "dsc_extractor.bundle could not be loaded\n");
837 extractor_proc proc
= (extractor_proc
)dlsym(handle
, "dyld_shared_cache_extract_dylibs_progress");
838 if ( proc
== NULL
) {
839 fprintf(stderr
, "dsc_extractor.bundle did not have dyld_shared_cache_extract_dylibs_progress symbol\n");
843 int result
= (*proc
)(argv
[1], argv
[2], ^(unsigned c
, unsigned total
) { printf("%d/%d\n", c
, total
); } );
844 fprintf(stderr
, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result
);