/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
 *
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
34 #include <sys/syslimits.h>
35 #include <libkern/OSByteOrder.h>
36 #include <mach-o/arch.h>
37 #include <mach-o/loader.h>
38 #include <Availability.h>
40 #include "CodeSigningTypes.h"
41 #include <CommonCrypto/CommonHMAC.h>
42 #include <CommonCrypto/CommonDigest.h>
43 #include <CommonCrypto/CommonDigestSPI.h>
46 #include "Architectures.hpp"
47 #include "MachOFileAbstraction.hpp"
49 #include "dsc_iterator.h"
50 #include "dsc_extractor.h"
51 #include "DyldSharedCache.h"
52 #include "MachOAnalyzer.h"
53 #include "SupportedArchs.h"
59 #include <unordered_map>
61 #include <dispatch/dispatch.h>
// Constructor: initialises the segName, offset and sizem members from the
// name, offset and size arguments respectively.
// NOTE(review): the member declarations are not visible in this extract; segName
// presumably stores the pointer n itself rather than a copy of the string - confirm.
65 seg_info(const char* n
, uint64_t o
, uint64_t s
)
66 : segName(n
), offset(o
), sizem(s
) { }
74 size_t operator()(const char* __s
) const {
// Equality predicate for C strings: true when strcmp() reports that left and
// right have identical contents (content comparison, not pointer comparison).
83 bool operator()(const char* left
, const char* right
) const { return (strcmp(left
, right
) == 0); }
// Hash map from a C-string key to the list of seg_info records for that key,
// hashed with CStringHash and compared with CStringEquals. Later in this file it
// is filled with map[dylibInfo->path].push_back(seg_info(...)), i.e. keyed by dylib path.
85 typedef std::unordered_map
<const char*, std::vector
<seg_info
>, CStringHash
, CStringEquals
> NameToSegments
;
87 // Filter to find individual symbol re-exports in trie
// Unary predicate over ExportInfoTrie::Entry. optimize_linkedit() passes an
// instance to std::remove_if to prune the parsed export-trie entries.
88 class NotReExportSymbol
{
// Stores a reference (not a copy) to rd, so the set must outlive this predicate.
90 NotReExportSymbol(const std::set
<int> &rd
) :_reexportDeps(rd
) {}
// Call operator: forwards to isSymbolReExport() and returns its result unchanged.
91 bool operator()(const ExportInfoTrie::Entry
&entry
) const {
92 return isSymbolReExport(entry
);
// Classifies one export entry with three tests, in order:
//   1. the symbol kind bits are not EXPORT_SYMBOL_FLAGS_KIND_REGULAR
//   2. the EXPORT_SYMBOL_FLAGS_REEXPORT flag is clear
//   3. entry.info.other (cast to int) is present in _reexportDeps
// NOTE(review): the statement each test guards, and the final return, are not
// visible in this extract. Given the class name and the remove_if usage, a true
// result presumably means "not an individual re-export" - confirm, since the
// method name suggests the opposite polarity.
95 bool isSymbolReExport(const ExportInfoTrie::Entry
&entry
) const {
96 if ( (entry
.info
.flags
& EXPORT_SYMBOL_FLAGS_KIND_MASK
) != EXPORT_SYMBOL_FLAGS_KIND_REGULAR
)
98 if ( (entry
.info
.flags
& EXPORT_SYMBOL_FLAGS_REEXPORT
) == 0 )
100 // If the symbol comes from a dylib that is re-exported, this is not an individual symbol re-export
101 if ( _reexportDeps
.count((int)entry
.info
.other
) != 0 )
// Set of dependency indexes, held by reference; consulted by test 3 above.
105 const std::set
<int> &_reexportDeps
;
108 template <typename P
>
109 struct LoadCommandInfo
{
112 template <typename A
>
113 class LinkeditOptimizer
{
// Shorthand for the architecture traits: P is the pointer-size policy of A,
// E is P's nested E type and pint_t is P's pointer-sized unsigned integer type.
114 typedef typename
A::P P
;
115 typedef typename
A::P::E E
;
116 typedef typename
A::P::uint_t pint_t
;
// State captured by optimize_loadcommands() and consumed by optimize_linkedit().
// All pointers point into the load commands of the image being rewritten and stay
// null when the corresponding command was not encountered.
// The segment command whose name is "__LINKEDIT".
119 macho_segment_command
<P
>* linkEditSegCmd
= nullptr;
// The symbol table command.
120 symtab_command
* symtab
= nullptr;
// The dynamic symbol table command.
121 dysymtab_command
* dynamicSymTab
= nullptr;
// The LC_FUNCTION_STARTS command, if any.
122 linkedit_data_command
* functionStarts
= nullptr;
// The LC_DATA_IN_CODE command, if any.
123 linkedit_data_command
* dataInCode
= nullptr;
// Offset/size of the exports trie, saved before the load command fields are
// zeroed; optimize_linkedit() adds the offset to the mapped cache base address.
124 uint32_t exportsTrieOffset
= 0;
125 uint32_t exportsTrieSize
= 0;
// Values of the running dependency counter recorded for LC_REEXPORT_DYLIB commands.
126 std::set
<int> reexportDeps
;
// Rewrites, in place, the header and load commands of the extracted image `mh`
// so that they describe a stand-alone file instead of an image inside the shared
// cache, and records pointers/offsets that optimize_linkedit() needs afterwards.
//
// mh - the image to patch; its flags and load commands are modified directly.
//
// NOTE(review): several lines of this function (break statements, closing braces,
// the declaration of `diag`, some case labels) are not visible in this extract.
130 void optimize_loadcommands(dyld3::MachOAnalyzer
* mh
)
132 // update header flags
133 mh
->flags
&= 0x7FFFFFFF; // remove in-cache bit
135 // update load commands
// Running file offset: each segment is laid out directly after the previous one.
136 __block
uint64_t cumulativeFileSize
= 0;
// Counter used to number dependent dylibs for reexportDeps.
// NOTE(review): the statement that advances depIndex is not visible here.
137 __block
int depIndex
= 0;
139 mh
->forEachLoadCommand(diag
, ^(const load_command
* cmd
, bool &stop
) {
140 switch ( cmd
->cmd
) {
// Segment commands: pack the segment at the running offset and make its file
// size equal to its VM size.
141 case macho_segment_command
<P
>::CMD
: {
142 auto segCmd
= (macho_segment_command
<P
>*)cmd
;
143 segCmd
->set_fileoff(cumulativeFileSize
);
144 segCmd
->set_filesize(segCmd
->vmsize());
// The section records are addressed immediately after the segment command structure.
146 auto const sectionsStart
= (macho_section
<P
>*)((char*)segCmd
+ sizeof(macho_segment_command
<P
>));
// NOTE(review): `§ionsStart` looks like a mis-decoded `&sectionsStart`
// (HTML entity "&sect") - confirm against the upstream source.
147 auto const sectionsEnd
= §ionsStart
[segCmd
->nsects()];
// Sections with a non-zero file offset get a new offset: the segment's new file
// offset plus the section's distance from the segment's vmaddr.
148 for (auto sect
= sectionsStart
; sect
< sectionsEnd
; ++sect
) {
149 if ( sect
->offset() != 0 ) {
150 sect
->set_offset((uint32_t)(cumulativeFileSize
+ sect
->addr() - segCmd
->vmaddr()));
// Remember the __LINKEDIT segment command for optimize_linkedit().
153 if ( strcmp(segCmd
->segname(), "__LINKEDIT") == 0 )
154 linkEditSegCmd
= segCmd
;
155 cumulativeFileSize
+= segCmd
->filesize();
157 case LC_DYLD_INFO_ONLY
: {
158 // zero out all dyld info. lldb only uses symbol table
159 auto dyldInfo
= (dyld_info_command
*)cmd
;
// Save the export trie location before it is cleared below.
160 exportsTrieOffset
= dyldInfo
->export_off
;
161 exportsTrieSize
= dyldInfo
->export_size
;
162 dyldInfo
->rebase_off
= 0;
163 dyldInfo
->rebase_size
= 0;
164 dyldInfo
->bind_off
= 0;
165 dyldInfo
->bind_size
= 0;
166 dyldInfo
->weak_bind_off
= 0;
167 dyldInfo
->weak_bind_size
= 0;
168 dyldInfo
->lazy_bind_off
= 0;
169 dyldInfo
->lazy_bind_size
= 0;
170 dyldInfo
->export_off
= 0;
171 dyldInfo
->export_size
= 0;
173 case LC_DYLD_EXPORTS_TRIE
: {
174 // don't put export trie into extracted dylib. lldb only uses symbol table
175 linkedit_data_command
* exportsTrie
= (linkedit_data_command
*)cmd
;
// Same as above: remember where the trie lives, then clear the command's fields.
176 exportsTrieOffset
= exportsTrie
->dataoff
;
177 exportsTrieSize
= exportsTrie
->datasize
;
178 exportsTrie
->dataoff
= 0;
179 exportsTrie
->datasize
= 0;
// Record the symbol table related commands for optimize_linkedit().
// NOTE(review): the case labels for the next two assignments are not visible.
182 symtab
= (symtab_command
*)cmd
;
185 dynamicSymTab
= (dysymtab_command
*)cmd
;
187 case LC_FUNCTION_STARTS
:
188 functionStarts
= (linkedit_data_command
*)cmd
;
190 case LC_DATA_IN_CODE
:
191 dataInCode
= (linkedit_data_command
*)cmd
;
// Dependent dylib commands: re-exported ones have their index recorded in reexportDeps.
194 case LC_LOAD_WEAK_DYLIB
:
195 case LC_REEXPORT_DYLIB
:
196 case LC_LOAD_UPWARD_DYLIB
:
198 if ( cmd
->cmd
== LC_REEXPORT_DYLIB
) {
199 reexportDeps
.insert(depIndex
);
// Second pass over the load commands via removeLoadCommand().
// NOTE(review): the statements under the LC_SEGMENT_SPLIT_INFO case are not
// visible in this extract; presumably they set `remove` - confirm.
207 mh
->removeLoadCommand(diag
, ^(const load_command
* cmd
, bool& remove
, bool &stop
) {
208 switch ( cmd
->cmd
) {
209 case LC_SEGMENT_SPLIT_INFO
:
210 // <rdar://problem/23212513> dylibs iOS 9 dyld caches have bogus LC_SEGMENT_SPLIT_INFO
// Builds a fresh, private __LINKEDIT payload for the extracted dylib and patches
// the recorded load commands to point at it. Data is appended to
// new_linkedit_data in this order: function starts, data-in-code, symbol table,
// indirect symbol table, string pool.
//
// new_linkedit_data - output buffer the new __LINKEDIT bytes are appended to.
// textOffsetInCache - cache file offset of this dylib's __TEXT; used to find its
//                     entry among the cache's local-symbol records.
// mapped_cache      - base address of the mapped shared cache file.
//
// Must run after optimize_loadcommands(), which fills in linkEditSegCmd, symtab,
// dynamicSymTab, functionStarts, dataInCode, the exports trie location and
// reexportDeps.
// NOTE(review): the return statements are not visible in this extract;
// presumably the error paths that print to stderr return non-zero - confirm.
220 int optimize_linkedit(std::vector
<uint8_t> &new_linkedit_data
, uint64_t textOffsetInCache
, const void* mapped_cache
)
222 // rebuild symbol table
// Required commands: without these nothing can be rebuilt.
223 if ( linkEditSegCmd
== nullptr ) {
224 fprintf(stderr
, "__LINKEDIT not found\n");
227 if ( symtab
== nullptr ) {
228 fprintf(stderr
, "LC_SYMTAB not found\n");
231 if ( dynamicSymTab
== nullptr ) {
232 fprintf(stderr
, "LC_DYSYMTAB not found\n");
// Offsets recorded here are relative to the start of new_linkedit_data; the
// __LINKEDIT file offset is added when the load commands are updated at the end.
236 const uint64_t newFunctionStartsOffset
= new_linkedit_data
.size();
237 uint32_t functionStartsSize
= 0;
238 if ( functionStarts
!= NULL
) {
239 // copy function starts from original cache file to new mapped dylib file
240 functionStartsSize
= functionStarts
->datasize
;
241 new_linkedit_data
.insert(new_linkedit_data
.end(),
242 (char*)mapped_cache
+ functionStarts
->dataoff
,
243 (char*)mapped_cache
+ functionStarts
->dataoff
+ functionStartsSize
);
// Pad with zero bytes until the file position is a multiple of the pointer size.
247 while ((linkEditSegCmd
->fileoff() + new_linkedit_data
.size()) % sizeof(pint_t
))
248 new_linkedit_data
.push_back(0);
250 const uint64_t newDataInCodeOffset
= new_linkedit_data
.size();
251 uint32_t dataInCodeSize
= 0;
252 if ( dataInCode
!= NULL
) {
253 // copy data-in-code info from original cache file to new mapped dylib file
254 dataInCodeSize
= dataInCode
->datasize
;
255 new_linkedit_data
.insert(new_linkedit_data
.end(),
256 (char*)mapped_cache
+ dataInCode
->dataoff
,
257 (char*)mapped_cache
+ dataInCode
->dataoff
+ dataInCodeSize
);
// Parse the export trie (if one was recorded) and filter the entries with
// NotReExportSymbol; the survivors become N_INDR symbols further down.
260 std::vector
<ExportInfoTrie::Entry
> exports
;
261 if ( exportsTrieSize
!= 0 ) {
262 const uint8_t* exportsStart
= ((uint8_t*)mapped_cache
) + exportsTrieOffset
;
263 const uint8_t* exportsEnd
= &exportsStart
[exportsTrieSize
];
264 ExportInfoTrie::parseTrie(exportsStart
, exportsEnd
, exports
);
265 exports
.erase(std::remove_if(exports
.begin(), exports
.end(), NotReExportSymbol(reexportDeps
)), exports
.end());
// Look up this dylib's slice of the cache's separately stored local symbols,
// matching on the dylib's __TEXT offset. localNlistCount stays 0 when no record matches.
268 const DyldSharedCache
* cache
= (DyldSharedCache
*)mapped_cache
;
269 macho_nlist
<P
>* allLocalNlists
= (macho_nlist
<P
>*)cache
->getLocalNlistEntries();
270 __block macho_nlist
<P
>* localNlists
= nullptr;
271 __block
uint32_t localNlistCount
= 0;
272 cache
->forEachLocalSymbolEntry(^(uint32_t dylibOffset
, uint32_t nlistStartIndex
, uint32_t nlistCount
, bool& stop
){
273 if (dylibOffset
== textOffsetInCache
) {
274 localNlists
= &allLocalNlists
[nlistStartIndex
];
275 localNlistCount
= nlistCount
;
279 // compute number of symbols in new symbol table
280 const macho_nlist
<P
>* mergedSymTabStart
= (macho_nlist
<P
>*)(((uint8_t*)mapped_cache
) + symtab
->symoff
);
281 const macho_nlist
<P
>* const mergedSymTabend
= &mergedSymTabStart
[symtab
->nsyms
];
282 uint32_t newSymCount
= symtab
->nsyms
;
283 if ( localNlistCount
!= 0 ) {
284 // if we are recombining with unmapped locals, recompute new total size
285 newSymCount
= localNlistCount
+ dynamicSymTab
->nextdefsym
+ dynamicSymTab
->nundefsym
;
288 // add room for N_INDR symbols for re-exported symbols
289 newSymCount
+= exports
.size();
291 // copy symbol entries and strings from original cache file to new mapped dylib file
292 const char* mergedStringPoolStart
= (char*)mapped_cache
+ symtab
->stroff
;
293 const char* mergedStringPoolEnd
= &mergedStringPoolStart
[symtab
->strsize
];
295 // First count how many entries we need
296 std::vector
<macho_nlist
<P
>> newSymTab
;
297 newSymTab
.reserve(newSymCount
);
298 std::vector
<char> newSymNames
;
300 // first pool entry is always empty string
301 newSymNames
.push_back('\0');
303 // local symbols are first in dylibs, if this cache has unmapped locals, insert them all first
// undefSymbolShift is the growth of the locals range; it is added to the copied
// indirect symbol table entries further down.
304 uint32_t undefSymbolShift
= 0;
305 if ( localNlistCount
!= 0 ) {
306 const char* localStrings
= cache
->getLocalStrings();
307 undefSymbolShift
= localNlistCount
- dynamicSymTab
->nlocalsym
;
308 // update load command to reflect new count of locals
309 dynamicSymTab
->ilocalsym
= (uint32_t)newSymTab
.size();
310 dynamicSymTab
->nlocalsym
= localNlistCount
;
311 // copy local symbols
312 for (uint32_t i
=0; i
< localNlistCount
; ++i
) {
313 const char* localName
= &localStrings
[localNlists
[i
].n_strx()];
// Names pointing past the local string pool are replaced with a placeholder.
// NOTE(review): the test uses '>' so a name starting exactly at the end of the
// pool is accepted - confirm whether '>=' was intended.
314 if ( localName
> localStrings
+ cache
->getLocalStringsSize() )
315 localName
= "<corrupt local symbol name>";
// Copy the nlist entry and point it at the name's position in the new pool.
316 macho_nlist
<P
> t
= localNlists
[i
];
317 t
.set_n_strx((uint32_t)newSymNames
.size());
// NOTE(review): the first-iterator argument of this insert() is not visible in
// this extract; the visible end iterator includes the terminating NUL.
318 newSymNames
.insert(newSymNames
.end(),
320 localName
+ (strlen(localName
) + 1));
321 newSymTab
.push_back(t
);
323 // now start copying symbol table from start of externs instead of start of locals
324 mergedSymTabStart
= &mergedSymTabStart
[dynamicSymTab
->iextdefsym
];
326 // copy full symbol table from cache (skipping locals if they where elsewhere)
327 for (const macho_nlist
<P
>* s
= mergedSymTabStart
; s
!= mergedSymTabend
; ++s
) {
328 macho_nlist
<P
> t
= *s
;
329 t
.set_n_strx((uint32_t)newSymNames
.size());
330 const char* symName
= &mergedStringPoolStart
[s
->n_strx()];
// Same placeholder substitution as for locals (same '>' vs '>=' question).
331 if ( symName
> mergedStringPoolEnd
)
332 symName
= "<corrupt symbol name>";
333 newSymNames
.insert(newSymNames
.end(),
335 symName
+ (strlen(symName
) + 1));
336 newSymTab
.push_back(t
);
338 // <rdar://problem/16529213> recreate N_INDR symbols in extracted dylibs for debugger
// One N_INDR|N_EXT symbol per remaining export: n_strx names the export and
// n_value is set to the string pool position where the import name is appended.
// NOTE(review): the declaration of `t` for this loop is not visible in this extract.
339 for (std::vector
<ExportInfoTrie::Entry
>::iterator it
= exports
.begin(); it
!= exports
.end(); ++it
) {
341 memset(&t
, 0, sizeof(t
));
342 t
.set_n_strx((uint32_t)newSymNames
.size());
343 t
.set_n_type(N_INDR
| N_EXT
);
346 newSymNames
.insert(newSymNames
.end(),
348 it
->name
.c_str() + (it
->name
.size() + 1));
// An empty import name means the symbol is re-exported under its own name.
349 const char* importName
= it
->info
.importName
.c_str();
350 if ( *importName
== '\0' )
351 importName
= it
->name
.c_str();
352 t
.set_n_value(newSymNames
.size());
353 newSymNames
.insert(newSymNames
.end(),
355 importName
+ (strlen(importName
) + 1));
356 newSymTab
.push_back(t
);
// Sanity check: the number of symbols emitted must match the count computed up front.
359 if ( newSymCount
!= newSymTab
.size() ) {
360 fprintf(stderr
, "symbol count miscalculation\n");
364 //const uint64_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
365 //macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
366 //char* const newStringPoolStart = (char*)mh + newStringPoolOffset;
// Pointer-align the file position again before the symbol table.
369 while ((linkEditSegCmd
->fileoff() + new_linkedit_data
.size()) % sizeof(pint_t
))
370 new_linkedit_data
.push_back(0);
372 const uint64_t newSymTabOffset
= new_linkedit_data
.size();
// Append the raw bytes of every rebuilt nlist entry.
375 for (macho_nlist
<P
>& sym
: newSymTab
) {
376 uint8_t symData
[sizeof(macho_nlist
<P
>)];
377 memcpy(&symData
, &sym
, sizeof(sym
));
378 new_linkedit_data
.insert(new_linkedit_data
.end(), &symData
[0], &symData
[sizeof(macho_nlist
<P
>)]);
381 const uint64_t newIndSymTabOffset
= new_linkedit_data
.size();
383 // Copy (and adjust) indirect symbol table
384 const uint32_t* mergedIndSymTab
= (uint32_t*)((char*)mapped_cache
+ dynamicSymTab
->indirectsymoff
);
385 new_linkedit_data
.insert(new_linkedit_data
.end(),
386 (char*)mergedIndSymTab
,
387 (char*)(mergedIndSymTab
+ dynamicSymTab
->nindirectsyms
));
// When locals were re-inserted, shift the copied indirect entries by the growth.
// NOTE(review): the visible loop shifts every entry unconditionally, which would
// also alter any special marker values stored in the table - confirm this is
// intended. The loop index is a signed int compared against an unsigned count.
388 if ( undefSymbolShift
!= 0 ) {
389 uint32_t* newIndSymTab
= (uint32_t*)&new_linkedit_data
[newIndSymTabOffset
];
390 for (int i
=0; i
< dynamicSymTab
->nindirectsyms
; ++i
) {
391 newIndSymTab
[i
] += undefSymbolShift
;
394 const uint64_t newStringPoolOffset
= new_linkedit_data
.size();
396 // pointer align string pool size
397 while (newSymNames
.size() % sizeof(pint_t
))
398 newSymNames
.push_back('\0');
400 new_linkedit_data
.insert(new_linkedit_data
.end(), newSymNames
.begin(), newSymNames
.end());
402 // update load commands
// Buffer-relative offsets become file offsets by adding the __LINKEDIT file offset.
403 if ( functionStarts
!= NULL
) {
404 functionStarts
->dataoff
= (uint32_t)(newFunctionStartsOffset
+ linkEditSegCmd
->fileoff());
405 functionStarts
->datasize
= functionStartsSize
;
407 if ( dataInCode
!= NULL
) {
408 dataInCode
->dataoff
= (uint32_t)(newDataInCodeOffset
+ linkEditSegCmd
->fileoff());
409 dataInCode
->datasize
= dataInCodeSize
;
412 symtab
->nsyms
= newSymCount
;
413 symtab
->symoff
= (uint32_t)(newSymTabOffset
+ linkEditSegCmd
->fileoff());
414 symtab
->stroff
= (uint32_t)(newStringPoolOffset
+ linkEditSegCmd
->fileoff());
415 symtab
->strsize
= (uint32_t)newSymNames
.size();
// Relocation tables are not carried over; their offsets and counts are cleared.
416 dynamicSymTab
->extreloff
= 0;
417 dynamicSymTab
->nextrel
= 0;
418 dynamicSymTab
->locreloff
= 0;
419 dynamicSymTab
->nlocrel
= 0;
420 dynamicSymTab
->indirectsymoff
= (uint32_t)(newIndSymTabOffset
+ linkEditSegCmd
->fileoff());
// __LINKEDIT ends where the string pool ends; vmsize is the file size rounded up
// to a multiple of 4096.
421 linkEditSegCmd
->set_filesize(symtab
->stroff
+ symtab
->strsize
- linkEditSegCmd
->fileoff());
422 linkEditSegCmd
->set_vmsize((linkEditSegCmd
->filesize() + 4095) & (-4096));
// Creates every missing directory leading up to the file named by file_path.
// The final path component is treated as a file name and is never created.
//
// file_path - path of the file about to be written; a null pointer or a path
//             containing no '/' is a no-op.
//
// Directory creation is best effort: mkdir() failures (for example a component
// that already exists) are deliberately ignored, and the caller discovers real
// problems when it tries to open the file.
static void make_dirs(const char* file_path)
{
    //printf("make_dirs(%s)\n", file_path);
    if ( file_path == nullptr )
        return;

    // Work on a private, NUL-terminated copy. A heap-backed vector replaces the
    // variable-length array, which is not standard C++ and puts an arbitrarily
    // long path on the stack.
    std::vector<char> dirs(file_path, file_path + strlen(file_path) + 1);

    char* lastSlash = strrchr(dirs.data(), '/');
    if ( lastSlash == NULL )
        return;
    // Chop off the file name so only the directory part remains.
    lastSlash[1] = '\0';

    struct stat stat_buf;
    if ( stat(dirs.data(), &stat_buf) != 0 ) {
        // Directory is missing: walk the path one component at a time, ending the
        // string at each '/' temporarily so mkdir() sees the prefix. Starting at
        // index 1 skips a leading '/' so the root itself is never passed to mkdir().
        char* afterSlash = &dirs[1];
        char* slash;
        while ( (slash = strchr(afterSlash, '/')) != NULL ) {
            *slash = '\0';
            ::mkdir(dirs.data(), S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
            //printf("mkdir(%s)\n", dirs.data());
            *slash = '/';
            afterSlash = slash+1;
        }
    }
}
454 template <typename A
>
455 void dylib_maker(const void* mapped_cache
, std::vector
<uint8_t> &dylib_data
, const std::vector
<seg_info
>& segments
) {
456 typedef typename
A::P P
;
458 size_t additionalSize
= 0;
459 for(std::vector
<seg_info
>::const_iterator it
=segments
.begin(); it
!= segments
.end(); ++it
) {
460 if ( strcmp(it
->segName
, "__LINKEDIT") != 0 )
461 additionalSize
+= it
->sizem
;
464 std::vector
<uint8_t> new_dylib_data
;
465 new_dylib_data
.reserve(additionalSize
);
467 // Write regular segments into the buffer
468 uint64_t textOffsetInCache
= 0;
469 for( std::vector
<seg_info
>::const_iterator it
=segments
.begin(); it
!= segments
.end(); ++it
) {
471 if(strcmp(it
->segName
, "__TEXT") == 0 )
472 textOffsetInCache
= it
->offset
;
474 //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
475 // Copy all but the __LINKEDIT. It will be copied later during the optimizer in to a temporary buffer but it would
476 // not be efficient to copy it all now for each dylib.
477 if (strcmp(it
->segName
, "__LINKEDIT") == 0 )
479 std::copy(((uint8_t*)mapped_cache
)+it
->offset
, ((uint8_t*)mapped_cache
)+it
->offset
+it
->sizem
, std::back_inserter(new_dylib_data
));
483 std::vector
<uint8_t> new_linkedit_data
;
484 new_linkedit_data
.reserve(1 << 20);
486 LinkeditOptimizer
<A
> linkeditOptimizer
;
487 dyld3::MachOAnalyzer
* mh
= (dyld3::MachOAnalyzer
*)&new_dylib_data
.front();
488 linkeditOptimizer
.optimize_loadcommands(mh
);
489 linkeditOptimizer
.optimize_linkedit(new_linkedit_data
, textOffsetInCache
, mapped_cache
);
491 new_dylib_data
.insert(new_dylib_data
.end(), new_linkedit_data
.begin(), new_linkedit_data
.end());
494 while (new_dylib_data
.size() % 4096)
495 new_dylib_data
.push_back(0);
497 dylib_data
.insert(dylib_data
.end(), new_dylib_data
.begin(), new_dylib_data
.end());
// Function type shared by all dylib_maker<A> instantiations (taken from the x86
// one); used for the pointer that selects the arch-specific maker at run time.
500 typedef __typeof(dylib_maker
<x86
>) dylib_maker_func
;
// Block type for progress callbacks: receives the current count and the total.
501 typedef void (^progress_block
)(unsigned current
, unsigned total
);
// Forward declaration: SharedCacheDylibExtractor::extractCache() takes the
// extractor context by reference before that type is defined.
// NOTE(review): declared here with `class` but defined below with `struct`.
503 class SharedCacheExtractor
;
// Per-dylib work item: the dylib's name plus its segment list.
// NOTE(review): some member declarations (e.g. `name`, and the `result` field
// read by SharedCacheExtractor::extractCaches()) are not visible in this extract.
504 struct SharedCacheDylibExtractor
{
// Takes the segment vector by value and copies it into the const member.
505 SharedCacheDylibExtractor(const char* name
, std::vector
<seg_info
> segInfo
)
506 : name(name
), segInfo(segInfo
) { }
// Extracts this dylib using the shared settings held in `context`.
508 void extractCache(SharedCacheExtractor
& context
);
511 const std::vector
<seg_info
> segInfo
;
// Shared context for extracting every dylib of one cache: holds the dylib map,
// the output root, the arch-specific maker function, the progress callback and
// one SharedCacheDylibExtractor per map entry.
// NOTE(review): parts of the constructor (the mapped_cache parameter, the loop
// header over `map`) and some members are not visible in this extract.
515 struct SharedCacheExtractor
{
516 SharedCacheExtractor(const NameToSegments
& map
,
517 const char* extraction_root_path
,
518 dylib_maker_func
* dylib_create_func
,
520 progress_block progress
)
521 : map(map
), extraction_root_path(extraction_root_path
),
522 dylib_create_func(dylib_create_func
), mapped_cache(mapped_cache
),
// One work item per dylib; reserve up front so emplace_back never reallocates.
525 extractors
.reserve(map
.size());
527 extractors
.emplace_back(it
.first
, it
.second
);
529 // Limit the number of open files. 16 seems to give better performance than higher numbers.
530 sema
= dispatch_semaphore_create(16);
// dispatch_apply_f-style worker: ctx is the SharedCacheExtractor, i the work item index.
534 static void extractCache(void *ctx
, size_t i
);
// Held by reference: the map passed to the constructor must outlive this object.
536 const NameToSegments
& map
;
537 std::vector
<SharedCacheDylibExtractor
> extractors
;
538 dispatch_semaphore_t sema
;
539 const char* extraction_root_path
;
540 dylib_maker_func
* dylib_create_func
;
542 progress_block progress
;
// Number of dylibs processed so far; atomic because workers increment it concurrently.
543 std::atomic_int count
= { 0 };
// Runs the extraction of all dylibs via dispatch_apply_f on the low-priority
// global queue (one iteration per map entry), then scans the work items for a
// non-zero result.
// NOTE(review): the remaining dispatch_apply_f arguments, the declaration of
// `result` and the return statement are not visible in this extract.
546 int SharedCacheExtractor::extractCaches() {
547 dispatch_queue_t process_queue
= dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW
, 0);
548 dispatch_apply_f(map
.size(), process_queue
,
// Propagate a failure recorded by any individual dylib extraction.
552 for (const SharedCacheDylibExtractor
& extractor
: extractors
) {
553 if (extractor
.result
!= 0) {
554 result
= extractor
.result
;
// Worker for a single iteration: ctx is the SharedCacheExtractor and i selects
// the work item in context.extractors. The semaphore wait/signal pair around the
// call bounds how many extractions run at the same time.
561 void SharedCacheExtractor::extractCache(void *ctx
, size_t i
) {
562 SharedCacheExtractor
& context
= *(SharedCacheExtractor
*)ctx
;
563 dispatch_semaphore_wait(context
.sema
, DISPATCH_TIME_FOREVER
);
564 context
.extractors
[i
].extractCache(context
);
565 dispatch_semaphore_signal(context
.sema
);
568 void SharedCacheDylibExtractor::extractCache(SharedCacheExtractor
&context
) {
570 char dylib_path
[PATH_MAX
];
571 strcpy(dylib_path
, context
.extraction_root_path
);
572 strcat(dylib_path
, "/");
573 strcat(dylib_path
, name
);
575 //printf("%s with %lu segments\n", dylib_path, it->second.size());
576 // make sure all directories in this path exist
577 make_dirs(dylib_path
);
579 // open file, create if does not already exist
580 int fd
= ::open(dylib_path
, O_CREAT
| O_TRUNC
| O_EXLOCK
| O_RDWR
, 0644);
582 fprintf(stderr
, "can't open or create dylib file %s, errnor=%d\n", dylib_path
, errno
);
587 std::vector
<uint8_t> vec
;
588 context
.dylib_create_func(context
.mapped_cache
, vec
, segInfo
);
589 context
.progress(context
.count
++, (unsigned)context
.map
.size());
592 if( write(fd
, &vec
.front(), vec
.size()) == -1) {
593 fprintf(stderr
, "error writing, errnor=%d\n", errno
);
// Checks that a mapped shared cache file is large enough to hold its code
// signature and that the per-page hashes in the signature's code directory match
// the file contents.
//
// mapped_cache - base address of the mapped cache file.
// size         - size of the mapped file in bytes.
//
// NOTE(review): the return statements are not visible in this extract; the
// caller treats a non-zero result as failure, so the error paths that print to
// stderr presumably return non-zero and the success path 0 - confirm.
600 static int sharedCacheIsValid(const void* mapped_cache
, uint64_t size
) {
601 // First check that the size is good.
602 // Note the shared cache may not have a codeSignatureSize value set so we need to first make
603 // sure we have space for the CS_SuperBlob, then later crack that to check for the size of the rest.
604 const DyldSharedCache
* dyldSharedCache
= (DyldSharedCache
*)mapped_cache
;
605 uint64_t requiredSizeForCSSuperBlob
= dyldSharedCache
->header
.codeSignatureOffset
+ sizeof(CS_SuperBlob
);
606 const dyld_cache_mapping_info
* mappings
= (dyld_cache_mapping_info
*)((uint8_t*)mapped_cache
+ dyldSharedCache
->header
.mappingOffset
);
607 if ( requiredSizeForCSSuperBlob
> size
) {
608 fprintf(stderr
, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size
, requiredSizeForCSSuperBlob
);
612 // Now see if the code signatures are valid as that tells us the pages aren't corrupt.
613 // First find all of the regions of the shared cache we computed cd hashes
// Each region is a [start, end) pair of file offsets: one per mapping, plus the
// local symbols blob when present.
614 std::vector
<std::pair
<uint64_t, uint64_t>> sharedCacheRegions
;
615 for (uint32_t i
= 0; i
!= dyldSharedCache
->header
.mappingCount
; ++i
) {
616 sharedCacheRegions
.emplace_back(std::make_pair(mappings
[i
].fileOffset
, mappings
[i
].fileOffset
+ mappings
[i
].size
));
618 if (dyldSharedCache
->header
.localSymbolsSize
)
619 sharedCacheRegions
.emplace_back(std::make_pair(dyldSharedCache
->header
.localSymbolsOffset
, dyldSharedCache
->header
.localSymbolsOffset
+ dyldSharedCache
->header
.localSymbolsSize
));
// Total number of bytes covered by all regions.
620 size_t inBbufferSize
= 0;
621 for (auto& sharedCacheRegion
: sharedCacheRegions
)
622 inBbufferSize
+= (sharedCacheRegion
.second
- sharedCacheRegion
.first
);
624 // Now take the cd hash from the cache itself and validate the regions we found.
625 uint8_t* codeSignatureRegion
= (uint8_t*)mapped_cache
+ dyldSharedCache
->header
.codeSignatureOffset
;
626 CS_SuperBlob
* sb
= reinterpret_cast<CS_SuperBlob
*>(codeSignatureRegion
);
// Signature fields are stored big-endian, hence the htonl/ntohl conversions.
627 if (sb
->magic
!= htonl(CSMAGIC_EMBEDDED_SIGNATURE
)) {
628 fprintf(stderr
, "Error: dyld shared cache code signature magic is incorrect.\n");
// Now that the super blob header is known to fit, check that the whole blob does.
632 size_t sbSize
= ntohl(sb
->length
);
633 uint64_t requiredSizeForCS
= dyldSharedCache
->header
.codeSignatureOffset
+ sbSize
;
634 if ( requiredSizeForCS
> size
) {
635 fprintf(stderr
, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size
, requiredSizeForCS
);
639 // Find the offset to the code directory.
// NOTE(review): sb->count is compared without ntohl() although magic, length and
// the index fields are all byte-swapped - confirm; on a little-endian host the
// loop bound would be wrong unless the loop exits early on a match.
640 CS_CodeDirectory
* cd
= nullptr;
641 for (unsigned i
=0; i
!= sb
->count
; ++i
) {
642 if (ntohl(sb
->index
[i
].type
) == CSSLOT_CODEDIRECTORY
) {
643 cd
= (CS_CodeDirectory
*)(codeSignatureRegion
+ ntohl(sb
->index
[i
].offset
));
649 fprintf(stderr
, "Error: dyld shared cache code signature directory is missing.\n");
// The code directory must start inside the signature blob.
// NOTE(review): only the start pointer is checked, with '>', so a directory
// starting exactly at the end of the blob is accepted - confirm.
653 if ( (uint8_t*)cd
> (codeSignatureRegion
+ sbSize
) ) {
654 fprintf(stderr
, "Error: dyld shared cache code signature directory is out of bounds.\n");
658 if ( cd
->magic
!= htonl(CSMAGIC_CODEDIRECTORY
) ) {
659 fprintf(stderr
, "Error: dyld shared cache code signature directory magic is incorrect.\n");
// cd->pageSize is used as a shift count (log2 of the page size). The directory
// must provide at least one hash slot per page of region data.
663 uint32_t pageSize
= 1 << cd
->pageSize
;
664 uint32_t slotCountFromRegions
= (uint32_t)((inBbufferSize
+ pageSize
- 1) / pageSize
);
665 if ( ntohl(cd
->nCodeSlots
) < slotCountFromRegions
) {
666 fprintf(stderr
, "Error: dyld shared cache code signature directory num slots is incorrect.\n");
// Map the directory's hash type to a CommonCrypto digest; any other type leaves
// kCCDigestNone, which skips the page verification below.
670 uint32_t dscDigestFormat
= kCCDigestNone
;
671 switch (cd
->hashType
) {
672 case CS_HASHTYPE_SHA1
:
673 #pragma clang diagnostic push
674 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
675 dscDigestFormat
= kCCDigestSHA1
;
676 #pragma clang diagnostic pop
678 case CS_HASHTYPE_SHA256
:
679 dscDigestFormat
= kCCDigestSHA256
;
685 if (dscDigestFormat
!= kCCDigestNone
) {
686 const uint64_t csPageSize
= 1 << cd
->pageSize
;
687 size_t hashOffset
= ntohl(cd
->hashOffset
);
688 uint8_t* hashSlot
= (uint8_t*)cd
+ hashOffset
;
// Scratch buffer for one digest, sized at run time from cd->hashSize (a
// variable-length array).
689 uint8_t cdHashBuffer
[cd
->hashSize
];
691 // Skip local symbols for now as those aren't being codesign correctly right now.
// NOTE(review): this inBbufferSize shadows the outer variable of the same name.
692 size_t inBbufferSize
= 0;
693 for (auto& sharedCacheRegion
: sharedCacheRegions
) {
694 if (sharedCacheRegion
.first
== dyldSharedCache
->header
.localSymbolsOffset
)
696 inBbufferSize
+= (sharedCacheRegion
.second
- sharedCacheRegion
.first
);
698 uint32_t slotCountToProcess
= (uint32_t)((inBbufferSize
+ pageSize
- 1) / pageSize
);
// Hash each page of the file and compare it with the recorded slot hash.
700 for (unsigned i
= 0; i
!= slotCountToProcess
; ++i
) {
701 // Skip data pages as those may have been slid by ASLR in the extracted file
702 uint64_t fileOffset
= i
* csPageSize
;
703 bool isDataPage
= false;
// Mappings from index 1 up to (but excluding) the last one are the ones tested here.
// NOTE(review): with mappingCount == 0 the unsigned bound (mappingCount - 1)
// wraps around - confirm callers can never reach this with an empty mapping list.
704 for (unsigned mappingIndex
= 1; mappingIndex
!= (dyldSharedCache
->header
.mappingCount
- 1); ++mappingIndex
) {
705 if ( (fileOffset
>= mappings
[mappingIndex
].fileOffset
) && (fileOffset
< (mappings
[mappingIndex
].fileOffset
+ mappings
[mappingIndex
].size
)) ) {
713 CCDigest(dscDigestFormat
, (uint8_t*)mapped_cache
+ fileOffset
, (size_t)csPageSize
, cdHashBuffer
);
714 uint8_t* cacheCdHashBuffer
= hashSlot
+ (i
* cd
->hashSize
);
715 if (memcmp(cdHashBuffer
, cacheCdHashBuffer
, cd
->hashSize
) != 0) {
716 fprintf(stderr
, "Error: dyld shared cache code signature for page %d is incorrect.\n", i
);
// Extracts every dylib contained in a dyld shared cache file into a directory
// tree, reporting progress through a block.
//
// shared_cache_file_path - path of the shared cache file to read.
// extraction_root_path   - directory under which the dylibs are written.
// progress               - block invoked with (current, total) per dylib.
//
// Steps: stat, open and mmap the cache; pick the dylib_maker instantiation from
// the cache magic; validate the cache; collect each dylib's segments; run the
// extractor; unmap.
// NOTE(review): the return statements, some declarations (statbuf, result) and
// any close() of cache_fd are not visible in this extract.
724 int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path
, const char* extraction_root_path
,
725 progress_block progress
)
728 if (stat(shared_cache_file_path
, &statbuf
)) {
729 fprintf(stderr
, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path
);
733 int cache_fd
= open(shared_cache_file_path
, O_RDONLY
);
735 fprintf(stderr
, "Error: failed to open shared cache file at %s\n", shared_cache_file_path
);
// Map the whole file read-only and privately.
739 void* mapped_cache
= mmap(NULL
, (size_t)statbuf
.st_size
, PROT_READ
, MAP_PRIVATE
, cache_fd
, 0);
740 if (mapped_cache
== MAP_FAILED
) {
741 fprintf(stderr
, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path
, errno
);
747 // instantiate arch specific dylib maker
// The file starts with a NUL-terminated magic string naming the architecture.
748 dylib_maker_func
* dylib_create_func
= nullptr;
749 if ( strcmp((char*)mapped_cache
, "dyld_v1 i386") == 0 )
750 dylib_create_func
= dylib_maker
<x86
>;
751 else if ( strcmp((char*)mapped_cache
, "dyld_v1 x86_64") == 0 )
752 dylib_create_func
= dylib_maker
<x86_64
>;
// x86_64h uses the same maker as x86_64.
753 else if ( strcmp((char*)mapped_cache
, "dyld_v1 x86_64h") == 0 )
754 dylib_create_func
= dylib_maker
<x86_64
>;
755 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv5") == 0 )
756 dylib_create_func
= dylib_maker
<arm
>;
757 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv6") == 0 )
758 dylib_create_func
= dylib_maker
<arm
>;
759 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv7") == 0 )
760 dylib_create_func
= dylib_maker
<arm
>;
// Prefix match on the first 14 characters: accepts armv7 variants with a suffix.
761 else if ( strncmp((char*)mapped_cache
, "dyld_v1 armv7", 14) == 0 )
762 dylib_create_func
= dylib_maker
<arm
>;
763 else if ( strcmp((char*)mapped_cache
, "dyld_v1 arm64") == 0 )
764 dylib_create_func
= dylib_maker
<arm64
>;
765 #if SUPPORT_ARCH_arm64e
// arm64e uses the same maker as arm64.
766 else if ( strcmp((char*)mapped_cache
, "dyld_v1 arm64e") == 0 )
767 dylib_create_func
= dylib_maker
<arm64
>;
769 #if SUPPORT_ARCH_arm64_32
// NOTE(review): this magic has no space after "dyld_v1", unlike the others;
// presumably intentional - confirm against the cache format.
770 else if ( strcmp((char*)mapped_cache
, "dyld_v1arm64_32") == 0 )
771 dylib_create_func
= dylib_maker
<arm64_32
>;
774 fprintf(stderr
, "Error: unrecognized dyld shared cache magic.\n");
775 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
779 // Verify that the cache isn't corrupt.
780 if (int result
= sharedCacheIsValid(mapped_cache
, (uint64_t)statbuf
.st_size
)) {
781 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
785 // iterate through all images in cache and build map of dylibs and segments
786 __block NameToSegments map
;
// The block runs once per (dylib, segment) pair and appends the segment to the
// list stored under the dylib's path.
// NOTE(review): the file size is narrowed to uint32_t for this call - confirm
// the behaviour for caches larger than 4 GiB.
789 result
= dyld_shared_cache_iterate(mapped_cache
, (uint32_t)statbuf
.st_size
, ^(const dyld_shared_cache_dylib_info
* dylibInfo
, const dyld_shared_cache_segment_info
* segInfo
) {
790 map
[dylibInfo
->path
].push_back(seg_info(segInfo
->name
, segInfo
->fileOffset
, segInfo
->fileSize
));
// NOTE(review): the message names dyld_shared_cache_iterate_segments_with_slide,
// but the call above is dyld_shared_cache_iterate.
794 fprintf(stderr
, "Error: dyld_shared_cache_iterate_segments_with_slide failed.\n");
795 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
799 // for each dylib instantiate a dylib file
800 SharedCacheExtractor
extractor(map
, extraction_root_path
, dylib_create_func
, mapped_cache
, progress
);
801 result
= extractor
.extractCaches();
803 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
// Convenience wrapper: same as dyld_shared_cache_extract_dylibs_progress() but
// with an empty progress block, so no progress is reported. Returns that
// function's result.
809 int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path
, const char* extraction_root_path
)
811 return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path
, extraction_root_path
,
812 ^(unsigned , unsigned) {} );
// Pointer type for the extraction entry point that main() looks up with dlsym();
// the parameter list matches dyld_shared_cache_extract_dylibs_progress().
823 typedef int (*extractor_proc
)(const char* shared_cache_file_path
, const char* extraction_root_path
,
824 void (^progress
)(unsigned current
, unsigned total
));
// Command-line driver: loads dsc_extractor.bundle from a fixed path inside
// Xcode, looks up dyld_shared_cache_extract_dylibs_progress and calls it with
// argv[1] (cache file) and argv[2] (output directory), printing "current/total"
// progress to stdout and the final result to stderr.
// NOTE(review): the argument-count check guarding the usage message and the
// return statements are not visible in this extract.
826 int main(int argc
, const char* argv
[])
829 fprintf(stderr
, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
833 //void* handle = dlopen("/Volumes/my/src/dyld/build/Debug/dsc_extractor.bundle", RTLD_LAZY);
// The bundle path is hard-coded to the default Xcode install location.
834 void* handle
= dlopen("/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/usr/lib/dsc_extractor.bundle", RTLD_LAZY
);
835 if ( handle
== NULL
) {
836 fprintf(stderr
, "dsc_extractor.bundle could not be loaded\n");
840 extractor_proc proc
= (extractor_proc
)dlsym(handle
, "dyld_shared_cache_extract_dylibs_progress");
841 if ( proc
== NULL
) {
842 fprintf(stderr
, "dsc_extractor.bundle did not have dyld_shared_cache_extract_dylibs_progress symbol\n");
// Run the extraction; the block prints one progress line per callback.
846 int result
= (*proc
)(argv
[1], argv
[2], ^(unsigned c
, unsigned total
) { printf("%d/%d\n", c
, total
); } );
847 fprintf(stderr
, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result
);