1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
3 * Copyright (c) 2011 Apple Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
34 #include <sys/syslimits.h>
35 #include <libkern/OSByteOrder.h>
36 #include <mach-o/arch.h>
37 #include <mach-o/loader.h>
38 #include <Availability.h>
40 #include "CodeSigningTypes.h"
41 #include <CommonCrypto/CommonHMAC.h>
42 #include <CommonCrypto/CommonDigest.h>
43 #include <CommonCrypto/CommonDigestSPI.h>
46 #include "Architectures.hpp"
47 #include "MachOFileAbstraction.hpp"
48 #include "CacheFileAbstraction.hpp"
50 #include "dsc_iterator.h"
51 #include "dsc_extractor.h"
52 #include "MachOTrie.hpp"
53 #include "SupportedArchs.h"
54 #include "DyldSharedCache.h"
59 #include <unordered_map>
61 #include <dispatch/dispatch.h>
65 seg_info(const char* n
, uint64_t o
, uint64_t s
)
66 : segName(n
), offset(o
), sizem(s
) { }
74 size_t operator()(const char* __s
) const {
83 bool operator()(const char* left
, const char* right
) const { return (strcmp(left
, right
) == 0); }
85 typedef std::unordered_map
<const char*, std::vector
<seg_info
>, CStringHash
, CStringEquals
> NameToSegments
;
87 // Filter to find individual symbol re-exports in trie
// Used as the predicate of std::remove_if over the export-trie entries
// (see optimize_linkedit).  It keeps a reference to the set of re-exported
// dependent-dylib indexes, so that set must outlive this object.
// NOTE(review): the return statements of both methods are not visible in
// this listing; confirm which outcome each test produces before relying on
// the comments below.
88 class NotReExportSymbol
{
// Stores a reference to rd; no copy is made.
90 NotReExportSymbol(const std::set
<int> &rd
) :_reexportDeps(rd
) {}
// Classifies the entry with isSymbolReExport().  The visible statements
// free entry.name and null it out through a const_cast, i.e. the predicate
// mutates the element it is inspecting.
// NOTE(review): presumably the free only happens for entries about to be
// removed — the guarding condition is not visible here.
91 bool operator()(const mach_o::trie::Entry
&entry
) const {
92 bool result
= isSymbolReExport(entry
);
94 // <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
95 ::free((void*)entry
.name
);
96 const_cast<mach_o::trie::Entry
*>(&entry
)->name
= NULL
;
// Applies three tests to the entry: its kind bits are not
// EXPORT_SYMBOL_FLAGS_KIND_REGULAR, its EXPORT_SYMBOL_FLAGS_REEXPORT bit is
// clear, or its `other` field (cast to int) is present in _reexportDeps.
101 bool isSymbolReExport(const mach_o::trie::Entry
&entry
) const {
102 if ( (entry
.flags
& EXPORT_SYMBOL_FLAGS_KIND_MASK
) != EXPORT_SYMBOL_FLAGS_KIND_REGULAR
)
104 if ( (entry
.flags
& EXPORT_SYMBOL_FLAGS_REEXPORT
) == 0 )
106 // If the symbol comes from a dylib that is re-exported, this is not an individual symbol re-export
107 if ( _reexportDeps
.count((int)entry
.other
) != 0 )
// Borrowed set of dependent-dylib indexes that are re-exported.
111 const std::set
<int> &_reexportDeps
;
114 template <typename P
>
115 struct LoadCommandInfo
{
// Per-dylib helper, parameterized on an architecture type A.
// optimize_loadcommands() fills in the members below from the dylib's load
// commands; optimize_linkedit() then reads them to rebuild __LINKEDIT.
118 template <typename A
>
119 class LinkeditOptimizer
{
// Shorthand for the architecture's pointer, endianness and pointer-sized
// integer types.
120 typedef typename
A::P P
;
121 typedef typename
A::P::E E
;
122 typedef typename
A::P::uint_t pint_t
;
// Segment command whose name is "__LINKEDIT"; NULL until one is seen.
125 macho_segment_command
<P
>* linkEditSegCmd
= NULL
;
// Symbol-table load command; NULL until one is seen.
126 macho_symtab_command
<P
>* symtab
= NULL
;
// Dynamic-symbol-table load command; NULL until one is seen.
127 macho_dysymtab_command
<P
>* dynamicSymTab
= NULL
;
// LC_FUNCTION_STARTS command, if the dylib has one.
128 macho_linkedit_data_command
<P
>* functionStarts
= NULL
;
// LC_DATA_IN_CODE command, if the dylib has one.
129 macho_linkedit_data_command
<P
>* dataInCode
= NULL
;
// export_off()/export_size() captured from LC_DYLD_INFO_ONLY before that
// command's fields are zeroed.
130 uint32_t exportsTrieOffset
= 0;
131 uint32_t exportsTrieSize
= 0;
// Dependent-dylib indexes recorded for LC_REEXPORT_DYLIB commands.
132 std::set
<int> reexportDeps
;
// Rewrites the load commands that follow the mach header `mh` in place:
//  - clears the top bit of the header flags,
//  - gives every segment a file offset equal to the running sum of the
//    preceding segments' file sizes and a file size equal to its vmsize,
//  - zeroes all LC_DYLD_INFO_ONLY fields after saving the export-trie
//    location,
//  - remembers the commands that optimize_linkedit() needs later.
// NOTE(review): several lines of this function are not visible in this
// listing (some case labels, the declaration/increment of depIndex, and the
// branch that chooses between the memmove and the bytesRemaining update);
// the comments below describe only what is visible.
136 void optimize_loadcommands(macho_header
<typename
A::P
>* mh
)
138 typedef typename
A::P P
;
139 typedef typename
A::P::E E
;
140 typedef typename
A::P::uint_t pint_t
;
142 // update header flags
143 mh
->set_flags(mh
->flags() & 0x7FFFFFFF); // remove in-cache bit
145 // update load commands
// cumulativeFileSize is the file offset at which the next segment starts.
146 uint64_t cumulativeFileSize
= 0;
147 const unsigned origLoadCommandsSize
= mh
->sizeofcmds();
// Starts at the full size of the load-command area and is reduced by the
// size of commands as the walk proceeds.
148 unsigned bytesRemaining
= origLoadCommandsSize
;
149 unsigned removedCount
= 0;
// The first load command sits immediately after the mach header.
150 const macho_load_command
<P
>* const cmds
= (macho_load_command
<P
>*)((uint8_t*)mh
+ sizeof(macho_header
<P
>));
151 const uint32_t cmdCount
= mh
->ncmds();
152 const macho_load_command
<P
>* cmd
= cmds
;
154 for (uint32_t i
= 0; i
< cmdCount
; ++i
) {
156 switch ( cmd
->cmd() ) {
157 case macho_segment_command
<P
>::CMD
:
159 // update segment/section file offsets
160 macho_segment_command
<P
>* segCmd
= (macho_segment_command
<P
>*)cmd
;
161 segCmd
->set_fileoff(cumulativeFileSize
);
162 segCmd
->set_filesize(segCmd
->vmsize());
// Section records follow the segment command directly.
163 macho_section
<P
>* const sectionsStart
= (macho_section
<P
>*)((char*)segCmd
+ sizeof(macho_segment_command
<P
>));
164 macho_section
<P
>* const sectionsEnd
= §ionsStart
[segCmd
->nsects()];
165 for(macho_section
<P
>* sect
= sectionsStart
; sect
< sectionsEnd
; ++sect
) {
// Sections with a zero offset are left untouched; the others are re-based
// to the segment's new file offset plus their distance from its vmaddr.
166 if ( sect
->offset() != 0 )
167 sect
->set_offset((uint32_t)(cumulativeFileSize
+sect
->addr()-segCmd
->vmaddr()));
169 if ( strcmp(segCmd
->segname(), "__LINKEDIT") == 0 ) {
170 linkEditSegCmd
= segCmd
;
172 cumulativeFileSize
+= segCmd
->filesize();
175 case LC_DYLD_INFO_ONLY
:
177 // zero out all dyld info
178 macho_dyld_info_command
<P
>* dyldInfo
= (macho_dyld_info_command
<P
>*)cmd
;
// Save the export-trie location first; it is read by optimize_linkedit().
179 exportsTrieOffset
= dyldInfo
->export_off();
180 exportsTrieSize
= dyldInfo
->export_size();
181 dyldInfo
->set_rebase_off(0);
182 dyldInfo
->set_rebase_size(0);
183 dyldInfo
->set_bind_off(0);
184 dyldInfo
->set_bind_size(0);
185 dyldInfo
->set_weak_bind_off(0);
186 dyldInfo
->set_weak_bind_size(0);
187 dyldInfo
->set_lazy_bind_off(0);
188 dyldInfo
->set_lazy_bind_size(0);
189 dyldInfo
->set_export_off(0);
190 dyldInfo
->set_export_size(0);
// NOTE(review): the case labels guarding the next two assignments are not
// visible in this listing.
194 symtab
= (macho_symtab_command
<P
>*)cmd
;
197 dynamicSymTab
= (macho_dysymtab_command
<P
>*)cmd
;
199 case LC_FUNCTION_STARTS
:
200 functionStarts
= (macho_linkedit_data_command
<P
>*)cmd
;
202 case LC_DATA_IN_CODE
:
203 dataInCode
= (macho_linkedit_data_command
<P
>*)cmd
;
206 case LC_LOAD_WEAK_DYLIB
:
207 case LC_REEXPORT_DYLIB
:
208 case LC_LOAD_UPWARD_DYLIB
:
// Only re-exported dependents are recorded, keyed by depIndex.
210 if ( cmd
->cmd() == LC_REEXPORT_DYLIB
) {
211 reexportDeps
.insert(depIndex
);
214 case LC_SEGMENT_SPLIT_INFO
:
215 // <rdar://problem/23212513> dylibs iOS 9 dyld caches have bogus LC_SEGMENT_SPLIT_INFO
219 uint32_t cmdSize
= cmd
->cmdsize();
220 macho_load_command
<P
>* nextCmd
= (macho_load_command
<P
>*)(((uint8_t*)cmd
)+cmdSize
);
// Slides the following commands down over the current one.
// NOTE(review): bytesRemaining still appears to include the current
// command here, so this would read cmdSize bytes past the remaining
// commands — confirm against the full source.
222 ::memmove((void*)cmd
, (void*)nextCmd
, bytesRemaining
);
226 bytesRemaining
-= cmdSize
;
230 // zero out stuff removed
231 ::bzero((void*)cmd
, bytesRemaining
);
// Publish the new command count and total size in the header.
233 mh
->set_ncmds(cmdCount
- removedCount
);
234 mh
->set_sizeofcmds(origLoadCommandsSize
- bytesRemaining
);
237 int optimize_linkedit(std::vector
<uint8_t> &new_linkedit_data
, uint64_t textOffsetInCache
, const void* mapped_cache
)
239 typedef typename
A::P P
;
240 typedef typename
A::P::E E
;
241 typedef typename
A::P::uint_t pint_t
;
243 // rebuild symbol table
244 if ( linkEditSegCmd
== NULL
) {
245 fprintf(stderr
, "__LINKEDIT not found\n");
248 if ( symtab
== NULL
) {
249 fprintf(stderr
, "LC_SYMTAB not found\n");
252 if ( dynamicSymTab
== NULL
) {
253 fprintf(stderr
, "LC_DYSYMTAB not found\n");
257 const uint64_t newFunctionStartsOffset
= new_linkedit_data
.size();
258 uint32_t functionStartsSize
= 0;
259 if ( functionStarts
!= NULL
) {
260 // copy function starts from original cache file to new mapped dylib file
261 functionStartsSize
= functionStarts
->datasize();
262 new_linkedit_data
.insert(new_linkedit_data
.end(),
263 (char*)mapped_cache
+ functionStarts
->dataoff(),
264 (char*)mapped_cache
+ functionStarts
->dataoff() + functionStartsSize
);
268 while ((linkEditSegCmd
->fileoff() + new_linkedit_data
.size()) % sizeof(pint_t
))
269 new_linkedit_data
.push_back(0);
271 const uint64_t newDataInCodeOffset
= new_linkedit_data
.size();
272 uint32_t dataInCodeSize
= 0;
273 if ( dataInCode
!= NULL
) {
274 // copy data-in-code info from original cache file to new mapped dylib file
275 dataInCodeSize
= dataInCode
->datasize();
276 new_linkedit_data
.insert(new_linkedit_data
.end(),
277 (char*)mapped_cache
+ dataInCode
->dataoff(),
278 (char*)mapped_cache
+ dataInCode
->dataoff() + dataInCodeSize
);
281 std::vector
<mach_o::trie::Entry
> exports
;
282 if ( exportsTrieSize
!= 0 ) {
283 const uint8_t* exportsStart
= ((uint8_t*)mapped_cache
) + exportsTrieOffset
;
284 const uint8_t* exportsEnd
= &exportsStart
[exportsTrieSize
];
285 mach_o::trie::parseTrie(exportsStart
, exportsEnd
, exports
);
286 exports
.erase(std::remove_if(exports
.begin(), exports
.end(), NotReExportSymbol(reexportDeps
)), exports
.end());
289 // look for local symbol info in unmapped part of shared cache
290 dyldCacheHeader
<E
>* header
= (dyldCacheHeader
<E
>*)mapped_cache
;
291 macho_nlist
<P
>* localNlists
= NULL
;
292 uint32_t localNlistCount
= 0;
293 const char* localStrings
= NULL
;
294 const char* localStringsEnd
= NULL
;
295 if ( header
->mappingOffset() > offsetof(dyld_cache_header
,localSymbolsSize
) ) {
296 dyldCacheLocalSymbolsInfo
<E
>* localInfo
= (dyldCacheLocalSymbolsInfo
<E
>*)(((uint8_t*)mapped_cache
) + header
->localSymbolsOffset());
297 dyldCacheLocalSymbolEntry
<E
>* entries
= (dyldCacheLocalSymbolEntry
<E
>*)(((uint8_t*)mapped_cache
) + header
->localSymbolsOffset() + localInfo
->entriesOffset());
298 macho_nlist
<P
>* allLocalNlists
= (macho_nlist
<P
>*)(((uint8_t*)localInfo
) + localInfo
->nlistOffset());
299 const uint32_t entriesCount
= localInfo
->entriesCount();
300 for (uint32_t i
=0; i
< entriesCount
; ++i
) {
301 if ( entries
[i
].dylibOffset() == textOffsetInCache
) {
302 uint32_t localNlistStart
= entries
[i
].nlistStartIndex();
303 localNlistCount
= entries
[i
].nlistCount();
304 localNlists
= &allLocalNlists
[localNlistStart
];
305 localStrings
= ((char*)localInfo
) + localInfo
->stringsOffset();
306 localStringsEnd
= &localStrings
[localInfo
->stringsSize()];
311 // compute number of symbols in new symbol table
312 const macho_nlist
<P
>* const mergedSymTabStart
= (macho_nlist
<P
>*)(((uint8_t*)mapped_cache
) + symtab
->symoff());
313 const macho_nlist
<P
>* const mergedSymTabend
= &mergedSymTabStart
[symtab
->nsyms()];
314 uint32_t newSymCount
= symtab
->nsyms();
315 if ( localNlists
!= NULL
) {
316 newSymCount
= localNlistCount
;
317 for (const macho_nlist
<P
>* s
= mergedSymTabStart
; s
!= mergedSymTabend
; ++s
) {
318 // skip any locals in cache
319 if ( (s
->n_type() & (N_TYPE
|N_EXT
)) == N_SECT
)
325 // add room for N_INDR symbols for re-exported symbols
326 newSymCount
+= exports
.size();
328 // copy symbol entries and strings from original cache file to new mapped dylib file
329 const char* mergedStringPoolStart
= (char*)mapped_cache
+ symtab
->stroff();
330 const char* mergedStringPoolEnd
= &mergedStringPoolStart
[symtab
->strsize()];
332 // First count how many entries we need
333 std::vector
<macho_nlist
<P
>> newSymTab
;
334 newSymTab
.reserve(newSymCount
);
335 std::vector
<char> newSymNames
;
337 // first pool entry is always empty string
338 newSymNames
.push_back('\0');
340 for (const macho_nlist
<P
>* s
= mergedSymTabStart
; s
!= mergedSymTabend
; ++s
) {
341 // if we have better local symbol info, skip any locals here
342 if ( (localNlists
!= NULL
) && ((s
->n_type() & (N_TYPE
|N_EXT
)) == N_SECT
) )
344 macho_nlist
<P
> t
= *s
;
345 t
.set_n_strx((uint32_t)newSymNames
.size());
346 const char* symName
= &mergedStringPoolStart
[s
->n_strx()];
347 if ( symName
> mergedStringPoolEnd
)
348 symName
= "<corrupt symbol name>";
349 newSymNames
.insert(newSymNames
.end(),
351 symName
+ (strlen(symName
) + 1));
352 newSymTab
.push_back(t
);
354 // <rdar://problem/16529213> recreate N_INDR symbols in extracted dylibs for debugger
355 for (std::vector
<mach_o::trie::Entry
>::iterator it
= exports
.begin(); it
!= exports
.end(); ++it
) {
357 memset(&t
, 0, sizeof(t
));
358 t
.set_n_strx((uint32_t)newSymNames
.size());
359 t
.set_n_type(N_INDR
| N_EXT
);
362 newSymNames
.insert(newSymNames
.end(),
364 it
->name
+ (strlen(it
->name
) + 1));
365 const char* importName
= it
->importName
;
366 if ( *importName
== '\0' )
367 importName
= it
->name
;
368 t
.set_n_value(newSymNames
.size());
369 newSymNames
.insert(newSymNames
.end(),
371 importName
+ (strlen(importName
) + 1));
372 newSymTab
.push_back(t
);
374 if ( localNlists
!= NULL
) {
375 // update load command to reflect new count of locals
376 dynamicSymTab
->set_ilocalsym((uint32_t)newSymTab
.size());
377 dynamicSymTab
->set_nlocalsym(localNlistCount
);
378 // copy local symbols
379 for (uint32_t i
=0; i
< localNlistCount
; ++i
) {
380 const char* localName
= &localStrings
[localNlists
[i
].n_strx()];
381 if ( localName
> localStringsEnd
)
382 localName
= "<corrupt local symbol name>";
383 macho_nlist
<P
> t
= localNlists
[i
];
384 t
.set_n_strx((uint32_t)newSymNames
.size());
385 newSymNames
.insert(newSymNames
.end(),
387 localName
+ (strlen(localName
) + 1));
388 newSymTab
.push_back(t
);
392 if ( newSymCount
!= newSymTab
.size() ) {
393 fprintf(stderr
, "symbol count miscalculation\n");
397 //const uint64_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
398 //macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
399 //char* const newStringPoolStart = (char*)mh + newStringPoolOffset;
402 while ((linkEditSegCmd
->fileoff() + new_linkedit_data
.size()) % sizeof(pint_t
))
403 new_linkedit_data
.push_back(0);
405 const uint64_t newSymTabOffset
= new_linkedit_data
.size();
408 for (macho_nlist
<P
>& sym
: newSymTab
) {
409 uint8_t symData
[sizeof(macho_nlist
<P
>)];
410 memcpy(&symData
, &sym
, sizeof(sym
));
411 new_linkedit_data
.insert(new_linkedit_data
.end(), &symData
[0], &symData
[sizeof(macho_nlist
<P
>)]);
414 const uint64_t newIndSymTabOffset
= new_linkedit_data
.size();
416 // Copy indirect symbol table
417 const uint32_t* mergedIndSymTab
= (uint32_t*)((char*)mapped_cache
+ dynamicSymTab
->indirectsymoff());
418 new_linkedit_data
.insert(new_linkedit_data
.end(),
419 (char*)mergedIndSymTab
,
420 (char*)(mergedIndSymTab
+ dynamicSymTab
->nindirectsyms()));
422 const uint64_t newStringPoolOffset
= new_linkedit_data
.size();
424 // pointer align string pool size
425 while (newSymNames
.size() % sizeof(pint_t
))
426 newSymNames
.push_back('\0');
428 new_linkedit_data
.insert(new_linkedit_data
.end(), newSymNames
.begin(), newSymNames
.end());
430 // update load commands
431 if ( functionStarts
!= NULL
) {
432 functionStarts
->set_dataoff((uint32_t)(newFunctionStartsOffset
+ linkEditSegCmd
->fileoff()));
433 functionStarts
->set_datasize(functionStartsSize
);
435 if ( dataInCode
!= NULL
) {
436 dataInCode
->set_dataoff((uint32_t)(newDataInCodeOffset
+ linkEditSegCmd
->fileoff()));
437 dataInCode
->set_datasize(dataInCodeSize
);
440 symtab
->set_nsyms(newSymCount
);
441 symtab
->set_symoff((uint32_t)(newSymTabOffset
+ linkEditSegCmd
->fileoff()));
442 symtab
->set_stroff((uint32_t)(newStringPoolOffset
+ linkEditSegCmd
->fileoff()));
443 symtab
->set_strsize((uint32_t)newSymNames
.size());
444 dynamicSymTab
->set_extreloff(0);
445 dynamicSymTab
->set_nextrel(0);
446 dynamicSymTab
->set_locreloff(0);
447 dynamicSymTab
->set_nlocrel(0);
448 dynamicSymTab
->set_indirectsymoff((uint32_t)(newIndSymTabOffset
+ linkEditSegCmd
->fileoff()));
449 linkEditSegCmd
->set_filesize(symtab
->stroff()+symtab
->strsize() - linkEditSegCmd
->fileoff());
450 linkEditSegCmd
->set_vmsize( (linkEditSegCmd
->filesize()+4095) & (-4096) );
452 // <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
453 for (std::vector
<mach_o::trie::Entry
>::iterator it
= exports
.begin(); it
!= exports
.end(); ++it
) {
454 ::free((void*)(it
->name
));
/*
 * Creates every missing directory on the way to file_path.
 *
 * Only the directory part (everything up to and including the last '/') is
 * considered; a path with no '/' is a no-op.  When that directory already
 * exists nothing is done.  Otherwise each prefix ending at a '/' is passed
 * to mkdir() with mode rwxr-xr-x; mkdir() failures (for example "already
 * exists") are deliberately ignored.
 *
 * The scan starts at the second character, so a leading '/' is never
 * treated as a component of its own.
 */
static void make_dirs(const char* file_path)
{
    //printf("make_dirs(%s)\n", file_path);
    // Heap-backed scratch copy (terminator included) instead of a
    // variable-length array, which is not standard C++ and puts a
    // caller-controlled size on the stack.
    std::vector<char> dirs(file_path, file_path + strlen(file_path) + 1);
    char* lastSlash = strrchr(dirs.data(), '/');
    if ( lastSlash == NULL )
        return;
    // cut the file name off, keeping the trailing '/'
    lastSlash[1] = '\0';
    struct stat stat_buf;
    if ( stat(dirs.data(), &stat_buf) != 0 ) {
        char* afterSlash = &dirs[1];
        char* slash;
        while ( (slash = strchr(afterSlash, '/')) != NULL ) {
            // temporarily terminate the string at this component
            *slash = '\0';
            ::mkdir(dirs.data(), S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
            //printf("mkdir(%s)\n", dirs);
            *slash = '/';
            afterSlash = slash+1;
        }
    }
}
488 template <typename A
>
489 void dylib_maker(const void* mapped_cache
, std::vector
<uint8_t> &dylib_data
, const std::vector
<seg_info
>& segments
) {
490 typedef typename
A::P P
;
492 size_t additionalSize
= 0;
493 for(std::vector
<seg_info
>::const_iterator it
=segments
.begin(); it
!= segments
.end(); ++it
) {
494 if ( strcmp(it
->segName
, "__LINKEDIT") != 0 )
495 additionalSize
+= it
->sizem
;
498 std::vector
<uint8_t> new_dylib_data
;
499 new_dylib_data
.reserve(additionalSize
);
501 // Write regular segments into the buffer
502 uint64_t textOffsetInCache
= 0;
503 for( std::vector
<seg_info
>::const_iterator it
=segments
.begin(); it
!= segments
.end(); ++it
) {
505 if(strcmp(it
->segName
, "__TEXT") == 0 )
506 textOffsetInCache
= it
->offset
;
508 //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
509 // Copy all but the __LINKEDIT. It will be copied later during the optimizer in to a temporary buffer but it would
510 // not be efficient to copy it all now for each dylib.
511 if (strcmp(it
->segName
, "__LINKEDIT") == 0 )
513 std::copy(((uint8_t*)mapped_cache
)+it
->offset
, ((uint8_t*)mapped_cache
)+it
->offset
+it
->sizem
, std::back_inserter(new_dylib_data
));
517 std::vector
<uint8_t> new_linkedit_data
;
518 new_linkedit_data
.reserve(1 << 20);
520 LinkeditOptimizer
<A
> linkeditOptimizer
;
521 macho_header
<P
>* mh
= (macho_header
<P
>*)&new_dylib_data
.front();
522 linkeditOptimizer
.optimize_loadcommands(mh
);
523 linkeditOptimizer
.optimize_linkedit(new_linkedit_data
, textOffsetInCache
, mapped_cache
);
525 new_dylib_data
.insert(new_dylib_data
.end(), new_linkedit_data
.begin(), new_linkedit_data
.end());
528 while (new_dylib_data
.size() % 4096)
529 new_dylib_data
.push_back(0);
531 dylib_data
.insert(dylib_data
.end(), new_dylib_data
.begin(), new_dylib_data
.end());
534 typedef __typeof(dylib_maker
<x86
>) dylib_maker_func
;
535 typedef void (^progress_block
)(unsigned current
, unsigned total
);
// Forward declaration: the per-dylib extractor below takes the shared
// extraction context by reference.
537 class SharedCacheExtractor
;
// Extraction job for a single dylib: its name and a private copy of its
// segment list.
// NOTE(review): further member declarations (at least `name` and a `result`
// field read by SharedCacheExtractor::extractCaches) are not visible in this
// listing.
538 struct SharedCacheDylibExtractor
{
// segInfo is taken by value and copied into the const member.
539 SharedCacheDylibExtractor(const char* name
, std::vector
<seg_info
> segInfo
)
540 : name(name
), segInfo(segInfo
) { }
// Builds this dylib and writes it to disk; defined out of line.
542 void extractCache(SharedCacheExtractor
& context
);
545 const std::vector
<seg_info
> segInfo
;
// Shared context for extracting every dylib in a cache: the name-to-segments
// map, the output root, the architecture-specific dylib builder, the
// progress block, and one SharedCacheDylibExtractor per map entry.
// NOTE(review): some lines of this struct (a constructor parameter, the
// loop header that fills `extractors`, and at least the `mapped_cache`
// member) are not visible in this listing.
549 struct SharedCacheExtractor
{
550 SharedCacheExtractor(const NameToSegments
& map
,
551 const char* extraction_root_path
,
552 dylib_maker_func
* dylib_create_func
,
554 progress_block progress
)
555 : map(map
), extraction_root_path(extraction_root_path
),
556 dylib_create_func(dylib_create_func
), mapped_cache(mapped_cache
),
// Pre-size the job list, then create one job per map entry from its key
// (first) and segment list (second).
559 extractors
.reserve(map
.size());
561 extractors
.emplace_back(it
.first
, it
.second
);
563 // Limit the number of open files. 16 seems to give better performance than higher numbers.
564 sema
= dispatch_semaphore_create(16);
// libdispatch-style callback: ctx is the SharedCacheExtractor, i the job index.
568 static void extractCache(void *ctx
, size_t i
);
// Held by reference; the caller's map must outlive this object.
570 const NameToSegments
& map
;
571 std::vector
<SharedCacheDylibExtractor
> extractors
;
// Counting semaphore that bounds how many jobs run at once.
572 dispatch_semaphore_t sema
;
573 const char* extraction_root_path
;
574 dylib_maker_func
* dylib_create_func
;
576 progress_block progress
;
// Atomic counter; incremented once per job and passed to the progress block.
577 std::atomic_int count
= { 0 };
// Runs one extraction per map entry through dispatch_apply_f on the
// low-priority global queue, then scans the per-dylib jobs and copies any
// non-zero job result into `result`.
// NOTE(review): the remaining dispatch_apply_f arguments, the declaration of
// `result` and the return statement are not visible in this listing.
580 int SharedCacheExtractor::extractCaches() {
581 dispatch_queue_t process_queue
= dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW
, 0);
582 dispatch_apply_f(map
.size(), process_queue
,
// When several jobs failed, the last non-zero result in vector order is the
// one left in `result`.
586 for (const SharedCacheDylibExtractor
& extractor
: extractors
) {
587 if (extractor
.result
!= 0) {
588 result
= extractor
.result
;
595 void SharedCacheExtractor::extractCache(void *ctx
, size_t i
) {
596 SharedCacheExtractor
& context
= *(SharedCacheExtractor
*)ctx
;
597 dispatch_semaphore_wait(context
.sema
, DISPATCH_TIME_FOREVER
);
598 context
.extractors
[i
].extractCache(context
);
599 dispatch_semaphore_signal(context
.sema
);
602 void SharedCacheDylibExtractor::extractCache(SharedCacheExtractor
&context
) {
604 char dylib_path
[PATH_MAX
];
605 strcpy(dylib_path
, context
.extraction_root_path
);
606 strcat(dylib_path
, "/");
607 strcat(dylib_path
, name
);
609 //printf("%s with %lu segments\n", dylib_path, it->second.size());
610 // make sure all directories in this path exist
611 make_dirs(dylib_path
);
613 // open file, create if does not already exist
614 int fd
= ::open(dylib_path
, O_CREAT
| O_TRUNC
| O_EXLOCK
| O_RDWR
, 0644);
616 fprintf(stderr
, "can't open or create dylib file %s, errnor=%d\n", dylib_path
, errno
);
621 std::vector
<uint8_t> vec
;
622 context
.dylib_create_func(context
.mapped_cache
, vec
, segInfo
);
623 context
.progress(context
.count
++, (unsigned)context
.map
.size());
626 if( write(fd
, &vec
.front(), vec
.size()) == -1) {
627 fprintf(stderr
, "error writing, errnor=%d\n", errno
);
634 static int sharedCacheIsValid(const void* mapped_cache
, uint64_t size
) {
635 // First check that the size is good.
636 // Note the shared cache may not have a codeSignatureSize value set so we need to first make
637 // sure we have space for the CS_SuperBlob, then later crack that to check for the size of the rest.
638 const DyldSharedCache
* dyldSharedCache
= (DyldSharedCache
*)mapped_cache
;
639 uint64_t requiredSizeForCSSuperBlob
= dyldSharedCache
->header
.codeSignatureOffset
+ sizeof(CS_SuperBlob
);
640 const dyld_cache_mapping_info
* mappings
= (dyld_cache_mapping_info
*)((uint8_t*)mapped_cache
+ dyldSharedCache
->header
.mappingOffset
);
641 if ( requiredSizeForCSSuperBlob
> size
) {
642 fprintf(stderr
, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size
, requiredSizeForCSSuperBlob
);
646 // Now see if the code signatures are valid as that tells us the pages aren't corrupt.
647 // First find all of the regions of the shared cache we computed cd hashes
648 std::vector
<std::pair
<uint64_t, uint64_t>> sharedCacheRegions
;
649 sharedCacheRegions
.emplace_back(std::make_pair(mappings
[0].fileOffset
, mappings
[0].fileOffset
+ mappings
[0].size
));
650 sharedCacheRegions
.emplace_back(std::make_pair(mappings
[1].fileOffset
, mappings
[1].fileOffset
+ mappings
[1].size
));
651 sharedCacheRegions
.emplace_back(std::make_pair(mappings
[2].fileOffset
, mappings
[2].fileOffset
+ mappings
[2].size
));
652 if (dyldSharedCache
->header
.localSymbolsSize
)
653 sharedCacheRegions
.emplace_back(std::make_pair(dyldSharedCache
->header
.localSymbolsOffset
, dyldSharedCache
->header
.localSymbolsOffset
+ dyldSharedCache
->header
.localSymbolsSize
));
654 size_t inBbufferSize
= 0;
655 for (auto& sharedCacheRegion
: sharedCacheRegions
)
656 inBbufferSize
+= (sharedCacheRegion
.second
- sharedCacheRegion
.first
);
657 uint32_t slotCountFromRegions
= (uint32_t)((inBbufferSize
+ CS_PAGE_SIZE
- 1) / CS_PAGE_SIZE
);
659 // Now take the cd hash from the cache itself and validate the regions we found.
660 uint8_t* codeSignatureRegion
= (uint8_t*)mapped_cache
+ dyldSharedCache
->header
.codeSignatureOffset
;
661 CS_SuperBlob
* sb
= reinterpret_cast<CS_SuperBlob
*>(codeSignatureRegion
);
662 if (sb
->magic
!= htonl(CSMAGIC_EMBEDDED_SIGNATURE
)) {
663 fprintf(stderr
, "Error: dyld shared cache code signature magic is incorrect.\n");
667 size_t sbSize
= ntohl(sb
->length
);
668 uint64_t requiredSizeForCS
= dyldSharedCache
->header
.codeSignatureOffset
+ sbSize
;
669 if ( requiredSizeForCS
> size
) {
670 fprintf(stderr
, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size
, requiredSizeForCS
);
674 // Find the offset to the code directory.
675 CS_CodeDirectory
* cd
= nullptr;
676 for (unsigned i
=0; i
!= sb
->count
; ++i
) {
677 if (ntohl(sb
->index
[i
].type
) == CSSLOT_CODEDIRECTORY
) {
678 cd
= (CS_CodeDirectory
*)(codeSignatureRegion
+ ntohl(sb
->index
[i
].offset
));
684 fprintf(stderr
, "Error: dyld shared cache code signature directory is missing.\n");
688 if ( (uint8_t*)cd
> (codeSignatureRegion
+ sbSize
) ) {
689 fprintf(stderr
, "Error: dyld shared cache code signature directory is out of bounds.\n");
693 if ( cd
->magic
!= htonl(CSMAGIC_CODEDIRECTORY
) ) {
694 fprintf(stderr
, "Error: dyld shared cache code signature directory magic is incorrect.\n");
698 if ( ntohl(cd
->nCodeSlots
) < slotCountFromRegions
) {
699 fprintf(stderr
, "Error: dyld shared cache code signature directory num slots is incorrect.\n");
703 uint32_t dscDigestFormat
= kCCDigestNone
;
704 switch (cd
->hashType
) {
705 case CS_HASHTYPE_SHA1
:
706 #pragma clang diagnostic push
707 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
708 dscDigestFormat
= kCCDigestSHA1
;
709 #pragma clang diagnostic pop
711 case CS_HASHTYPE_SHA256
:
712 dscDigestFormat
= kCCDigestSHA256
;
718 if (dscDigestFormat
!= kCCDigestNone
) {
719 const uint64_t csPageSize
= 1 << cd
->pageSize
;
720 size_t hashOffset
= ntohl(cd
->hashOffset
);
721 uint8_t* hashSlot
= (uint8_t*)cd
+ hashOffset
;
722 uint8_t cdHashBuffer
[cd
->hashSize
];
724 // Skip local symbols for now as those aren't being codesign correctly right now.
725 size_t inBbufferSize
= 0;
726 for (auto& sharedCacheRegion
: sharedCacheRegions
) {
727 if (sharedCacheRegion
.first
== dyldSharedCache
->header
.localSymbolsOffset
)
729 inBbufferSize
+= (sharedCacheRegion
.second
- sharedCacheRegion
.first
);
731 uint32_t slotCountToProcess
= (uint32_t)((inBbufferSize
+ CS_PAGE_SIZE
- 1) / CS_PAGE_SIZE
);
733 for (unsigned i
= 0; i
!= slotCountToProcess
; ++i
) {
734 // Skip data pages as those may have been slid by ASLR in the extracted file
735 uint64_t fileOffset
= i
* csPageSize
;
736 if ( (fileOffset
>= mappings
[1].fileOffset
) && (fileOffset
< (mappings
[1].fileOffset
+ mappings
[1].size
)) )
739 CCDigest(dscDigestFormat
, (uint8_t*)mapped_cache
+ fileOffset
, (size_t)csPageSize
, cdHashBuffer
);
740 uint8_t* cacheCdHashBuffer
= hashSlot
+ (i
* cd
->hashSize
);
741 if (memcmp(cdHashBuffer
, cacheCdHashBuffer
, cd
->hashSize
) != 0) {
742 fprintf(stderr
, "Error: dyld shared cache code signature for page %d is incorrect.\n", i
);
750 int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path
, const char* extraction_root_path
,
751 progress_block progress
)
754 if (stat(shared_cache_file_path
, &statbuf
)) {
755 fprintf(stderr
, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path
);
759 int cache_fd
= open(shared_cache_file_path
, O_RDONLY
);
761 fprintf(stderr
, "Error: failed to open shared cache file at %s\n", shared_cache_file_path
);
765 void* mapped_cache
= mmap(NULL
, (size_t)statbuf
.st_size
, PROT_READ
, MAP_PRIVATE
, cache_fd
, 0);
766 if (mapped_cache
== MAP_FAILED
) {
767 fprintf(stderr
, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path
, errno
);
773 // instantiate arch specific dylib maker
774 dylib_maker_func
* dylib_create_func
= nullptr;
775 if ( strcmp((char*)mapped_cache
, "dyld_v1 i386") == 0 )
776 dylib_create_func
= dylib_maker
<x86
>;
777 else if ( strcmp((char*)mapped_cache
, "dyld_v1 x86_64") == 0 )
778 dylib_create_func
= dylib_maker
<x86_64
>;
779 else if ( strcmp((char*)mapped_cache
, "dyld_v1 x86_64h") == 0 )
780 dylib_create_func
= dylib_maker
<x86_64
>;
781 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv5") == 0 )
782 dylib_create_func
= dylib_maker
<arm
>;
783 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv6") == 0 )
784 dylib_create_func
= dylib_maker
<arm
>;
785 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv7") == 0 )
786 dylib_create_func
= dylib_maker
<arm
>;
787 else if ( strncmp((char*)mapped_cache
, "dyld_v1 armv7", 14) == 0 )
788 dylib_create_func
= dylib_maker
<arm
>;
789 else if ( strcmp((char*)mapped_cache
, "dyld_v1 arm64") == 0 )
790 dylib_create_func
= dylib_maker
<arm64
>;
791 #if SUPPORT_ARCH_arm64e
792 else if ( strcmp((char*)mapped_cache
, "dyld_v1 arm64e") == 0 )
793 dylib_create_func
= dylib_maker
<arm64
>;
795 #if SUPPORT_ARCH_arm64_32
796 else if ( strcmp((char*)mapped_cache
, "dyld_v1arm64_32") == 0 )
797 dylib_create_func
= dylib_maker
<arm64_32
>;
800 fprintf(stderr
, "Error: unrecognized dyld shared cache magic.\n");
801 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
805 // Verify that the cache isn't corrupt.
806 if (int result
= sharedCacheIsValid(mapped_cache
, (uint64_t)statbuf
.st_size
)) {
807 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
811 // iterate through all images in cache and build map of dylibs and segments
812 __block NameToSegments map
;
815 result
= dyld_shared_cache_iterate(mapped_cache
, (uint32_t)statbuf
.st_size
, ^(const dyld_shared_cache_dylib_info
* dylibInfo
, const dyld_shared_cache_segment_info
* segInfo
) {
816 map
[dylibInfo
->path
].push_back(seg_info(segInfo
->name
, segInfo
->fileOffset
, segInfo
->fileSize
));
820 fprintf(stderr
, "Error: dyld_shared_cache_iterate_segments_with_slide failed.\n");
821 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
825 // for each dylib instantiate a dylib file
826 SharedCacheExtractor
extractor(map
, extraction_root_path
, dylib_create_func
, mapped_cache
, progress
);
827 result
= extractor
.extractCaches();
829 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
835 int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path
, const char* extraction_root_path
)
837 return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path
, extraction_root_path
,
838 ^(unsigned , unsigned) {} );
849 typedef int (*extractor_proc
)(const char* shared_cache_file_path
, const char* extraction_root_path
,
850 void (^progress
)(unsigned current
, unsigned total
));
// Small command-line driver: loads dsc_extractor.bundle from a fixed Xcode
// path, looks up dyld_shared_cache_extract_dylibs_progress in it, and calls
// it with argv[1] (cache file) and argv[2] (output directory), printing
// "current/total" on stdout for each progress callback and the final result
// on stderr.
// NOTE(review): the argument-count test guarding the usage message and the
// return statements are not visible in this listing.
852 int main(int argc
, const char* argv
[])
855 fprintf(stderr
, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
859 //void* handle = dlopen("/Volumes/my/src/dyld/build/Debug/dsc_extractor.bundle", RTLD_LAZY);
860 void* handle
= dlopen("/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/usr/lib/dsc_extractor.bundle", RTLD_LAZY
);
861 if ( handle
== NULL
) {
862 fprintf(stderr
, "dsc_extractor.bundle could not be loaded\n");
// Resolve the extraction entry point by name from the loaded bundle.
866 extractor_proc proc
= (extractor_proc
)dlsym(handle
, "dyld_shared_cache_extract_dylibs_progress");
867 if ( proc
== NULL
) {
868 fprintf(stderr
, "dsc_extractor.bundle did not have dyld_shared_cache_extract_dylibs_progress symbol\n");
// Run the extraction; the block prints one "current/total" line per callback.
872 int result
= (*proc
)(argv
[1], argv
[2], ^(unsigned c
, unsigned total
) { printf("%d/%d\n", c
, total
); } );
873 fprintf(stderr
, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result
);