1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
3 * Copyright (c) 2011 Apple Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
34 #include <sys/syslimits.h>
35 #include <libkern/OSByteOrder.h>
36 #include <mach-o/fat.h>
37 #include <mach-o/arch.h>
38 #include <mach-o/loader.h>
39 #include <Availability.h>
41 #include "CodeSigningTypes.h"
42 #include <CommonCrypto/CommonHMAC.h>
43 #include <CommonCrypto/CommonDigest.h>
44 #include <CommonCrypto/CommonDigestSPI.h>
47 #include "Architectures.hpp"
48 #include "MachOFileAbstraction.hpp"
49 #include "CacheFileAbstraction.hpp"
51 #include "dsc_iterator.h"
52 #include "dsc_extractor.h"
53 #include "MachOTrie.hpp"
54 #include "SupportedArchs.h"
55 #include "DyldSharedCache.h"
60 #include <unordered_map>
62 #include <dispatch/dispatch.h>
66 seg_info(const char* n
, uint64_t o
, uint64_t s
)
67 : segName(n
), offset(o
), sizem(s
) { }
75 size_t operator()(const char* __s
) const {
84 bool operator()(const char* left
, const char* right
) const { return (strcmp(left
, right
) == 0); }
86 typedef std
::unordered_map
<const char*, std
::vector
<seg_info
>, CStringHash
, CStringEquals
> NameToSegments
;
88 // Filter to find individual symbol re-exports in trie
89 class NotReExportSymbol
{
91 NotReExportSymbol(const std
::set
<int> &rd
) :_reexportDeps(rd
) {}
92 bool operator()(const mach_o
::trie
::Entry
&entry
) const {
93 bool result
= isSymbolReExport(entry
);
95 // <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
96 ::free((void*)entry
.name
);
97 const_cast<mach_o
::trie
::Entry
*>(&entry
)->name
= NULL
;
102 bool isSymbolReExport(const mach_o
::trie
::Entry
&entry
) const {
103 if ( (entry
.flags
& EXPORT_SYMBOL_FLAGS_KIND_MASK
) != EXPORT_SYMBOL_FLAGS_KIND_REGULAR
)
105 if ( (entry
.flags
& EXPORT_SYMBOL_FLAGS_REEXPORT
) == 0 )
107 // If the symbol comes from a dylib that is re-exported, this is not an individual symbol re-export
108 if ( _reexportDeps
.count((int)entry
.other
) != 0 )
112 const std
::set
<int> &_reexportDeps
;
// Empty class template parameterised on a pointer/endian policy type P.
// It declares no members.
template <typename P>
struct LoadCommandInfo
{
};
119 template <typename A
>
120 class LinkeditOptimizer
{
121 typedef typename A
::P P
;
122 typedef typename A
::P
::E E
;
123 typedef typename A
::P
::uint_t pint_t
;
126 macho_segment_command
<P
>* linkEditSegCmd
= NULL
;
127 macho_symtab_command
<P
>* symtab
= NULL
;
128 macho_dysymtab_command
<P
>* dynamicSymTab
= NULL
;
129 macho_linkedit_data_command
<P
>* functionStarts
= NULL
;
130 macho_linkedit_data_command
<P
>* dataInCode
= NULL
;
131 uint32_t exportsTrieOffset
= 0;
132 uint32_t exportsTrieSize
= 0;
133 std
::set
<int> reexportDeps
;
137 void optimize_loadcommands(macho_header
<typename A
::P
>* mh
)
139 typedef typename A
::P P
;
140 typedef typename A
::P
::E E
;
141 typedef typename A
::P
::uint_t pint_t
;
143 // update header flags
144 mh
->set_flags(mh
->flags() & 0x7FFFFFFF); // remove in-cache bit
146 // update load commands
147 uint64_t cumulativeFileSize
= 0;
148 const unsigned origLoadCommandsSize
= mh
->sizeofcmds();
149 unsigned bytesRemaining
= origLoadCommandsSize
;
150 unsigned removedCount
= 0;
151 const macho_load_command
<P
>* const cmds
= (macho_load_command
<P
>*)((uint8_t*)mh
+ sizeof(macho_header
<P
>));
152 const uint32_t cmdCount
= mh
->ncmds();
153 const macho_load_command
<P
>* cmd
= cmds
;
155 for (uint32_t i
= 0; i
< cmdCount
; ++i
) {
157 switch ( cmd
->cmd() ) {
158 case macho_segment_command
<P
>::CMD
:
160 // update segment/section file offsets
161 macho_segment_command
<P
>* segCmd
= (macho_segment_command
<P
>*)cmd
;
162 segCmd
->set_fileoff(cumulativeFileSize
);
163 segCmd
->set_filesize(segCmd
->vmsize());
164 macho_section
<P
>* const sectionsStart
= (macho_section
<P
>*)((char*)segCmd
+ sizeof(macho_segment_command
<P
>));
165 macho_section
<P
>* const sectionsEnd
= §ionsStart
[segCmd
->nsects()];
166 for(macho_section
<P
>* sect
= sectionsStart
; sect
< sectionsEnd
; ++sect
) {
167 if ( sect
->offset() != 0 )
168 sect
->set_offset((uint32_t)(cumulativeFileSize
+sect
->addr()-segCmd
->vmaddr()));
170 if ( strcmp(segCmd
->segname(), "__LINKEDIT") == 0 ) {
171 linkEditSegCmd
= segCmd
;
173 cumulativeFileSize
+= segCmd
->filesize();
176 case LC_DYLD_INFO_ONLY
:
178 // zero out all dyld info
179 macho_dyld_info_command
<P
>* dyldInfo
= (macho_dyld_info_command
<P
>*)cmd
;
180 exportsTrieOffset
= dyldInfo
->export_off();
181 exportsTrieSize
= dyldInfo
->export_size();
182 dyldInfo
->set_rebase_off(0);
183 dyldInfo
->set_rebase_size(0);
184 dyldInfo
->set_bind_off(0);
185 dyldInfo
->set_bind_size(0);
186 dyldInfo
->set_weak_bind_off(0);
187 dyldInfo
->set_weak_bind_size(0);
188 dyldInfo
->set_lazy_bind_off(0);
189 dyldInfo
->set_lazy_bind_size(0);
190 dyldInfo
->set_export_off(0);
191 dyldInfo
->set_export_size(0);
195 symtab
= (macho_symtab_command
<P
>*)cmd
;
198 dynamicSymTab
= (macho_dysymtab_command
<P
>*)cmd
;
200 case LC_FUNCTION_STARTS
:
201 functionStarts
= (macho_linkedit_data_command
<P
>*)cmd
;
203 case LC_DATA_IN_CODE
:
204 dataInCode
= (macho_linkedit_data_command
<P
>*)cmd
;
207 case LC_LOAD_WEAK_DYLIB
:
208 case LC_REEXPORT_DYLIB
:
209 case LC_LOAD_UPWARD_DYLIB
:
211 if ( cmd
->cmd() == LC_REEXPORT_DYLIB
) {
212 reexportDeps
.insert(depIndex
);
215 case LC_SEGMENT_SPLIT_INFO
:
216 // <rdar://problem/23212513> dylibs iOS 9 dyld caches have bogus LC_SEGMENT_SPLIT_INFO
220 uint32_t cmdSize
= cmd
->cmdsize();
221 macho_load_command
<P
>* nextCmd
= (macho_load_command
<P
>*)(((uint8_t*)cmd
)+cmdSize
);
223 ::memmove((void*)cmd
, (void*)nextCmd
, bytesRemaining
);
227 bytesRemaining
-= cmdSize
;
231 // zero out stuff removed
232 ::bzero((void*)cmd
, bytesRemaining
);
234 mh
->set_ncmds(cmdCount
- removedCount
);
235 mh
->set_sizeofcmds(origLoadCommandsSize
- bytesRemaining
);
238 int optimize_linkedit(std
::vector
<uint8_t> &new_linkedit_data
, uint64_t textOffsetInCache
, const void* mapped_cache
)
240 typedef typename A
::P P
;
241 typedef typename A
::P
::E E
;
242 typedef typename A
::P
::uint_t pint_t
;
244 // rebuild symbol table
245 if ( linkEditSegCmd
== NULL
) {
246 fprintf(stderr
, "__LINKEDIT not found\n");
249 if ( symtab
== NULL
) {
250 fprintf(stderr
, "LC_SYMTAB not found\n");
253 if ( dynamicSymTab
== NULL
) {
254 fprintf(stderr
, "LC_DYSYMTAB not found\n");
258 const uint64_t newFunctionStartsOffset
= new_linkedit_data
.size();
259 uint32_t functionStartsSize
= 0;
260 if ( functionStarts
!= NULL
) {
261 // copy function starts from original cache file to new mapped dylib file
262 functionStartsSize
= functionStarts
->datasize();
263 new_linkedit_data
.insert(new_linkedit_data
.end(),
264 (char*)mapped_cache
+ functionStarts
->dataoff(),
265 (char*)mapped_cache
+ functionStarts
->dataoff() + functionStartsSize
);
269 while ((linkEditSegCmd
->fileoff() + new_linkedit_data
.size()) % sizeof(pint_t
))
270 new_linkedit_data
.push_back(0);
272 const uint64_t newDataInCodeOffset
= new_linkedit_data
.size();
273 uint32_t dataInCodeSize
= 0;
274 if ( dataInCode
!= NULL
) {
275 // copy data-in-code info from original cache file to new mapped dylib file
276 dataInCodeSize
= dataInCode
->datasize();
277 new_linkedit_data
.insert(new_linkedit_data
.end(),
278 (char*)mapped_cache
+ dataInCode
->dataoff(),
279 (char*)mapped_cache
+ dataInCode
->dataoff() + dataInCodeSize
);
282 std
::vector
<mach_o
::trie
::Entry
> exports
;
283 if ( exportsTrieSize
!= 0 ) {
284 const uint8_t* exportsStart
= ((uint8_t*)mapped_cache
) + exportsTrieOffset
;
285 const uint8_t* exportsEnd
= &exportsStart
[exportsTrieSize
];
286 mach_o
::trie
::parseTrie(exportsStart
, exportsEnd
, exports
);
287 exports
.erase(std
::remove_if(exports
.begin(), exports
.end(), NotReExportSymbol(reexportDeps
)), exports
.end());
290 // look for local symbol info in unmapped part of shared cache
291 dyldCacheHeader
<E
>* header
= (dyldCacheHeader
<E
>*)mapped_cache
;
292 macho_nlist
<P
>* localNlists
= NULL
;
293 uint32_t localNlistCount
= 0;
294 const char* localStrings
= NULL
;
295 const char* localStringsEnd
= NULL
;
296 if ( header
->mappingOffset() > offsetof(dyld_cache_header
,localSymbolsSize
) ) {
297 dyldCacheLocalSymbolsInfo
<E
>* localInfo
= (dyldCacheLocalSymbolsInfo
<E
>*)(((uint8_t*)mapped_cache
) + header
->localSymbolsOffset());
298 dyldCacheLocalSymbolEntry
<E
>* entries
= (dyldCacheLocalSymbolEntry
<E
>*)(((uint8_t*)mapped_cache
) + header
->localSymbolsOffset() + localInfo
->entriesOffset());
299 macho_nlist
<P
>* allLocalNlists
= (macho_nlist
<P
>*)(((uint8_t*)localInfo
) + localInfo
->nlistOffset());
300 const uint32_t entriesCount
= localInfo
->entriesCount();
301 for (uint32_t i
=0; i
< entriesCount
; ++i
) {
302 if ( entries
[i
].dylibOffset() == textOffsetInCache
) {
303 uint32_t localNlistStart
= entries
[i
].nlistStartIndex();
304 localNlistCount
= entries
[i
].nlistCount();
305 localNlists
= &allLocalNlists
[localNlistStart
];
306 localStrings
= ((char*)localInfo
) + localInfo
->stringsOffset();
307 localStringsEnd
= &localStrings
[localInfo
->stringsSize()];
312 // compute number of symbols in new symbol table
313 const macho_nlist
<P
>* const mergedSymTabStart
= (macho_nlist
<P
>*)(((uint8_t*)mapped_cache
) + symtab
->symoff());
314 const macho_nlist
<P
>* const mergedSymTabend
= &mergedSymTabStart
[symtab
->nsyms()];
315 uint32_t newSymCount
= symtab
->nsyms();
316 if ( localNlists
!= NULL
) {
317 newSymCount
= localNlistCount
;
318 for (const macho_nlist
<P
>* s
= mergedSymTabStart
; s
!= mergedSymTabend
; ++s
) {
319 // skip any locals in cache
320 if ( (s
->n_type() & (N_TYPE
|N_EXT
)) == N_SECT
)
326 // add room for N_INDR symbols for re-exported symbols
327 newSymCount
+= exports
.size();
329 // copy symbol entries and strings from original cache file to new mapped dylib file
330 const char* mergedStringPoolStart
= (char*)mapped_cache
+ symtab
->stroff();
331 const char* mergedStringPoolEnd
= &mergedStringPoolStart
[symtab
->strsize()];
333 // First count how many entries we need
334 std
::vector
<macho_nlist
<P
>> newSymTab
;
335 newSymTab
.reserve(newSymCount
);
336 std
::vector
<char> newSymNames
;
338 // first pool entry is always empty string
339 newSymNames
.push_back('\0');
341 for (const macho_nlist
<P
>* s
= mergedSymTabStart
; s
!= mergedSymTabend
; ++s
) {
342 // if we have better local symbol info, skip any locals here
343 if ( (localNlists
!= NULL
) && ((s
->n_type() & (N_TYPE
|N_EXT
)) == N_SECT
) )
345 macho_nlist
<P
> t
= *s
;
346 t
.set_n_strx((uint32_t)newSymNames
.size());
347 const char* symName
= &mergedStringPoolStart
[s
->n_strx()];
348 if ( symName
> mergedStringPoolEnd
)
349 symName
= "<corrupt symbol name>";
350 newSymNames
.insert(newSymNames
.end(),
352 symName
+ (strlen(symName
) + 1));
353 newSymTab
.push_back(t
);
355 // <rdar://problem/16529213> recreate N_INDR symbols in extracted dylibs for debugger
356 for (std
::vector
<mach_o
::trie
::Entry
>::iterator it
= exports
.begin(); it
!= exports
.end(); ++it
) {
358 memset(&t
, 0, sizeof(t
));
359 t
.set_n_strx((uint32_t)newSymNames
.size());
360 t
.set_n_type(N_INDR
| N_EXT
);
363 newSymNames
.insert(newSymNames
.end(),
365 it
->name
+ (strlen(it
->name
) + 1));
366 const char* importName
= it
->importName
;
367 if ( *importName
== '\0' )
368 importName
= it
->name
;
369 t
.set_n_value(newSymNames
.size());
370 newSymNames
.insert(newSymNames
.end(),
372 importName
+ (strlen(importName
) + 1));
373 newSymTab
.push_back(t
);
375 if ( localNlists
!= NULL
) {
376 // update load command to reflect new count of locals
377 dynamicSymTab
->set_ilocalsym((uint32_t)newSymTab
.size());
378 dynamicSymTab
->set_nlocalsym(localNlistCount
);
379 // copy local symbols
380 for (uint32_t i
=0; i
< localNlistCount
; ++i
) {
381 const char* localName
= &localStrings
[localNlists
[i
].n_strx()];
382 if ( localName
> localStringsEnd
)
383 localName
= "<corrupt local symbol name>";
384 macho_nlist
<P
> t
= localNlists
[i
];
385 t
.set_n_strx((uint32_t)newSymNames
.size());
386 newSymNames
.insert(newSymNames
.end(),
388 localName
+ (strlen(localName
) + 1));
389 newSymTab
.push_back(t
);
393 if ( newSymCount
!= newSymTab
.size() ) {
394 fprintf(stderr
, "symbol count miscalculation\n");
398 //const uint64_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
399 //macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
400 //char* const newStringPoolStart = (char*)mh + newStringPoolOffset;
403 while ((linkEditSegCmd
->fileoff() + new_linkedit_data
.size()) % sizeof(pint_t
))
404 new_linkedit_data
.push_back(0);
406 const uint64_t newSymTabOffset
= new_linkedit_data
.size();
409 for (macho_nlist
<P
>& sym
: newSymTab
) {
410 uint8_t symData
[sizeof(macho_nlist
<P
>)];
411 memcpy(&symData
, &sym
, sizeof(sym
));
412 new_linkedit_data
.insert(new_linkedit_data
.end(), &symData
[0], &symData
[sizeof(macho_nlist
<P
>)]);
415 const uint64_t newIndSymTabOffset
= new_linkedit_data
.size();
417 // Copy indirect symbol table
418 const uint32_t* mergedIndSymTab
= (uint32_t*)((char*)mapped_cache
+ dynamicSymTab
->indirectsymoff());
419 new_linkedit_data
.insert(new_linkedit_data
.end(),
420 (char*)mergedIndSymTab
,
421 (char*)(mergedIndSymTab
+ dynamicSymTab
->nindirectsyms()));
423 const uint64_t newStringPoolOffset
= new_linkedit_data
.size();
425 // pointer align string pool size
426 while (newSymNames
.size() % sizeof(pint_t
))
427 newSymNames
.push_back('\0');
429 new_linkedit_data
.insert(new_linkedit_data
.end(), newSymNames
.begin(), newSymNames
.end());
431 // update load commands
432 if ( functionStarts
!= NULL
) {
433 functionStarts
->set_dataoff((uint32_t)(newFunctionStartsOffset
+ linkEditSegCmd
->fileoff()));
434 functionStarts
->set_datasize(functionStartsSize
);
436 if ( dataInCode
!= NULL
) {
437 dataInCode
->set_dataoff((uint32_t)(newDataInCodeOffset
+ linkEditSegCmd
->fileoff()));
438 dataInCode
->set_datasize(dataInCodeSize
);
441 symtab
->set_nsyms(newSymCount
);
442 symtab
->set_symoff((uint32_t)(newSymTabOffset
+ linkEditSegCmd
->fileoff()));
443 symtab
->set_stroff((uint32_t)(newStringPoolOffset
+ linkEditSegCmd
->fileoff()));
444 symtab
->set_strsize((uint32_t)newSymNames
.size());
445 dynamicSymTab
->set_extreloff(0);
446 dynamicSymTab
->set_nextrel(0);
447 dynamicSymTab
->set_locreloff(0);
448 dynamicSymTab
->set_nlocrel(0);
449 dynamicSymTab
->set_indirectsymoff((uint32_t)(newIndSymTabOffset
+ linkEditSegCmd
->fileoff()));
450 linkEditSegCmd
->set_filesize(symtab
->stroff()+symtab
->strsize() - linkEditSegCmd
->fileoff());
451 linkEditSegCmd
->set_vmsize( (linkEditSegCmd
->filesize()+4095) & (-4096) );
453 // <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
454 for (std
::vector
<mach_o
::trie
::Entry
>::iterator it
= exports
.begin(); it
!= exports
.end(); ++it
) {
455 ::free((void*)(it
->name
));
// Creates every missing directory on the way to file_path (like `mkdir -p`
// applied to the file's parent directory).  The final path component is
// treated as a file name and is never created.
//
// file_path: path whose directories should exist; a path with no '/' is a
//            no-op.  Scanning for components starts at the second character,
//            so a leading '/' is not treated as a component of its own.
// mkdir() failures are ignored on purpose: components that already exist
// report an error, and a real failure surfaces when the caller opens the file.
static void make_dirs(const char* file_path)
{
    //printf("make_dirs(%s)\n", file_path);
    // Work on a private, writable, NUL-terminated copy of the path.
    std::vector<char> dirs(file_path, file_path + strlen(file_path) + 1);

    char* const lastSlash = strrchr(dirs.data(), '/');
    if ( lastSlash == NULL )
        return;
    // drop the file name, keep the trailing '/'
    lastSlash[1] = '\0';

    struct stat stat_buf;
    if ( stat(dirs.data(), &stat_buf) == 0 )
        return;     // parent directory already exists

    // Walk the path one component at a time, temporarily terminating the
    // string at each '/' so mkdir() sees the prefix up to that point.
    char* afterSlash = &dirs[1];
    char* slash;
    while ( (slash = strchr(afterSlash, '/')) != NULL ) {
        *slash = '\0';
        (void)::mkdir(dirs.data(), S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
        //printf("mkdir(%s)\n", dirs.data());
        *slash = '/';
        afterSlash = slash+1;
    }
}
489 template <typename A
>
490 size_t dylib_maker(const void* mapped_cache
, std
::vector
<uint8_t> &dylib_data
, const std
::vector
<seg_info
>& segments
) {
491 typedef typename A
::P P
;
493 int32_t nfat_archs
= 0;
494 uint32_t offsetInFatFile
= 4096;
495 uint8_t *base_ptr
= &dylib_data
.front();
497 #define FH reinterpret_cast<fat_header*>(base_ptr)
498 #define FA reinterpret_cast<fat_arch*>(base_ptr + (8 + (nfat_archs - 1) * sizeof(fat_arch)))
500 if(dylib_data
.size() >= 4096 && OSSwapBigToHostInt32(FH
->magic
) == FAT_MAGIC
) {
501 // have fat header, append new arch to end
502 nfat_archs
= OSSwapBigToHostInt32(FH
->nfat_arch
);
503 offsetInFatFile
= OSSwapBigToHostInt32(FA
->offset
) + OSSwapBigToHostInt32(FA
->size
);
506 // First see if this slice already exists.
507 for(std
::vector
<seg_info
>::const_iterator it
=segments
.begin(); it
!= segments
.end(); ++it
) {
508 if(strcmp(it
->segName
, "__TEXT") == 0 ) {
509 const macho_header
<P
> *textMH
= reinterpret_cast<macho_header
<P
>*>((uint8_t*)mapped_cache
+it
->offset
);
511 // if this cputype/subtype already exist in fat header, then return immediately
512 for(int32_t i
=0; i
< nfat_archs
; ++i
) {
513 fat_arch
*afa
= reinterpret_cast<fat_arch
*>(base_ptr
+8)+i
;
514 if (afa
->cputype
== (cpu_type_t
)OSSwapHostToBigInt32(textMH
->cputype()) && afa
->cpusubtype
== (cpu_type_t
)OSSwapHostToBigInt32(textMH
->cpusubtype())) {
515 //fprintf(stderr, "arch already exists in fat dylib\n");
516 return offsetInFatFile
;
522 if (dylib_data
.empty()) {
523 // Reserve space for the fat header.
524 dylib_data
.resize(4096);
525 base_ptr
= &dylib_data
.front();
526 FH
->magic
= OSSwapHostToBigInt32(FAT_MAGIC
);
529 FH
->nfat_arch
= OSSwapHostToBigInt32(++nfat_archs
);
531 FA
->cputype
= 0; // filled in later
532 FA
->cpusubtype
= 0; // filled in later
533 FA
->offset
= OSSwapHostToBigInt32(offsetInFatFile
);
534 FA
->size
= 0; // filled in later
535 FA
->align
= OSSwapHostToBigInt32(12);
537 size_t additionalSize
= 0;
538 for(std
::vector
<seg_info
>::const_iterator it
=segments
.begin(); it
!= segments
.end(); ++it
) {
539 if ( strcmp(it
->segName
, "__LINKEDIT") != 0 )
540 additionalSize
+= it
->sizem
;
543 std
::vector
<uint8_t> new_dylib_data
;
544 new_dylib_data
.reserve(additionalSize
);
546 // Write regular segments into the buffer
547 uint64_t textOffsetInCache
= 0;
548 for( std
::vector
<seg_info
>::const_iterator it
=segments
.begin(); it
!= segments
.end(); ++it
) {
550 if(strcmp(it
->segName
, "__TEXT") == 0 ) {
551 textOffsetInCache
= it
->offset
;
552 const macho_header
<P
> *textMH
= reinterpret_cast<macho_header
<P
>*>((uint8_t*)mapped_cache
+textOffsetInCache
);
553 FA
->cputype
= OSSwapHostToBigInt32(textMH
->cputype());
554 FA
->cpusubtype
= OSSwapHostToBigInt32(textMH
->cpusubtype());
557 //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
558 // Copy all but the __LINKEDIT. It will be copied later during the optimizer in to a temporary buffer but it would
559 // not be efficient to copy it all now for each dylib.
560 if (strcmp(it
->segName
, "__LINKEDIT") == 0 )
562 std
::copy(((uint8_t*)mapped_cache
)+it
->offset
, ((uint8_t*)mapped_cache
)+it
->offset
+it
->sizem
, std
::back_inserter(new_dylib_data
));
566 std
::vector
<uint8_t> new_linkedit_data
;
567 new_linkedit_data
.reserve(1 << 20);
569 LinkeditOptimizer
<A
> linkeditOptimizer
;
570 macho_header
<P
>* mh
= (macho_header
<P
>*)&new_dylib_data
.front();
571 linkeditOptimizer
.optimize_loadcommands(mh
);
572 linkeditOptimizer
.optimize_linkedit(new_linkedit_data
, textOffsetInCache
, mapped_cache
);
574 new_dylib_data
.insert(new_dylib_data
.end(), new_linkedit_data
.begin(), new_linkedit_data
.end());
577 while (new_dylib_data
.size() % 4096)
578 new_dylib_data
.push_back(0);
580 // update fat header with new file size
581 FA
->size
= OSSwapHostToBigInt32(new_dylib_data
.size());
584 dylib_data
.insert(dylib_data
.end(), new_dylib_data
.begin(), new_dylib_data
.end());
585 return offsetInFatFile
;
588 typedef __typeof(dylib_maker
<x86
>) dylib_maker_func
;
589 typedef void (^progress_block
)(unsigned current
, unsigned total
);
591 class SharedCacheExtractor
;
592 struct SharedCacheDylibExtractor
{
593 SharedCacheDylibExtractor(const char* name
, std
::vector
<seg_info
> segInfo
)
594 : name(name
), segInfo(segInfo
) { }
596 void extractCache(SharedCacheExtractor
& context
);
599 const std
::vector
<seg_info
> segInfo
;
603 struct SharedCacheExtractor
{
604 SharedCacheExtractor(const NameToSegments
& map
,
605 const char* extraction_root_path
,
606 dylib_maker_func
* dylib_create_func
,
608 progress_block progress
)
609 : map(map
), extraction_root_path(extraction_root_path
),
610 dylib_create_func(dylib_create_func
), mapped_cache(mapped_cache
),
613 extractors
.reserve(map
.size());
614 for (const std
::pair
<const char*, std
::vector
<seg_info
>>& it
: map
)
615 extractors
.emplace_back(it
.first
, it
.second
);
617 // Limit the number of open files. 16 seems to give better performance than higher numbers.
618 sema
= dispatch_semaphore_create(16);
622 static void extractCache(void *ctx
, size_t i
);
624 const NameToSegments
& map
;
625 std
::vector
<SharedCacheDylibExtractor
> extractors
;
626 dispatch_semaphore_t sema
;
627 const char* extraction_root_path
;
628 dylib_maker_func
* dylib_create_func
;
630 progress_block progress
;
631 std
::atomic_int count
= { 0 };
634 int SharedCacheExtractor
::extractCaches() {
635 dispatch_queue_t process_queue
= dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW
, 0);
636 dispatch_apply_f(map
.size(), process_queue
,
640 for (const SharedCacheDylibExtractor
& extractor
: extractors
) {
641 if (extractor
.result
!= 0) {
642 result
= extractor
.result
;
649 void SharedCacheExtractor
::extractCache(void *ctx
, size_t i
) {
650 SharedCacheExtractor
& context
= *(SharedCacheExtractor
*)ctx
;
651 dispatch_semaphore_wait(context
.sema
, DISPATCH_TIME_FOREVER
);
652 context
.extractors
[i
].extractCache(context
);
653 dispatch_semaphore_signal(context
.sema
);
656 void SharedCacheDylibExtractor
::extractCache(SharedCacheExtractor
&context
) {
658 char dylib_path
[PATH_MAX
];
659 strcpy(dylib_path
, context
.extraction_root_path
);
660 strcat(dylib_path
, "/");
661 strcat(dylib_path
, name
);
663 //printf("%s with %lu segments\n", dylib_path, it->second.size());
664 // make sure all directories in this path exist
665 make_dirs(dylib_path
);
667 // open file, create if does not already exist
668 int fd
= ::open(dylib_path
, O_CREAT
| O_EXLOCK
| O_RDWR
, 0644);
670 fprintf(stderr
, "can't open or create dylib file %s, errnor=%d\n", dylib_path
, errno
);
676 if (fstat(fd
, &statbuf
)) {
677 fprintf(stderr
, "Error: stat failed for dyld file %s, errnor=%d\n", dylib_path
, errno
);
683 std
::vector
<uint8_t> vec((size_t)statbuf
.st_size
);
684 if(pread(fd
, &vec
.front(), vec
.size(), 0) != (long)vec
.size()) {
685 fprintf(stderr
, "can't read dylib file %s, errnor=%d\n", dylib_path
, errno
);
691 const size_t offset
= context
.dylib_create_func(context
.mapped_cache
, vec
, segInfo
);
692 context
.progress(context
.count
++, (unsigned)context
.map
.size());
694 if(offset
!= vec
.size()) {
695 //Write out the first page, and everything after offset
696 if( pwrite(fd
, &vec
.front(), 4096, 0) == -1
697 || pwrite(fd
, &vec
.front() + offset
, vec
.size() - offset
, offset
) == -1) {
698 fprintf(stderr
, "error writing, errnor=%d\n", errno
);
706 static int sharedCacheIsValid(const void* mapped_cache
, uint64_t size
) {
707 // First check that the size is good.
708 // Note the shared cache may not have a codeSignatureSize value set so we need to first make
709 // sure we have space for the CS_SuperBlob, then later crack that to check for the size of the rest.
710 const DyldSharedCache
* dyldSharedCache
= (DyldSharedCache
*)mapped_cache
;
711 uint64_t requiredSizeForCSSuperBlob
= dyldSharedCache
->header
.codeSignatureOffset
+ sizeof(CS_SuperBlob
);
712 const dyld_cache_mapping_info
* mappings
= (dyld_cache_mapping_info
*)((uint8_t*)mapped_cache
+ dyldSharedCache
->header
.mappingOffset
);
713 if ( requiredSizeForCSSuperBlob
> size
) {
714 fprintf(stderr
, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size
, requiredSizeForCSSuperBlob
);
718 // Now see if the code signatures are valid as that tells us the pages aren't corrupt.
719 // First find all of the regions of the shared cache we computed cd hashes
720 std
::vector
<std
::pair
<uint64_t, uint64_t>> sharedCacheRegions
;
721 sharedCacheRegions
.emplace_back(std
::make_pair(mappings
[0].fileOffset
, mappings
[0].fileOffset
+ mappings
[0].size
));
722 sharedCacheRegions
.emplace_back(std
::make_pair(mappings
[1].fileOffset
, mappings
[1].fileOffset
+ mappings
[1].size
));
723 sharedCacheRegions
.emplace_back(std
::make_pair(mappings
[2].fileOffset
, mappings
[2].fileOffset
+ mappings
[2].size
));
724 if (dyldSharedCache
->header
.localSymbolsSize
)
725 sharedCacheRegions
.emplace_back(std
::make_pair(dyldSharedCache
->header
.localSymbolsOffset
, dyldSharedCache
->header
.localSymbolsOffset
+ dyldSharedCache
->header
.localSymbolsSize
));
726 size_t inBbufferSize
= 0;
727 for (auto& sharedCacheRegion
: sharedCacheRegions
)
728 inBbufferSize
+= (sharedCacheRegion
.second
- sharedCacheRegion
.first
);
729 uint32_t slotCountFromRegions
= (uint32_t)((inBbufferSize
+ CS_PAGE_SIZE
- 1) / CS_PAGE_SIZE
);
731 // Now take the cd hash from the cache itself and validate the regions we found.
732 uint8_t* codeSignatureRegion
= (uint8_t*)mapped_cache
+ dyldSharedCache
->header
.codeSignatureOffset
;
733 CS_SuperBlob
* sb
= reinterpret_cast<CS_SuperBlob
*>(codeSignatureRegion
);
734 if (sb
->magic
!= htonl(CSMAGIC_EMBEDDED_SIGNATURE
)) {
735 fprintf(stderr
, "Error: dyld shared cache code signature magic is incorrect.\n");
739 size_t sbSize
= ntohl(sb
->length
);
740 uint64_t requiredSizeForCS
= dyldSharedCache
->header
.codeSignatureOffset
+ sbSize
;
741 if ( requiredSizeForCS
> size
) {
742 fprintf(stderr
, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size
, requiredSizeForCS
);
746 // Find the offset to the code directory.
747 CS_CodeDirectory
* cd
= nullptr;
748 for (unsigned i
=0; i
!= sb
->count
; ++i
) {
749 if (ntohl(sb
->index
[i
].type
) == CSSLOT_CODEDIRECTORY
) {
750 cd
= (CS_CodeDirectory
*)(codeSignatureRegion
+ ntohl(sb
->index
[i
].offset
));
756 fprintf(stderr
, "Error: dyld shared cache code signature directory is missing.\n");
760 if ( (uint8_t*)cd
> (codeSignatureRegion
+ sbSize
) ) {
761 fprintf(stderr
, "Error: dyld shared cache code signature directory is out of bounds.\n");
765 if ( cd
->magic
!= htonl(CSMAGIC_CODEDIRECTORY
) ) {
766 fprintf(stderr
, "Error: dyld shared cache code signature directory magic is incorrect.\n");
770 if ( ntohl(cd
->nCodeSlots
) < slotCountFromRegions
) {
771 fprintf(stderr
, "Error: dyld shared cache code signature directory num slots is incorrect.\n");
775 uint32_t dscDigestFormat
= kCCDigestNone
;
776 switch (cd
->hashType
) {
777 case CS_HASHTYPE_SHA1
:
778 dscDigestFormat
= kCCDigestSHA1
;
780 case CS_HASHTYPE_SHA256
:
781 dscDigestFormat
= kCCDigestSHA256
;
787 if (dscDigestFormat
!= kCCDigestNone
) {
788 const uint64_t csPageSize
= 1 << cd
->pageSize
;
789 size_t hashOffset
= ntohl(cd
->hashOffset
);
790 uint8_t* hashSlot
= (uint8_t*)cd
+ hashOffset
;
791 uint8_t cdHashBuffer
[cd
->hashSize
];
793 // Skip local symbols for now as those aren't being codesign correctly right now.
794 size_t inBbufferSize
= 0;
795 for (auto& sharedCacheRegion
: sharedCacheRegions
) {
796 if (sharedCacheRegion
.first
== dyldSharedCache
->header
.localSymbolsOffset
)
798 inBbufferSize
+= (sharedCacheRegion
.second
- sharedCacheRegion
.first
);
800 uint32_t slotCountToProcess
= (uint32_t)((inBbufferSize
+ CS_PAGE_SIZE
- 1) / CS_PAGE_SIZE
);
802 for (unsigned i
= 0; i
!= slotCountToProcess
; ++i
) {
803 // Skip data pages as those may have been slid by ASLR in the extracted file
804 uint64_t fileOffset
= i
* csPageSize
;
805 if ( (fileOffset
>= mappings
[1].fileOffset
) && (fileOffset
< (mappings
[1].fileOffset
+ mappings
[1].size
)) )
808 CCDigest(dscDigestFormat
, (uint8_t*)mapped_cache
+ fileOffset
, csPageSize
, cdHashBuffer
);
809 uint8_t* cacheCdHashBuffer
= hashSlot
+ (i
* cd
->hashSize
);
810 if (memcmp(cdHashBuffer
, cacheCdHashBuffer
, cd
->hashSize
) != 0) {
811 fprintf(stderr
, "Error: dyld shared cache code signature for page %d is incorrect.\n", i
);
819 int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path
, const char* extraction_root_path
,
820 progress_block progress
)
823 if (stat(shared_cache_file_path
, &statbuf
)) {
824 fprintf(stderr
, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path
);
828 int cache_fd
= open(shared_cache_file_path
, O_RDONLY
);
830 fprintf(stderr
, "Error: failed to open shared cache file at %s\n", shared_cache_file_path
);
834 void* mapped_cache
= mmap(NULL
, (size_t)statbuf
.st_size
, PROT_READ
, MAP_PRIVATE
, cache_fd
, 0);
835 if (mapped_cache
== MAP_FAILED
) {
836 fprintf(stderr
, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path
, errno
);
842 // instantiate arch specific dylib maker
843 dylib_maker_func
* dylib_create_func
= nullptr;
844 if ( strcmp((char*)mapped_cache
, "dyld_v1 i386") == 0 )
845 dylib_create_func
= dylib_maker
<x86
>;
846 else if ( strcmp((char*)mapped_cache
, "dyld_v1 x86_64") == 0 )
847 dylib_create_func
= dylib_maker
<x86_64
>;
848 else if ( strcmp((char*)mapped_cache
, "dyld_v1 x86_64h") == 0 )
849 dylib_create_func
= dylib_maker
<x86_64
>;
850 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv5") == 0 )
851 dylib_create_func
= dylib_maker
<arm
>;
852 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv6") == 0 )
853 dylib_create_func
= dylib_maker
<arm
>;
854 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv7") == 0 )
855 dylib_create_func
= dylib_maker
<arm
>;
856 else if ( strncmp((char*)mapped_cache
, "dyld_v1 armv7", 14) == 0 )
857 dylib_create_func
= dylib_maker
<arm
>;
858 else if ( strcmp((char*)mapped_cache
, "dyld_v1 arm64") == 0 )
859 dylib_create_func
= dylib_maker
<arm64
>;
860 #if SUPPORT_ARCH_arm64e
861 else if ( strcmp((char*)mapped_cache
, "dyld_v1 arm64e") == 0 )
862 dylib_create_func
= dylib_maker
<arm64
>;
864 #if SUPPORT_ARCH_arm64_32
865 else if ( strcmp((char*)mapped_cache
, "dyld_v1arm64_32") == 0 )
866 dylib_create_func
= dylib_maker
<arm64_32
>;
869 fprintf(stderr
, "Error: unrecognized dyld shared cache magic.\n");
870 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
874 // Verify that the cache isn't corrupt.
875 if (int result
= sharedCacheIsValid(mapped_cache
, (uint64_t)statbuf
.st_size
)) {
876 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
880 // iterate through all images in cache and build map of dylibs and segments
881 __block NameToSegments map
;
884 result
= dyld_shared_cache_iterate(mapped_cache
, (uint32_t)statbuf
.st_size
, ^(const dyld_shared_cache_dylib_info
* dylibInfo
, const dyld_shared_cache_segment_info
* segInfo
) {
885 map
[dylibInfo
->path
].push_back(seg_info(segInfo
->name
, segInfo
->fileOffset
, segInfo
->fileSize
));
889 fprintf(stderr
, "Error: dyld_shared_cache_iterate_segments_with_slide failed.\n");
890 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
894 // for each dylib instantiate a dylib file
895 SharedCacheExtractor
extractor(map
, extraction_root_path
, dylib_create_func
, mapped_cache
, progress
);
896 result
= extractor
.extractCaches();
898 munmap(mapped_cache
, (size_t)statbuf
.st_size
);
904 int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path
, const char* extraction_root_path
)
906 return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path
, extraction_root_path
,
907 ^(unsigned , unsigned) {} );
918 typedef int (*extractor_proc
)(const char* shared_cache_file_path
, const char* extraction_root_path
,
919 void (^progress
)(unsigned current
, unsigned total
));
921 int main(int argc
, const char* argv
[])
924 fprintf(stderr
, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
928 //void* handle = dlopen("/Volumes/my/src/dyld/build/Debug/dsc_extractor.bundle", RTLD_LAZY);
929 void* handle
= dlopen("/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/usr/lib/dsc_extractor.bundle", RTLD_LAZY
);
930 if ( handle
== NULL
) {
931 fprintf(stderr
, "dsc_extractor.bundle could not be loaded\n");
935 extractor_proc proc
= (extractor_proc
)dlsym(handle
, "dyld_shared_cache_extract_dylibs_progress");
936 if ( proc
== NULL
) {
937 fprintf(stderr
, "dsc_extractor.bundle did not have dyld_shared_cache_extract_dylibs_progress symbol\n");
941 int result
= (*proc
)(argv
[1], argv
[2], ^(unsigned c
, unsigned total
) { printf("%d/%d\n", c
, total
); } );
942 fprintf(stderr
, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result
);