]> git.saurik.com Git - apple/dyld.git/blob - launch-cache/dsc_extractor.cpp
dyld-551.3.tar.gz
[apple/dyld.git] / launch-cache / dsc_extractor.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <sys/stat.h>
29 #include <string.h>
30 #include <fcntl.h>
31 #include <stdlib.h>
32 #include <errno.h>
33 #include <sys/mman.h>
34 #include <sys/syslimits.h>
35 #include <libkern/OSByteOrder.h>
36 #include <mach-o/fat.h>
37 #include <mach-o/arch.h>
38 #include <mach-o/loader.h>
39 #include <Availability.h>
40
41 #define NO_ULEB
42 #include "Architectures.hpp"
43 #include "MachOFileAbstraction.hpp"
44 #include "CacheFileAbstraction.hpp"
45
46 #include "dsc_iterator.h"
47 #include "dsc_extractor.h"
48 #include "MachOTrie.hpp"
49
50 #include <vector>
51 #include <set>
52 #include <map>
53 #include <unordered_map>
54 #include <algorithm>
55 #include <dispatch/dispatch.h>
56
// One segment of a dylib, recorded while iterating the shared cache:
// its name plus the offset/size pair handed to the constructor.
struct seg_info
{
    const char* segName;    // segment name; the pointer is stored, the string is not copied
    uint64_t    offset;     // second constructor argument (callers pass the segment's file offset)
    uint64_t    sizem;      // third constructor argument (callers pass the segment's file size)

    seg_info(const char* name, uint64_t fileOffset, uint64_t byteSize)
        : segName(name), offset(fileOffset), sizem(byteSize)
    {
    }
};
65
// Hash functor for NUL-terminated C strings: folds every character into
// the running value as hash = 5*hash + character.
class CStringHash {
public:
    size_t operator()(const char* str) const {
        size_t hash = 0;
        const char* cursor = str;
        while ( *cursor != '\0' ) {
            hash = 5 * hash + *cursor;
            ++cursor;
        }
        return hash;
    }
};
// Equality functor for C strings: compares contents with strcmp(), not pointers.
class CStringEquals {
public:
    bool operator()(const char* left, const char* right) const {
        const int order = strcmp(left, right);
        return order == 0;
    }
};
79 typedef std::unordered_map<const char*, std::vector<seg_info>, CStringHash, CStringEquals> NameToSegments;
80
81 // Filter to find individual symbol re-exports in trie
82 class NotReExportSymbol {
83 public:
84 NotReExportSymbol(const std::set<int> &rd) :_reexportDeps(rd) {}
85 bool operator()(const mach_o::trie::Entry &entry) const {
86 bool result = isSymbolReExport(entry);
87 if (result) {
88 // <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
89 ::free((void*)entry.name);
90 const_cast<mach_o::trie::Entry*>(&entry)->name = NULL;
91 }
92 return result;
93 }
94 private:
95 bool isSymbolReExport(const mach_o::trie::Entry &entry) const {
96 if ( (entry.flags & EXPORT_SYMBOL_FLAGS_KIND_MASK) != EXPORT_SYMBOL_FLAGS_KIND_REGULAR )
97 return true;
98 if ( (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) == 0 )
99 return true;
100 // If the symbol comes from a dylib that is re-exported, this is not an individual symbol re-export
101 if ( _reexportDeps.count((int)entry.other) != 0 )
102 return true;
103 return false;
104 }
105 const std::set<int> &_reexportDeps;
106 };
107
108
109 template <typename A>
110 int optimize_linkedit(macho_header<typename A::P>* mh, uint64_t textOffsetInCache, const void* mapped_cache, uint64_t* newSize)
111 {
112 typedef typename A::P P;
113 typedef typename A::P::E E;
114 typedef typename A::P::uint_t pint_t;
115
116 // update header flags
117 mh->set_flags(mh->flags() & 0x7FFFFFFF); // remove in-cache bit
118
119 // update load commands
120 uint64_t cumulativeFileSize = 0;
121 const unsigned origLoadCommandsSize = mh->sizeofcmds();
122 unsigned bytesRemaining = origLoadCommandsSize;
123 unsigned removedCount = 0;
124 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
125 const uint32_t cmdCount = mh->ncmds();
126 const macho_load_command<P>* cmd = cmds;
127 macho_segment_command<P>* linkEditSegCmd = NULL;
128 macho_symtab_command<P>* symtab = NULL;
129 macho_dysymtab_command<P>* dynamicSymTab = NULL;
130 macho_linkedit_data_command<P>* functionStarts = NULL;
131 macho_linkedit_data_command<P>* dataInCode = NULL;
132 uint32_t exportsTrieOffset = 0;
133 uint32_t exportsTrieSize = 0;
134 std::set<int> reexportDeps;
135 int depIndex = 0;
136 for (uint32_t i = 0; i < cmdCount; ++i) {
137 bool remove = false;
138 switch ( cmd->cmd() ) {
139 case macho_segment_command<P>::CMD:
140 {
141 // update segment/section file offsets
142 macho_segment_command<P>* segCmd = (macho_segment_command<P>*)cmd;
143 segCmd->set_fileoff(cumulativeFileSize);
144 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
145 macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
146 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
147 if ( sect->offset() != 0 )
148 sect->set_offset((uint32_t)(cumulativeFileSize+sect->addr()-segCmd->vmaddr()));
149 }
150 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
151 linkEditSegCmd = segCmd;
152 }
153 cumulativeFileSize += segCmd->filesize();
154 }
155 break;
156 case LC_DYLD_INFO_ONLY:
157 {
158 // zero out all dyld info
159 macho_dyld_info_command<P>* dyldInfo = (macho_dyld_info_command<P>*)cmd;
160 exportsTrieOffset = dyldInfo->export_off();
161 exportsTrieSize = dyldInfo->export_size();
162 dyldInfo->set_rebase_off(0);
163 dyldInfo->set_rebase_size(0);
164 dyldInfo->set_bind_off(0);
165 dyldInfo->set_bind_size(0);
166 dyldInfo->set_weak_bind_off(0);
167 dyldInfo->set_weak_bind_size(0);
168 dyldInfo->set_lazy_bind_off(0);
169 dyldInfo->set_lazy_bind_size(0);
170 dyldInfo->set_export_off(0);
171 dyldInfo->set_export_size(0);
172 }
173 break;
174 case LC_SYMTAB:
175 symtab = (macho_symtab_command<P>*)cmd;
176 break;
177 case LC_DYSYMTAB:
178 dynamicSymTab = (macho_dysymtab_command<P>*)cmd;
179 break;
180 case LC_FUNCTION_STARTS:
181 functionStarts = (macho_linkedit_data_command<P>*)cmd;
182 break;
183 case LC_DATA_IN_CODE:
184 dataInCode = (macho_linkedit_data_command<P>*)cmd;
185 break;
186 case LC_LOAD_DYLIB:
187 case LC_LOAD_WEAK_DYLIB:
188 case LC_REEXPORT_DYLIB:
189 case LC_LOAD_UPWARD_DYLIB:
190 ++depIndex;
191 if ( cmd->cmd() == LC_REEXPORT_DYLIB ) {
192 reexportDeps.insert(depIndex);
193 }
194 break;
195 case LC_SEGMENT_SPLIT_INFO:
196 // <rdar://problem/23212513> dylibs iOS 9 dyld caches have bogus LC_SEGMENT_SPLIT_INFO
197 remove = true;
198 break;
199 }
200 uint32_t cmdSize = cmd->cmdsize();
201 macho_load_command<P>* nextCmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmdSize);
202 if ( remove ) {
203 ::memmove((void*)cmd, (void*)nextCmd, bytesRemaining);
204 ++removedCount;
205 }
206 else {
207 bytesRemaining -= cmdSize;
208 cmd = nextCmd;
209 }
210 }
211 // zero out stuff removed
212 ::bzero((void*)cmd, bytesRemaining);
213 // update header
214 mh->set_ncmds(cmdCount - removedCount);
215 mh->set_sizeofcmds(origLoadCommandsSize - bytesRemaining);
216
217 // rebuild symbol table
218 if ( linkEditSegCmd == NULL ) {
219 fprintf(stderr, "__LINKEDIT not found\n");
220 return -1;
221 }
222 if ( symtab == NULL ) {
223 fprintf(stderr, "LC_SYMTAB not found\n");
224 return -1;
225 }
226 if ( dynamicSymTab == NULL ) {
227 fprintf(stderr, "LC_DYSYMTAB not found\n");
228 return -1;
229 }
230
231 const uint64_t newFunctionStartsOffset = linkEditSegCmd->fileoff();
232 uint32_t functionStartsSize = 0;
233 if ( functionStarts != NULL ) {
234 // copy function starts from original cache file to new mapped dylib file
235 functionStartsSize = functionStarts->datasize();
236 memcpy((char*)mh + newFunctionStartsOffset, (char*)mapped_cache + functionStarts->dataoff(), functionStartsSize);
237 }
238 const uint64_t newDataInCodeOffset = (newFunctionStartsOffset + functionStartsSize + sizeof(pint_t) - 1) & (-sizeof(pint_t)); // pointer align
239 uint32_t dataInCodeSize = 0;
240 if ( dataInCode != NULL ) {
241 // copy data-in-code info from original cache file to new mapped dylib file
242 dataInCodeSize = dataInCode->datasize();
243 memcpy((char*)mh + newDataInCodeOffset, (char*)mapped_cache + dataInCode->dataoff(), dataInCodeSize);
244 }
245
246 std::vector<mach_o::trie::Entry> exports;
247 if ( exportsTrieSize != 0 ) {
248 const uint8_t* exportsStart = ((uint8_t*)mapped_cache) + exportsTrieOffset;
249 const uint8_t* exportsEnd = &exportsStart[exportsTrieSize];
250 mach_o::trie::parseTrie(exportsStart, exportsEnd, exports);
251 exports.erase(std::remove_if(exports.begin(), exports.end(), NotReExportSymbol(reexportDeps)), exports.end());
252 }
253
254 // look for local symbol info in unmapped part of shared cache
255 dyldCacheHeader<E>* header = (dyldCacheHeader<E>*)mapped_cache;
256 macho_nlist<P>* localNlists = NULL;
257 uint32_t localNlistCount = 0;
258 const char* localStrings = NULL;
259 const char* localStringsEnd = NULL;
260 if ( header->mappingOffset() > offsetof(dyld_cache_header,localSymbolsSize) ) {
261 dyldCacheLocalSymbolsInfo<E>* localInfo = (dyldCacheLocalSymbolsInfo<E>*)(((uint8_t*)mapped_cache) + header->localSymbolsOffset());
262 dyldCacheLocalSymbolEntry<E>* entries = (dyldCacheLocalSymbolEntry<E>*)(((uint8_t*)mapped_cache) + header->localSymbolsOffset() + localInfo->entriesOffset());
263 macho_nlist<P>* allLocalNlists = (macho_nlist<P>*)(((uint8_t*)localInfo) + localInfo->nlistOffset());
264 const uint32_t entriesCount = localInfo->entriesCount();
265 for (uint32_t i=0; i < entriesCount; ++i) {
266 if ( entries[i].dylibOffset() == textOffsetInCache ) {
267 uint32_t localNlistStart = entries[i].nlistStartIndex();
268 localNlistCount = entries[i].nlistCount();
269 localNlists = &allLocalNlists[localNlistStart];
270 localStrings = ((char*)localInfo) + localInfo->stringsOffset();
271 localStringsEnd = &localStrings[localInfo->stringsSize()];
272 break;
273 }
274 }
275 }
276 // compute number of symbols in new symbol table
277 const macho_nlist<P>* const mergedSymTabStart = (macho_nlist<P>*)(((uint8_t*)mapped_cache) + symtab->symoff());
278 const macho_nlist<P>* const mergedSymTabend = &mergedSymTabStart[symtab->nsyms()];
279 uint32_t newSymCount = symtab->nsyms();
280 if ( localNlists != NULL ) {
281 newSymCount = localNlistCount;
282 for (const macho_nlist<P>* s = mergedSymTabStart; s != mergedSymTabend; ++s) {
283 // skip any locals in cache
284 if ( (s->n_type() & (N_TYPE|N_EXT)) == N_SECT )
285 continue;
286 ++newSymCount;
287 }
288 }
289
290 // add room for N_INDR symbols for re-exported symbols
291 newSymCount += exports.size();
292
293 // copy symbol entries and strings from original cache file to new mapped dylib file
294 const uint64_t newSymTabOffset = (newDataInCodeOffset + dataInCodeSize + sizeof(pint_t) - 1) & (-sizeof(pint_t)); // pointer align
295 const uint64_t newIndSymTabOffset = newSymTabOffset + newSymCount*sizeof(macho_nlist<P>);
296 const uint64_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
297 macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
298 char* const newStringPoolStart = (char*)mh + newStringPoolOffset;
299 const uint32_t* mergedIndSymTab = (uint32_t*)((char*)mapped_cache + dynamicSymTab->indirectsymoff());
300 const char* mergedStringPoolStart = (char*)mapped_cache + symtab->stroff();
301 const char* mergedStringPoolEnd = &mergedStringPoolStart[symtab->strsize()];
302 macho_nlist<P>* t = newSymTabStart;
303 int poolOffset = 0;
304 uint32_t symbolsCopied = 0;
305 newStringPoolStart[poolOffset++] = '\0'; // first pool entry is always empty string
306 for (const macho_nlist<P>* s = mergedSymTabStart; s != mergedSymTabend; ++s) {
307 // if we have better local symbol info, skip any locals here
308 if ( (localNlists != NULL) && ((s->n_type() & (N_TYPE|N_EXT)) == N_SECT) )
309 continue;
310 *t = *s;
311 t->set_n_strx(poolOffset);
312 const char* symName = &mergedStringPoolStart[s->n_strx()];
313 if ( symName > mergedStringPoolEnd )
314 symName = "<corrupt symbol name>";
315 strcpy(&newStringPoolStart[poolOffset], symName);
316 poolOffset += (strlen(symName) + 1);
317 ++t;
318 ++symbolsCopied;
319 }
320 // <rdar://problem/16529213> recreate N_INDR symbols in extracted dylibs for debugger
321 for (std::vector<mach_o::trie::Entry>::iterator it = exports.begin(); it != exports.end(); ++it) {
322 strcpy(&newStringPoolStart[poolOffset], it->name);
323 t->set_n_strx(poolOffset);
324 poolOffset += (strlen(it->name) + 1);
325 t->set_n_type(N_INDR | N_EXT);
326 t->set_n_sect(0);
327 t->set_n_desc(0);
328 const char* importName = it->importName;
329 if ( *importName == '\0' )
330 importName = it->name;
331 strcpy(&newStringPoolStart[poolOffset], importName);
332 t->set_n_value(poolOffset);
333 poolOffset += (strlen(importName) + 1);
334 ++t;
335 ++symbolsCopied;
336 }
337 if ( localNlists != NULL ) {
338 // update load command to reflect new count of locals
339 dynamicSymTab->set_ilocalsym(symbolsCopied);
340 dynamicSymTab->set_nlocalsym(localNlistCount);
341 // copy local symbols
342 for (uint32_t i=0; i < localNlistCount; ++i) {
343 const char* localName = &localStrings[localNlists[i].n_strx()];
344 if ( localName > localStringsEnd )
345 localName = "<corrupt local symbol name>";
346 *t = localNlists[i];
347 t->set_n_strx(poolOffset);
348 strcpy(&newStringPoolStart[poolOffset], localName);
349 poolOffset += (strlen(localName) + 1);
350 ++t;
351 ++symbolsCopied;
352 }
353 }
354
355 if ( newSymCount != symbolsCopied ) {
356 fprintf(stderr, "symbol count miscalculation\n");
357 return -1;
358 }
359
360 // pointer align string pool size
361 while ( (poolOffset % sizeof(pint_t)) != 0 )
362 ++poolOffset;
363 // copy indirect symbol table
364 uint32_t* newIndSymTab = (uint32_t*)((char*)mh + newIndSymTabOffset);
365 memcpy(newIndSymTab, mergedIndSymTab, dynamicSymTab->nindirectsyms()*sizeof(uint32_t));
366
367 // update load commands
368 if ( functionStarts != NULL ) {
369 functionStarts->set_dataoff((uint32_t)newFunctionStartsOffset);
370 functionStarts->set_datasize(functionStartsSize);
371 }
372 if ( dataInCode != NULL ) {
373 dataInCode->set_dataoff((uint32_t)newDataInCodeOffset);
374 dataInCode->set_datasize(dataInCodeSize);
375 }
376 symtab->set_nsyms(symbolsCopied);
377 symtab->set_symoff((uint32_t)newSymTabOffset);
378 symtab->set_stroff((uint32_t)newStringPoolOffset);
379 symtab->set_strsize(poolOffset);
380 dynamicSymTab->set_extreloff(0);
381 dynamicSymTab->set_nextrel(0);
382 dynamicSymTab->set_locreloff(0);
383 dynamicSymTab->set_nlocrel(0);
384 dynamicSymTab->set_indirectsymoff((uint32_t)newIndSymTabOffset);
385 linkEditSegCmd->set_filesize(symtab->stroff()+symtab->strsize() - linkEditSegCmd->fileoff());
386 linkEditSegCmd->set_vmsize( (linkEditSegCmd->filesize()+4095) & (-4096) );
387
388 // return new size
389 *newSize = (symtab->stroff()+symtab->strsize()+4095) & (-4096);
390
391 // <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
392 for (std::vector<mach_o::trie::Entry>::iterator it = exports.begin(); it != exports.end(); ++it) {
393 ::free((void*)(it->name));
394 }
395
396
397 return 0;
398 }
399
400
401
// Best-effort creation of every directory leading up to the last '/' in
// file_path. Nothing happens when the path contains no '/', or when the
// directory part already exists. mkdir() results are deliberately ignored;
// a failure surfaces later when the caller tries to open the file.
static void make_dirs(const char* file_path)
{
    //printf("make_dirs(%s)\n", file_path);
    // private, writable copy of the path (heap-backed rather than a
    // variable-length stack array, which is not standard C++)
    const size_t pathLen = strlen(file_path);
    std::vector<char> storage(file_path, file_path + pathLen + 1);
    char* const dirs = &storage[0];

    char* lastSlash = strrchr(dirs, '/');
    if ( lastSlash == NULL )
        return;
    // cut off the file name, keeping the trailing '/'
    lastSlash[1] = '\0';

    struct stat stat_buf;
    if ( stat(dirs, &stat_buf) == 0 )
        return;

    // start after the first character so a leading '/' is not treated as a component
    char* afterSlash = &dirs[1];
    char* slash;
    while ( (slash = strchr(afterSlash, '/')) != NULL ) {
        // temporarily terminate the string at this '/' to name the prefix directory
        *slash = '\0';
        ::mkdir(dirs, S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
        //printf("mkdir(%s)\n", dirs);
        *slash = '/';
        afterSlash = slash+1;
    }
}
424
425
426
427 template <typename A>
428 size_t dylib_maker(const void* mapped_cache, std::vector<uint8_t> &dylib_data, const std::vector<seg_info>& segments) {
429 typedef typename A::P P;
430
431 size_t additionalSize = 0;
432 for(std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
433 additionalSize += it->sizem;
434 }
435
436 dylib_data.reserve(dylib_data.size() + additionalSize);
437
438 uint32_t nfat_archs = 0;
439 uint32_t offsetInFatFile = 4096;
440 uint8_t *base_ptr = &dylib_data.front();
441
442 #define FH reinterpret_cast<fat_header*>(base_ptr)
443 #define FA reinterpret_cast<fat_arch*>(base_ptr + (8 + (nfat_archs - 1) * sizeof(fat_arch)))
444
445 if(dylib_data.size() >= 4096 && OSSwapBigToHostInt32(FH->magic) == FAT_MAGIC) {
446 // have fat header, append new arch to end
447 nfat_archs = OSSwapBigToHostInt32(FH->nfat_arch);
448 offsetInFatFile = OSSwapBigToHostInt32(FA->offset) + OSSwapBigToHostInt32(FA->size);
449 }
450
451 dylib_data.resize(offsetInFatFile);
452 base_ptr = &dylib_data.front();
453
454 FH->magic = OSSwapHostToBigInt32(FAT_MAGIC);
455 FH->nfat_arch = OSSwapHostToBigInt32(++nfat_archs);
456
457 FA->cputype = 0; // filled in later
458 FA->cpusubtype = 0; // filled in later
459 FA->offset = OSSwapHostToBigInt32(offsetInFatFile);
460 FA->size = 0; // filled in later
461 FA->align = OSSwapHostToBigInt32(12);
462
463 // Write regular segments into the buffer
464 uint64_t totalSize = 0;
465 uint64_t textOffsetInCache = 0;
466 for( std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
467
468 if(strcmp(it->segName, "__TEXT") == 0 ) {
469 textOffsetInCache = it->offset;
470 const macho_header<P> *textMH = reinterpret_cast<macho_header<P>*>((uint8_t*)mapped_cache+textOffsetInCache);
471 FA->cputype = OSSwapHostToBigInt32(textMH->cputype());
472 FA->cpusubtype = OSSwapHostToBigInt32(textMH->cpusubtype());
473
474 // if this cputype/subtype already exist in fat header, then return immediately
475 for(uint32_t i=0; i < nfat_archs-1; ++i) {
476 fat_arch *afa = reinterpret_cast<fat_arch*>(base_ptr+8)+i;
477
478 if( afa->cputype == FA->cputype
479 && afa->cpusubtype == FA->cpusubtype) {
480 //fprintf(stderr, "arch already exists in fat dylib\n");
481 dylib_data.resize(offsetInFatFile);
482 return offsetInFatFile;
483 }
484 }
485 }
486
487 //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
488 std::copy(((uint8_t*)mapped_cache)+it->offset, ((uint8_t*)mapped_cache)+it->offset+it->sizem, std::back_inserter(dylib_data));
489 base_ptr = &dylib_data.front();
490 totalSize += it->sizem;
491 }
492
493 FA->size = OSSwapHostToBigInt32(totalSize);
494
495 // optimize linkedit
496 uint64_t newSize = dylib_data.size();
497 optimize_linkedit<A>(((macho_header<P>*)(base_ptr+offsetInFatFile)), textOffsetInCache, mapped_cache, &newSize);
498
499 // update fat header with new file size
500 dylib_data.resize((size_t)(offsetInFatFile+newSize));
501 base_ptr = &dylib_data.front();
502 FA->size = OSSwapHostToBigInt32(newSize);
503 #undef FH
504 #undef FA
505 return offsetInFatFile;
506 }
507
508
509 int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path, const char* extraction_root_path,
510 void (^progress)(unsigned current, unsigned total))
511 {
512 struct stat statbuf;
513 if (stat(shared_cache_file_path, &statbuf)) {
514 fprintf(stderr, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path);
515 return -1;
516 }
517
518 int cache_fd = open(shared_cache_file_path, O_RDONLY);
519 if (cache_fd < 0) {
520 fprintf(stderr, "Error: failed to open shared cache file at %s\n", shared_cache_file_path);
521 return -1;
522 }
523
524 void* mapped_cache = mmap(NULL, (size_t)statbuf.st_size, PROT_READ, MAP_PRIVATE, cache_fd, 0);
525 if (mapped_cache == MAP_FAILED) {
526 fprintf(stderr, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path, errno);
527 return -1;
528 }
529
530 close(cache_fd);
531
532 // instantiate arch specific dylib maker
533 size_t (*dylib_create_func)(const void*, std::vector<uint8_t>&, const std::vector<seg_info>&) = NULL;
534 if ( strcmp((char*)mapped_cache, "dyld_v1 i386") == 0 )
535 dylib_create_func = dylib_maker<x86>;
536 else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64") == 0 )
537 dylib_create_func = dylib_maker<x86_64>;
538 else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64h") == 0 )
539 dylib_create_func = dylib_maker<x86_64>;
540 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv5") == 0 )
541 dylib_create_func = dylib_maker<arm>;
542 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv6") == 0 )
543 dylib_create_func = dylib_maker<arm>;
544 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv7") == 0 )
545 dylib_create_func = dylib_maker<arm>;
546 else if ( strncmp((char*)mapped_cache, "dyld_v1 armv7", 14) == 0 )
547 dylib_create_func = dylib_maker<arm>;
548 else if ( strcmp((char*)mapped_cache, "dyld_v1 arm64") == 0 )
549 dylib_create_func = dylib_maker<arm64>;
550 else if ( strcmp((char*)mapped_cache, "dyld_v1 arm64e") == 0 )
551 dylib_create_func = dylib_maker<arm64>;
552 else {
553 fprintf(stderr, "Error: unrecognized dyld shared cache magic.\n");
554 munmap(mapped_cache, (size_t)statbuf.st_size);
555 return -1;
556 }
557
558 // iterate through all images in cache and build map of dylibs and segments
559 __block NameToSegments map;
560 __block int result = dyld_shared_cache_iterate(mapped_cache, (uint32_t)statbuf.st_size, ^(const dyld_shared_cache_dylib_info* dylibInfo, const dyld_shared_cache_segment_info* segInfo) {
561 map[dylibInfo->path].push_back(seg_info(segInfo->name, segInfo->fileOffset, segInfo->fileSize));
562 });
563
564 if(result != 0) {
565 fprintf(stderr, "Error: dyld_shared_cache_iterate_segments_with_slide failed.\n");
566 munmap(mapped_cache, (size_t)statbuf.st_size);
567 return result;
568 }
569
570 // for each dylib instantiate a dylib file
571 dispatch_group_t group = dispatch_group_create();
572 dispatch_semaphore_t sema = dispatch_semaphore_create(2);
573 dispatch_queue_t process_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW, 0);
574 dispatch_queue_t writer_queue = dispatch_queue_create("dyld writer queue", 0);
575
576 __block unsigned count = 0;
577
578 for ( NameToSegments::iterator it = map.begin(); it != map.end(); ++it) {
579 dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);
580 dispatch_group_async(group, process_queue, ^{
581
582 char dylib_path[PATH_MAX];
583 strcpy(dylib_path, extraction_root_path);
584 strcat(dylib_path, "/");
585 strcat(dylib_path, it->first);
586
587 //printf("%s with %lu segments\n", dylib_path, it->second.size());
588 // make sure all directories in this path exist
589 make_dirs(dylib_path);
590
591 // open file, create if does not already exist
592 int fd = ::open(dylib_path, O_CREAT | O_EXLOCK | O_RDWR, 0644);
593 if ( fd == -1 ) {
594 fprintf(stderr, "can't open or create dylib file %s, errnor=%d\n", dylib_path, errno);
595 result = -1;
596 return;
597 }
598
599 struct stat statbuf;
600 if (fstat(fd, &statbuf)) {
601 fprintf(stderr, "Error: stat failed for dyld file %s, errnor=%d\n", dylib_path, errno);
602 close(fd);
603 result = -1;
604 return;
605 }
606
607 std::vector<uint8_t> *vec = new std::vector<uint8_t>((size_t)statbuf.st_size);
608 if(pread(fd, &vec->front(), vec->size(), 0) != (long)vec->size()) {
609 fprintf(stderr, "can't read dylib file %s, errnor=%d\n", dylib_path, errno);
610 close(fd);
611 result = -1;
612 return;
613 }
614
615 const size_t offset = dylib_create_func(mapped_cache, *vec, it->second);
616
617 dispatch_group_async(group, writer_queue, ^{
618 progress(count++, (unsigned)map.size());
619
620 if(offset != vec->size()) {
621 //Write out the first page, and everything after offset
622 if( pwrite(fd, &vec->front(), 4096, 0) == -1
623 || pwrite(fd, &vec->front() + offset, vec->size() - offset, offset) == -1) {
624 fprintf(stderr, "error writing, errnor=%d\n", errno);
625 result = -1;
626 }
627 }
628
629 delete vec;
630 close(fd);
631 dispatch_semaphore_signal(sema);
632 });
633 });
634 }
635
636 dispatch_group_wait(group, DISPATCH_TIME_FOREVER);
637 dispatch_release(group);
638 dispatch_release(writer_queue);
639
640 munmap(mapped_cache, (size_t)statbuf.st_size);
641 return result;
642 }
643
644
645
646 int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path, const char* extraction_root_path)
647 {
648 return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path, extraction_root_path,
649 ^(unsigned , unsigned) {} );
650 }
651
652
653 #if 0
654 // test program
655 #include <stdio.h>
656 #include <stddef.h>
657 #include <dlfcn.h>
658
659
660 typedef int (*extractor_proc)(const char* shared_cache_file_path, const char* extraction_root_path,
661 void (^progress)(unsigned current, unsigned total));
662
663 int main(int argc, const char* argv[])
664 {
665 if ( argc != 3 ) {
666 fprintf(stderr, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
667 return 1;
668 }
669
670 //void* handle = dlopen("/Volumes/my/src/dyld/build/Debug/dsc_extractor.bundle", RTLD_LAZY);
671 void* handle = dlopen("/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/usr/lib/dsc_extractor.bundle", RTLD_LAZY);
672 if ( handle == NULL ) {
673 fprintf(stderr, "dsc_extractor.bundle could not be loaded\n");
674 return 1;
675 }
676
677 extractor_proc proc = (extractor_proc)dlsym(handle, "dyld_shared_cache_extract_dylibs_progress");
678 if ( proc == NULL ) {
679 fprintf(stderr, "dsc_extractor.bundle did not have dyld_shared_cache_extract_dylibs_progress symbol\n");
680 return 1;
681 }
682
683 int result = (*proc)(argv[1], argv[2], ^(unsigned c, unsigned total) { printf("%d/%d\n", c, total); } );
684 fprintf(stderr, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result);
685 return 0;
686 }
687
688
689 #endif
690
691
692
693