dyld-210.2.3.tar.gz
[apple/dyld.git] / launch-cache / dsc_extractor.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <sys/stat.h>
29 #include <string.h>
30 #include <fcntl.h>
31 #include <stdlib.h>
32 #include <errno.h>
33 #include <sys/mman.h>
34 #include <sys/syslimits.h>
35 #include <libkern/OSByteOrder.h>
36 #include <mach-o/fat.h>
37 #include <mach-o/arch.h>
38 #include <mach-o/loader.h>
39 #include <Availability.h>
40
41 #define NO_ULEB
42 #include "Architectures.hpp"
43 #include "MachOFileAbstraction.hpp"
44
45 #include "dsc_iterator.h"
46 #include "dsc_extractor.h"
47
48 #include <vector>
49 #include <map>
50 #include <ext/hash_map>
51 #include <algorithm>
52 #include <dispatch/dispatch.h>
53
// Describes one segment of a dylib as recorded while iterating the shared cache.
struct seg_info
{
    const char* segName;    // segment name; only the pointer is stored, the text is not copied
    uint64_t    offset;     // added to the start of the mapped cache to locate the segment's bytes
    uint64_t    sizem;      // number of bytes in the segment

    // Builds a record from a name pointer, an offset and a byte count.
    seg_info(const char* name, uint64_t cacheOffset, uint64_t byteCount)
        : segName(name), offset(cacheOffset), sizem(byteCount)
    {
    }
};
62
// Equality functor for C strings: two keys are equal when their characters
// match, regardless of whether the pointers themselves are the same.
class CStringEquals {
public:
    bool operator()(const char* left, const char* right) const
    {
        const int order = strcmp(left, right);
        return order == 0;
    }
};
// Hash map from a dylib name (a C string, compared with CStringEquals) to the
// list of seg_info records collected for that dylib.
67 typedef __gnu_cxx::hash_map<const char*, std::vector<seg_info>, __gnu_cxx::hash<const char*>, CStringEquals> NameToSegments;
68
69
70 template <typename A>
71 int optimize_linkedit(macho_header<typename A::P>* mh, const void* mapped_cache, uint64_t* newSize)
72 {
73 typedef typename A::P P;
74 typedef typename A::P::E E;
75 typedef typename A::P::uint_t pint_t;
76
77 // update header flags
78 mh->set_flags(mh->flags() & 0x7FFFFFFF); // remove in-cache bit
79
80 // update load commands
81 uint64_t cumulativeFileSize = 0;
82 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
83 const uint32_t cmd_count = mh->ncmds();
84 const macho_load_command<P>* cmd = cmds;
85 macho_segment_command<P>* linkEditSegCmd = NULL;
86 macho_symtab_command<P>* symtab = NULL;
87 macho_dysymtab_command<P>* dynamicSymTab = NULL;
88 macho_linkedit_data_command<P>* functionStarts = NULL;
89 macho_linkedit_data_command<P>* dataInCode = NULL;
90 for (uint32_t i = 0; i < cmd_count; ++i) {
91 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
92 // update segment/section file offsets
93 macho_segment_command<P>* segCmd = (macho_segment_command<P>*)cmd;
94 segCmd->set_fileoff(cumulativeFileSize);
95 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
96 macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
97 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
98 if ( sect->offset() != 0 )
99 sect->set_offset(cumulativeFileSize+sect->addr()-segCmd->vmaddr());
100 }
101 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
102 linkEditSegCmd = segCmd;
103 }
104 cumulativeFileSize += segCmd->filesize();
105 }
106 else if ( cmd->cmd() == LC_DYLD_INFO_ONLY ) {
107 // zero out all dyld info
108 macho_dyld_info_command<P>* dyldInfo = (macho_dyld_info_command<P>*)cmd;
109 dyldInfo->set_rebase_off(0);
110 dyldInfo->set_rebase_size(0);
111 dyldInfo->set_bind_off(0);
112 dyldInfo->set_bind_size(0);
113 dyldInfo->set_weak_bind_off(0);
114 dyldInfo->set_weak_bind_size(0);
115 dyldInfo->set_lazy_bind_off(0);
116 dyldInfo->set_lazy_bind_size(0);
117 dyldInfo->set_export_off(0);
118 dyldInfo->set_export_size(0);
119 }
120 else if ( cmd->cmd() == LC_SYMTAB ) {
121 symtab = (macho_symtab_command<P>*)cmd;
122 }
123 else if ( cmd->cmd() == LC_DYSYMTAB ) {
124 dynamicSymTab = (macho_dysymtab_command<P>*)cmd;
125 }
126 else if ( cmd->cmd() == LC_FUNCTION_STARTS ) {
127 functionStarts = (macho_linkedit_data_command<P>*)cmd;
128 }
129 else if ( cmd->cmd() == LC_DATA_IN_CODE ) {
130 dataInCode = (macho_linkedit_data_command<P>*)cmd;
131 }
132 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
133 }
134
135 // rebuild symbol table
136 if ( linkEditSegCmd == NULL ) {
137 fprintf(stderr, "__LINKEDIT not found\n");
138 return -1;
139 }
140 if ( symtab == NULL ) {
141 fprintf(stderr, "LC_SYMTAB not found\n");
142 return -1;
143 }
144 if ( dynamicSymTab == NULL ) {
145 fprintf(stderr, "LC_DYSYMTAB not found\n");
146 return -1;
147 }
148
149 const uint32_t newFunctionStartsOffset = linkEditSegCmd->fileoff();
150 uint32_t functionStartsSize = 0;
151 if ( functionStarts != NULL ) {
152 // copy function starts from original cache file to new mapped dylib file
153 functionStartsSize = functionStarts->datasize();
154 memcpy((char*)mh + newFunctionStartsOffset, (char*)mapped_cache + functionStarts->dataoff(), functionStartsSize);
155 }
156 const uint32_t newDataInCodeOffset = (newFunctionStartsOffset + functionStartsSize + sizeof(pint_t) - 1) & (-sizeof(pint_t)); // pointer align
157 uint32_t dataInCodeSize = 0;
158 if ( dataInCode != NULL ) {
159 // copy data-in-code info from original cache file to new mapped dylib file
160 dataInCodeSize = dataInCode->datasize();
161 memcpy((char*)mh + newDataInCodeOffset, (char*)mapped_cache + dataInCode->dataoff(), dataInCodeSize);
162 }
163 // copy symbol entries and strings from original cache file to new mapped dylib file
164 const uint32_t newSymTabOffset = (newDataInCodeOffset + dataInCodeSize + sizeof(pint_t) - 1) & (-sizeof(pint_t)); // pointer align
165 const uint32_t newIndSymTabOffset = newSymTabOffset + symtab->nsyms()*sizeof(macho_nlist<P>);
166 const uint32_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
167 macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
168 char* const newStringPoolStart = (char*)mh + newStringPoolOffset;
169 uint32_t* newIndSymTab = (uint32_t*)((char*)mh + newIndSymTabOffset);
170 const uint32_t* mergedIndSymTab = (uint32_t*)((char*)mapped_cache + dynamicSymTab->indirectsymoff());
171 const macho_nlist<P>* const mergedSymTabStart = (macho_nlist<P>*)(((uint8_t*)mapped_cache) + symtab->symoff());
172 const macho_nlist<P>* const mergedSymTabend = &mergedSymTabStart[symtab->nsyms()];
173 const char* mergedStringPoolStart = (char*)mapped_cache + symtab->stroff();
174 macho_nlist<P>* t = newSymTabStart;
175 int poolOffset = 0;
176 newStringPoolStart[poolOffset++] = '\0'; // first pool entry is always empty string
177 for (const macho_nlist<P>* s = mergedSymTabStart; s != mergedSymTabend; ++s) {
178 *t = *s;
179 t->set_n_strx(poolOffset);
180 strcpy(&newStringPoolStart[poolOffset], &mergedStringPoolStart[s->n_strx()]);
181 poolOffset += (strlen(&newStringPoolStart[poolOffset]) + 1);
182 ++t;
183 }
184 // pointer align string pool size
185 while ( (poolOffset % sizeof(pint_t)) != 0 )
186 ++poolOffset;
187 // copy indirect symbol table
188 memcpy(newIndSymTab, mergedIndSymTab, dynamicSymTab->nindirectsyms()*sizeof(uint32_t));
189
190 // update load commands
191 if ( functionStarts != NULL ) {
192 functionStarts->set_dataoff(newFunctionStartsOffset);
193 functionStarts->set_datasize(functionStartsSize);
194 }
195 if ( dataInCode != NULL ) {
196 dataInCode->set_dataoff(newDataInCodeOffset);
197 dataInCode->set_datasize(dataInCodeSize);
198 }
199 symtab->set_symoff(newSymTabOffset);
200 symtab->set_stroff(newStringPoolOffset);
201 symtab->set_strsize(poolOffset);
202 dynamicSymTab->set_extreloff(0);
203 dynamicSymTab->set_nextrel(0);
204 dynamicSymTab->set_locreloff(0);
205 dynamicSymTab->set_nlocrel(0);
206 dynamicSymTab->set_indirectsymoff(newIndSymTabOffset);
207 linkEditSegCmd->set_filesize(symtab->stroff()+symtab->strsize() - linkEditSegCmd->fileoff());
208 linkEditSegCmd->set_vmsize( (linkEditSegCmd->filesize()+4095) & (-4096) );
209
210 // return new size
211 *newSize = (symtab->stroff()+symtab->strsize()+4095) & (-4096);
212
213 return 0;
214 }
215
216
217
// Creates the directories leading up to file_path.
//
// Everything up to and including the last '/' of file_path is treated as the
// directory part. Nothing is done when file_path contains no '/' or when the
// directory part can already be stat()ed. Otherwise mkdir() is attempted for
// each successive prefix of the directory part, shortest first. The results
// of mkdir() are ignored (a prefix may well exist already), so this function
// reports nothing to its caller.
static void make_dirs(const char* file_path)
{
    //printf("make_dirs(%s)\n", file_path);
    // Private writable copy of the path, terminator included. A std::vector is
    // used because variable-length arrays are not standard C++.
    std::vector<char> buffer(file_path, file_path + strlen(file_path) + 1);
    char* const dirs = &buffer[0];

    char* const lastSlash = strrchr(dirs, '/');
    if ( lastSlash == NULL )
        return;
    lastSlash[1] = '\0';    // keep only the directory part, with its trailing '/'

    struct stat stat_buf;
    if ( stat(dirs, &stat_buf) == 0 )
        return;             // directory part already exists

    const mode_t dirMode = S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH;
    // Scanning starts at the second character so that a leading '/' is never
    // taken as the end of a (then empty) first component.
    for (char* slash = strchr(dirs + 1, '/'); slash != NULL; slash = strchr(slash + 1, '/')) {
        *slash = '\0';      // temporarily cut the path after this component
        ::mkdir(dirs, dirMode);
        //printf("mkdir(%s)\n", dirs);
        *slash = '/';
    }
}
240
241
242
243 template <typename A>
244 size_t dylib_maker(const void* mapped_cache, std::vector<uint8_t> &dylib_data, const std::vector<seg_info>& segments) {
245 typedef typename A::P P;
246
247 size_t additionalSize = 0;
248 for(std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
249 additionalSize += it->sizem;
250 }
251
252 dylib_data.reserve(dylib_data.size() + additionalSize);
253
254 uint32_t nfat_archs = 0;
255 uint32_t offsetInFatFile = 4096;
256 uint8_t *base_ptr = &dylib_data.front();
257
258 #define FH reinterpret_cast<fat_header*>(base_ptr)
259 #define FA reinterpret_cast<fat_arch*>(base_ptr + (8 + (nfat_archs - 1) * sizeof(fat_arch)))
260
261 if(dylib_data.size() >= 4096 && OSSwapBigToHostInt32(FH->magic) == FAT_MAGIC) {
262 // have fat header, append new arch to end
263 nfat_archs = OSSwapBigToHostInt32(FH->nfat_arch);
264 offsetInFatFile = OSSwapBigToHostInt32(FA->offset) + OSSwapBigToHostInt32(FA->size);
265 }
266
267 dylib_data.resize(offsetInFatFile);
268 base_ptr = &dylib_data.front();
269
270 FH->magic = OSSwapHostToBigInt32(FAT_MAGIC);
271 FH->nfat_arch = OSSwapHostToBigInt32(++nfat_archs);
272
273 FA->cputype = 0; // filled in later
274 FA->cpusubtype = 0; // filled in later
275 FA->offset = OSSwapHostToBigInt32(offsetInFatFile);
276 FA->size = 0; // filled in later
277 FA->align = OSSwapHostToBigInt32(12);
278
279 // Write regular segments into the buffer
280 uint32_t totalSize = 0;
281
282 for( std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
283
284 if(strcmp(it->segName, "__TEXT") == 0 ) {
285 const macho_header<P> *textMH = reinterpret_cast<macho_header<P>*>((uint8_t*)mapped_cache+it->offset);
286 FA->cputype = OSSwapHostToBigInt32(textMH->cputype());
287 FA->cpusubtype = OSSwapHostToBigInt32(textMH->cpusubtype());
288
289 // if this cputype/subtype already exist in fat header, then return immediately
290 for(uint32_t i=0; i < nfat_archs-1; ++i) {
291 fat_arch *afa = reinterpret_cast<fat_arch*>(base_ptr+8)+i;
292
293 if( afa->cputype == FA->cputype
294 && afa->cpusubtype == FA->cpusubtype) {
295 fprintf(stderr, "arch already exists in fat dylib\n");
296 dylib_data.resize(offsetInFatFile);
297 return offsetInFatFile;
298 }
299 }
300 }
301
302 //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
303 std::copy(((uint8_t*)mapped_cache)+it->offset, ((uint8_t*)mapped_cache)+it->offset+it->sizem, std::back_inserter(dylib_data));
304 base_ptr = &dylib_data.front();
305 totalSize += it->sizem;
306 }
307
308 FA->size = OSSwapHostToBigInt32(totalSize);
309
310 // optimize linkedit
311 uint64_t newSize = dylib_data.size();
312 optimize_linkedit<A>(((macho_header<P>*)(base_ptr+offsetInFatFile)), mapped_cache, &newSize);
313
314 // update fat header with new file size
315 dylib_data.resize(offsetInFatFile+newSize);
316 base_ptr = &dylib_data.front();
317 FA->size = OSSwapHostToBigInt32(newSize);
318 #undef FH
319 #undef FA
320 return offsetInFatFile;
321 }
322
323
324 int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path, const char* extraction_root_path,
325 void (^progress)(unsigned current, unsigned total))
326 {
327 struct stat statbuf;
328 if (stat(shared_cache_file_path, &statbuf)) {
329 fprintf(stderr, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path);
330 return -1;
331 }
332
333 int cache_fd = open(shared_cache_file_path, O_RDONLY);
334 if (cache_fd < 0) {
335 fprintf(stderr, "Error: failed to open shared cache file at %s\n", shared_cache_file_path);
336 return -1;
337 }
338
339 void* mapped_cache = mmap(NULL, statbuf.st_size, PROT_READ, MAP_PRIVATE, cache_fd, 0);
340 if (mapped_cache == MAP_FAILED) {
341 fprintf(stderr, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path, errno);
342 return -1;
343 }
344
345 close(cache_fd);
346
347 // instantiate arch specific dylib maker
348 size_t (*dylib_create_func)(const void*, std::vector<uint8_t>&, const std::vector<seg_info>&) = NULL;
349 if ( strcmp((char*)mapped_cache, "dyld_v1 i386") == 0 )
350 dylib_create_func = dylib_maker<x86>;
351 else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64") == 0 )
352 dylib_create_func = dylib_maker<x86_64>;
353 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv5") == 0 )
354 dylib_create_func = dylib_maker<arm>;
355 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv6") == 0 )
356 dylib_create_func = dylib_maker<arm>;
357 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv7") == 0 )
358 dylib_create_func = dylib_maker<arm>;
359 else if ( strncmp((char*)mapped_cache, "dyld_v1 armv7", 14) == 0 )
360 dylib_create_func = dylib_maker<arm>;
361 else {
362 fprintf(stderr, "Error: unrecognized dyld shared cache magic.\n");
363 munmap(mapped_cache, statbuf.st_size);
364 return -1;
365 }
366
367 // iterate through all images in cache and build map of dylibs and segments
368 __block NameToSegments map;
369 dyld_shared_cache_iterate_segments_with_slide(mapped_cache,
370 ^(const char* dylib, const char* segName, uint64_t offset, uint64_t sizem,
371 uint64_t mappedddress, uint64_t slide) {
372 map[dylib].push_back(seg_info(segName, offset, sizem));
373 });
374
375 // for each dylib instantiate a dylib file
376 dispatch_group_t group = dispatch_group_create();
377 dispatch_semaphore_t sema = dispatch_semaphore_create(2);
378 dispatch_queue_t process_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW, 0);
379 dispatch_queue_t writer_queue = dispatch_queue_create("dyld writer queue", 0);
380
381 __block int cumulativeResult = 0;
382 __block unsigned count = 0;
383
384 for ( NameToSegments::iterator it = map.begin(); it != map.end(); ++it) {
385 dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);
386 dispatch_group_async(group, process_queue, ^{
387
388 char dylib_path[PATH_MAX];
389 strcpy(dylib_path, extraction_root_path);
390 strcat(dylib_path, "/");
391 strcat(dylib_path, it->first);
392
393 //printf("%s with %lu segments\n", dylib_path, segments.size());
394 // make sure all directories in this path exist
395 make_dirs(dylib_path);
396
397 // open file, create if does not already exist
398 int fd = ::open(dylib_path, O_CREAT | O_EXLOCK | O_RDWR, 0644);
399 if ( fd == -1 ) {
400 fprintf(stderr, "can't open or create dylib file %s, errnor=%d\n", dylib_path, errno);
401 cumulativeResult = -1;
402 return;
403 }
404
405 struct stat statbuf;
406 if (fstat(fd, &statbuf)) {
407 fprintf(stderr, "Error: stat failed for dyld file %s, errnor=%d\n", dylib_path, errno);
408 close(fd);
409 cumulativeResult = -1;
410 return;
411 }
412
413 std::vector<uint8_t> *vec = new std::vector<uint8_t>(statbuf.st_size);
414 if(pread(fd, &vec->front(), vec->size(), 0) != (long)vec->size()) {
415 fprintf(stderr, "can't read dylib file %s, errnor=%d\n", dylib_path, errno);
416 close(fd);
417 cumulativeResult = -1;
418 return;
419 }
420
421 const size_t offset = dylib_create_func(mapped_cache, *vec, it->second);
422
423 dispatch_group_async(group, writer_queue, ^{
424 progress(count++, map.size());
425
426 if(offset != vec->size()) {
427 //Write out the first page, and everything after offset
428 if( pwrite(fd, &vec->front(), 4096, 0) == -1
429 || pwrite(fd, &vec->front() + offset, vec->size() - offset, offset) == -1) {
430 fprintf(stderr, "error writing, errnor=%d\n", errno);
431 cumulativeResult = -1;
432 }
433 }
434
435 delete vec;
436 close(fd);
437 dispatch_semaphore_signal(sema);
438 });
439 });
440 }
441
442 dispatch_group_wait(group, DISPATCH_TIME_FOREVER);
443 dispatch_release(group);
444 dispatch_release(writer_queue);
445
446 munmap(mapped_cache, statbuf.st_size);
447 return cumulativeResult;
448 }
449
450
451
452 int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path, const char* extraction_root_path)
453 {
454 return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path, extraction_root_path,
455 ^(unsigned , unsigned) {} );
456 }
457
458
#if 0
// Stand-alone test driver, compiled out by default. It loads the extractor
// bundle at run time and runs it on the cache file and output directory named
// on the command line. Exits with 0 only when the extraction reported success.

#include <dlfcn.h>

typedef int (*extractor_proc)(const char* shared_cache_file_path, const char* extraction_root_path,
                              void (^progress)(unsigned current, unsigned total));

int main(int argc, const char* argv[])
{
    if ( argc != 3 ) {
        fprintf(stderr, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
        return 1;
    }

    void* handle = dlopen("/Developer/Platforms/iPhoneOS.platform/usr/lib/dsc_extractor.bundle", RTLD_LAZY);
    if ( handle == NULL ) {
        fprintf(stderr, "dsc_extractor.bundle could not be loaded\n");
        return 1;
    }

    extractor_proc proc = (extractor_proc)dlsym(handle, "dyld_shared_cache_extract_dylibs_progress");
    if ( proc == NULL ) {
        fprintf(stderr, "dsc_extractor.bundle did not have dyld_shared_cache_extract_dylibs_progress symbol\n");
        dlclose(handle);
        return 1;
    }

    // both progress values are unsigned, so they are printed with %u
    int result = (*proc)(argv[1], argv[2], ^(unsigned c, unsigned total) { printf("%u/%u\n", c, total); } );
    fprintf(stderr, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result);
    dlclose(handle);

    // propagate failure to the shell instead of always exiting with 0
    return (result == 0) ? 0 : 1;
}
#endif
488
489
490
491