]> git.saurik.com Git - apple/dyld.git/blame_incremental - launch-cache/dsc_extractor.cpp
dyld-195.5.tar.gz
[apple/dyld.git] / launch-cache / dsc_extractor.cpp
... / ...
CommitLineData
1/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2010 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25#include <stdlib.h>
26#include <stdio.h>
27#include <unistd.h>
28#include <sys/stat.h>
29#include <string.h>
30#include <fcntl.h>
31#include <stdlib.h>
32#include <errno.h>
33#include <sys/mman.h>
34#include <sys/syslimits.h>
35#include <libkern/OSByteOrder.h>
36#include <mach-o/fat.h>
37#include <mach-o/arch.h>
38#include <mach-o/loader.h>
39#include <Availability.h>
40
41#define NO_ULEB
42#include "Architectures.hpp"
43#include "MachOFileAbstraction.hpp"
44
45#include "dsc_iterator.h"
46#include "dsc_extractor.h"
47
48#include <vector>
49#include <map>
50#include <ext/hash_map>
51#include <algorithm>
52#include <dispatch/dispatch.h>
53
// One segment of a dylib as reported while iterating the shared cache:
// its name plus the offset/size pair that locates its bytes.
struct seg_info
{
    const char* segName;    // segment name; the pointer is stored as-is, the string is not copied
    uint64_t    offset;     // where the segment's bytes start
    uint64_t    sizem;      // how many bytes the segment occupies

    seg_info(const char* name, uint64_t segOffset, uint64_t segSize)
        : segName(name),
          offset(segOffset),
          sizem(segSize)
    {
    }
};
62
// Equality predicate for C-string keys: two keys are equal when their
// characters are equal, even if the pointers differ.
class CStringEquals {
public:
    bool operator()(const char* lhs, const char* rhs) const
    {
        const int order = strcmp(lhs, rhs);
        return order == 0;
    }
};
67typedef __gnu_cxx::hash_map<const char*, std::vector<seg_info>, __gnu_cxx::hash<const char*>, CStringEquals> NameToSegments;
68
69
70template <typename A>
71int optimize_linkedit(macho_header<typename A::P>* mh, const void* mapped_cache, uint64_t* newSize)
72{
73 typedef typename A::P P;
74 typedef typename A::P::E E;
75 typedef typename A::P::uint_t pint_t;
76
77 // update header flags
78 mh->set_flags(mh->flags() & 0x7FFFFFFF); // remove in-cache bit
79
80 // update load commands
81 uint64_t cumulativeFileSize = 0;
82 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
83 const uint32_t cmd_count = mh->ncmds();
84 const macho_load_command<P>* cmd = cmds;
85 macho_segment_command<P>* linkEditSegCmd = NULL;
86 macho_symtab_command<P>* symtab = NULL;
87 macho_dysymtab_command<P>* dynamicSymTab = NULL;
88 for (uint32_t i = 0; i < cmd_count; ++i) {
89 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
90 // update segment/section file offsets
91 macho_segment_command<P>* segCmd = (macho_segment_command<P>*)cmd;
92 segCmd->set_fileoff(cumulativeFileSize);
93 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
94 macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
95 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
96 if ( sect->offset() != 0 )
97 sect->set_offset(cumulativeFileSize+sect->addr()-segCmd->vmaddr());
98 }
99 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
100 linkEditSegCmd = segCmd;
101 }
102 cumulativeFileSize += segCmd->filesize();
103 }
104 else if ( cmd->cmd() == LC_DYLD_INFO_ONLY ) {
105 // zero out all dyld info
106 macho_dyld_info_command<P>* dyldInfo = (macho_dyld_info_command<P>*)cmd;
107 dyldInfo->set_rebase_off(0);
108 dyldInfo->set_rebase_size(0);
109 dyldInfo->set_bind_off(0);
110 dyldInfo->set_bind_size(0);
111 dyldInfo->set_weak_bind_off(0);
112 dyldInfo->set_weak_bind_size(0);
113 dyldInfo->set_lazy_bind_off(0);
114 dyldInfo->set_lazy_bind_size(0);
115 dyldInfo->set_export_off(0);
116 dyldInfo->set_export_size(0);
117 }
118 else if ( cmd->cmd() == LC_SYMTAB ) {
119 symtab = (macho_symtab_command<P>*)cmd;
120 }
121 else if ( cmd->cmd() == LC_DYSYMTAB ) {
122 dynamicSymTab = (macho_dysymtab_command<P>*)cmd;
123 }
124 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
125 }
126
127 // rebuild symbol table
128 if ( linkEditSegCmd == NULL ) {
129 fprintf(stderr, "__LINKEDIT not found\n");
130 return -1;
131 }
132 if ( symtab == NULL ) {
133 fprintf(stderr, "LC_SYMTAB not found\n");
134 return -1;
135 }
136 if ( dynamicSymTab == NULL ) {
137 fprintf(stderr, "LC_DYSYMTAB not found\n");
138 return -1;
139 }
140 // copy symbol entries and strings from original cache file to new mapped dylib file
141 const uint32_t newSymTabOffset = linkEditSegCmd->fileoff();
142 const uint32_t newIndSymTabOffset = newSymTabOffset + symtab->nsyms()*sizeof(macho_nlist<P>);
143 const uint32_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
144 macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
145 char* const newStringPoolStart = (char*)mh + newStringPoolOffset;
146 uint32_t* newIndSymTab = (uint32_t*)((char*)mh + newIndSymTabOffset);
147 const uint32_t* mergedIndSymTab = (uint32_t*)((char*)mapped_cache + dynamicSymTab->indirectsymoff());
148 const macho_nlist<P>* const mergedSymTabStart = (macho_nlist<P>*)(((uint8_t*)mapped_cache) + symtab->symoff());
149 const macho_nlist<P>* const mergedSymTabend = &mergedSymTabStart[symtab->nsyms()];
150 const char* mergedStringPoolStart = (char*)mapped_cache + symtab->stroff();
151 macho_nlist<P>* t = newSymTabStart;
152 int poolOffset = 0;
153 newStringPoolStart[poolOffset++] = '\0'; // first pool entry is always empty string
154 for (const macho_nlist<P>* s = mergedSymTabStart; s != mergedSymTabend; ++s) {
155 *t = *s;
156 t->set_n_strx(poolOffset);
157 strcpy(&newStringPoolStart[poolOffset], &mergedStringPoolStart[s->n_strx()]);
158 poolOffset += (strlen(&newStringPoolStart[poolOffset]) + 1);
159 ++t;
160 }
161 // pointer align string pool size
162 while ( (poolOffset % sizeof(pint_t)) != 0 )
163 ++poolOffset;
164 // copy indirect symbol table
165 memcpy(newIndSymTab, mergedIndSymTab, dynamicSymTab->nindirectsyms()*sizeof(uint32_t));
166
167 // update load commands
168 symtab->set_symoff(newSymTabOffset);
169 symtab->set_stroff(newStringPoolOffset);
170 symtab->set_strsize(poolOffset);
171 dynamicSymTab->set_extreloff(0);
172 dynamicSymTab->set_nextrel(0);
173 dynamicSymTab->set_locreloff(0);
174 dynamicSymTab->set_nlocrel(0);
175 dynamicSymTab->set_indirectsymoff(newIndSymTabOffset);
176 linkEditSegCmd->set_filesize(symtab->stroff()+symtab->strsize() - linkEditSegCmd->fileoff());
177 linkEditSegCmd->set_vmsize( (linkEditSegCmd->filesize()+4095) & (-4096) );
178
179 // return new size
180 *newSize = (symtab->stroff()+symtab->strsize()+4095) & (-4096);
181
182 return 0;
183}
184
185
186
// Makes sure the directory that will contain `file_path` exists, creating
// every missing directory along the way (like `mkdir -p` on the parent).
//
// Everything after the last '/' is treated as the file name and ignored; a
// path without any '/' is a no-op.  Directories are created with mode 0755
// (subject to the process umask).  mkdir() failures are deliberately ignored:
// components that already exist fail with EEXIST, and a real failure surfaces
// when the caller later tries to open the file.
static void make_dirs(const char* file_path)
{
    //printf("make_dirs(%s)\n", file_path);
    // Private, writable copy of the path (including the terminator) so that
    // separators can be temporarily replaced by NUL while walking it.
    std::vector<char> pathCopy(file_path, file_path + strlen(file_path) + 1);
    char* const dirs = &pathCopy[0];

    char* const lastSlash = strrchr(dirs, '/');
    if ( lastSlash == NULL )
        return;
    lastSlash[1] = '\0';    // keep only the directory part, with trailing '/'

    struct stat stat_buf;
    if ( stat(dirs, &stat_buf) == 0 )
        return;             // directory part already exists

    // Start one character in so that a leading '/' is not treated as a
    // component boundary (index 1 is valid: dirs holds at least "/").
    char* afterSlash = &dirs[1];
    char* slash;
    while ( (slash = strchr(afterSlash, '/')) != NULL ) {
        *slash = '\0';      // cut the path after the current component
        ::mkdir(dirs, S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
        //printf("mkdir(%s)\n", dirs);
        *slash = '/';       // restore and move on to the next component
        afterSlash = slash+1;
    }
}
209
210
211
212template <typename A>
213size_t dylib_maker(const void* mapped_cache, std::vector<uint8_t> &dylib_data, const std::vector<seg_info>& segments) {
214 typedef typename A::P P;
215
216 size_t additionalSize = 0;
217 for(std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
218 additionalSize += it->sizem;
219 }
220
221 dylib_data.reserve(dylib_data.size() + additionalSize);
222
223 uint32_t nfat_archs = 0;
224 uint32_t offsetInFatFile = 4096;
225 uint8_t *base_ptr = &dylib_data.front();
226
227#define FH reinterpret_cast<fat_header*>(base_ptr)
228#define FA reinterpret_cast<fat_arch*>(base_ptr + (8 + (nfat_archs - 1) * sizeof(fat_arch)))
229
230 if(dylib_data.size() >= 4096 && OSSwapBigToHostInt32(FH->magic) == FAT_MAGIC) {
231 // have fat header, append new arch to end
232 nfat_archs = OSSwapBigToHostInt32(FH->nfat_arch);
233 offsetInFatFile = OSSwapBigToHostInt32(FA->offset) + OSSwapBigToHostInt32(FA->size);
234 }
235
236 dylib_data.resize(offsetInFatFile);
237 base_ptr = &dylib_data.front();
238
239 FH->magic = OSSwapHostToBigInt32(FAT_MAGIC);
240 FH->nfat_arch = OSSwapHostToBigInt32(++nfat_archs);
241
242 FA->cputype = 0; // filled in later
243 FA->cpusubtype = 0; // filled in later
244 FA->offset = OSSwapHostToBigInt32(offsetInFatFile);
245 FA->size = 0; // filled in later
246 FA->align = OSSwapHostToBigInt32(12);
247
248 // Write regular segments into the buffer
249 uint32_t totalSize = 0;
250
251 for( std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
252
253 if(strcmp(it->segName, "__TEXT") == 0 ) {
254 const macho_header<P> *textMH = reinterpret_cast<macho_header<P>*>((uint8_t*)mapped_cache+it->offset);
255 FA->cputype = OSSwapHostToBigInt32(textMH->cputype());
256 FA->cpusubtype = OSSwapHostToBigInt32(textMH->cpusubtype());
257
258 // if this cputype/subtype already exist in fat header, then return immediately
259 for(uint32_t i=0; i < nfat_archs-1; ++i) {
260 fat_arch *afa = reinterpret_cast<fat_arch*>(base_ptr+8)+i;
261
262 if( afa->cputype == FA->cputype
263 && afa->cpusubtype == FA->cpusubtype) {
264 fprintf(stderr, "arch already exists in fat dylib\n");
265 dylib_data.resize(offsetInFatFile);
266 return offsetInFatFile;
267 }
268 }
269 }
270
271 //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
272 std::copy(((uint8_t*)mapped_cache)+it->offset, ((uint8_t*)mapped_cache)+it->offset+it->sizem, std::back_inserter(dylib_data));
273 base_ptr = &dylib_data.front();
274 totalSize += it->sizem;
275 }
276
277 FA->size = OSSwapHostToBigInt32(totalSize);
278
279 // optimize linkedit
280 uint64_t newSize = dylib_data.size();
281 optimize_linkedit<A>(((macho_header<P>*)(base_ptr+offsetInFatFile)), mapped_cache, &newSize);
282
283 // update fat header with new file size
284 dylib_data.resize(offsetInFatFile+newSize);
285 base_ptr = &dylib_data.front();
286 FA->size = OSSwapHostToBigInt32(newSize);
287#undef FH
288#undef FA
289 return offsetInFatFile;
290}
291
292
293extern int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path, const char* extraction_root_path,
294 void (^progress)(unsigned current, unsigned total))
295{
296 struct stat statbuf;
297 if (stat(shared_cache_file_path, &statbuf)) {
298 fprintf(stderr, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path);
299 return -1;
300 }
301
302 int cache_fd = open(shared_cache_file_path, O_RDONLY);
303 if (cache_fd < 0) {
304 fprintf(stderr, "Error: failed to open shared cache file at %s\n", shared_cache_file_path);
305 return -1;
306 }
307
308 void* mapped_cache = mmap(NULL, statbuf.st_size, PROT_READ, MAP_PRIVATE, cache_fd, 0);
309 if (mapped_cache == MAP_FAILED) {
310 fprintf(stderr, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path, errno);
311 return -1;
312 }
313
314 close(cache_fd);
315
316 // instantiate arch specific dylib maker
317 size_t (*dylib_create_func)(const void*, std::vector<uint8_t>&, const std::vector<seg_info>&) = NULL;
318 if ( strcmp((char*)mapped_cache, "dyld_v1 i386") == 0 )
319 dylib_create_func = dylib_maker<x86>;
320 else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64") == 0 )
321 dylib_create_func = dylib_maker<x86_64>;
322 else if ( strcmp((char*)mapped_cache, "dyld_v1 ppc") == 0 )
323 dylib_create_func = dylib_maker<ppc>;
324 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv5") == 0 )
325 dylib_create_func = dylib_maker<arm>;
326 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv6") == 0 )
327 dylib_create_func = dylib_maker<arm>;
328 else if ( strcmp((char*)mapped_cache, "dyld_v1 armv7") == 0 )
329 dylib_create_func = dylib_maker<arm>;
330 else {
331 fprintf(stderr, "Error: unrecognized dyld shared cache magic.\n");
332 munmap(mapped_cache, statbuf.st_size);
333 return -1;
334 }
335
336 // iterate through all images in cache and build map of dylibs and segments
337 __block NameToSegments map;
338 dyld_shared_cache_iterate_segments_with_slide(mapped_cache,
339 ^(const char* dylib, const char* segName, uint64_t offset, uint64_t sizem,
340 uint64_t mappedddress, uint64_t slide) {
341 map[dylib].push_back(seg_info(segName, offset, sizem));
342 });
343
344 // for each dylib instantiate a dylib file
345 dispatch_group_t group = dispatch_group_create();
346 dispatch_semaphore_t sema = dispatch_semaphore_create(4);
347 dispatch_queue_t process_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW, 0);
348 dispatch_queue_t writer_queue = dispatch_queue_create("dyld writer queue", 0);
349
350 __block int cumulativeResult = 0;
351 __block unsigned count = 0;
352
353 for ( NameToSegments::iterator it = map.begin(); it != map.end(); ++it) {
354 dispatch_group_async(group, process_queue, ^{
355 dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);
356
357 char dylib_path[PATH_MAX];
358 strcpy(dylib_path, extraction_root_path);
359 strcat(dylib_path, "/");
360 strcat(dylib_path, it->first);
361
362 //printf("%s with %lu segments\n", dylib_path, segments.size());
363 // make sure all directories in this path exist
364 make_dirs(dylib_path);
365
366 // open file, create if does not already exist
367 int fd = ::open(dylib_path, O_CREAT | O_EXLOCK | O_RDWR, 0644);
368 if ( fd == -1 ) {
369 fprintf(stderr, "can't open or create dylib file %s, errnor=%d\n", dylib_path, errno);
370 cumulativeResult = -1;
371 return;
372 }
373
374 struct stat statbuf;
375 if (fstat(fd, &statbuf)) {
376 fprintf(stderr, "Error: stat failed for dyld file %s, errnor=%d\n", dylib_path, errno);
377 close(fd);
378 cumulativeResult = -1;
379 return;
380 }
381
382 std::vector<uint8_t> *vec = new std::vector<uint8_t>(statbuf.st_size);
383 if(pread(fd, &vec->front(), vec->size(), 0) != (long)vec->size()) {
384 fprintf(stderr, "can't read dylib file %s, errnor=%d\n", dylib_path, errno);
385 close(fd);
386 cumulativeResult = -1;
387 return;
388 }
389
390 const size_t offset = dylib_create_func(mapped_cache, *vec, it->second);
391
392 dispatch_group_async(group, writer_queue, ^{
393 progress(count++, map.size());
394
395 if(offset != vec->size()) {
396 //Write out the first page, and everything after offset
397 if( pwrite(fd, &vec->front(), 4096, 0) == -1
398 || pwrite(fd, &vec->front() + offset, vec->size() - offset, offset) == -1) {
399 fprintf(stderr, "error writing, errnor=%d\n", errno);
400 cumulativeResult = -1;
401 }
402 }
403
404 delete vec;
405 close(fd);
406 dispatch_semaphore_signal(sema);
407 });
408 });
409 }
410
411 dispatch_group_wait(group, DISPATCH_TIME_FOREVER);
412 dispatch_release(group);
413 dispatch_release(writer_queue);
414
415 munmap(mapped_cache, statbuf.st_size);
416 return cumulativeResult;
417}
418
419
420
421int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path, const char* extraction_root_path)
422{
423 return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path, extraction_root_path,
424 ^(unsigned , unsigned) {} );
425}
426
427
#if 0
// Stand-alone driver, compiled out by default.
// usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>
// Prints "current/total" to stdout as dylibs are processed and exits non-zero
// when the arguments are wrong or the extraction reports a failure.
int main(int argc, const char* argv[])
{
    if ( argc != 3 ) {
        fprintf(stderr, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
        return 1;
    }

    // the callback arguments are unsigned, so print them with %u
    int result = dyld_shared_cache_extract_dylibs_progress(argv[1], argv[2], ^(unsigned c, unsigned total) { printf("%u/%u\n", c, total); } );
    fprintf(stderr, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result);
    return (result == 0) ? 0 : 1;
}
#endif
441
442
443