1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
3 * Copyright (c) 2010 Apple Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
34 #include <sys/syslimits.h>
35 #include <libkern/OSByteOrder.h>
36 #include <mach-o/fat.h>
37 #include <mach-o/arch.h>
38 #include <mach-o/loader.h>
39 #include <Availability.h>
42 #include "Architectures.hpp"
43 #include "MachOFileAbstraction.hpp"
45 #include "dsc_iterator.h"
46 #include "dsc_extractor.h"
50 #include <ext/hash_map>
52 #include <dispatch/dispatch.h>
56 seg_info(const char* n
, uint64_t o
, uint64_t s
)
57 : segName(n
), offset(o
), sizem(s
) { }
65 bool operator()(const char* left
, const char* right
) const { return (strcmp(left
, right
) == 0); }
67 typedef __gnu_cxx::hash_map
<const char*, std::vector
<seg_info
>, __gnu_cxx::hash
<const char*>, CStringEquals
> NameToSegments
;
71 int optimize_linkedit(macho_header
<typename
A::P
>* mh
, const void* mapped_cache
, uint64_t* newSize
)
73 typedef typename
A::P P
;
74 typedef typename
A::P::E E
;
75 typedef typename
A::P::uint_t pint_t
;
77 // update header flags
78 mh
->set_flags(mh
->flags() & 0x7FFFFFFF); // remove in-cache bit
80 // update load commands
81 uint64_t cumulativeFileSize
= 0;
82 const macho_load_command
<P
>* const cmds
= (macho_load_command
<P
>*)((uint8_t*)mh
+ sizeof(macho_header
<P
>));
83 const uint32_t cmd_count
= mh
->ncmds();
84 const macho_load_command
<P
>* cmd
= cmds
;
85 macho_segment_command
<P
>* linkEditSegCmd
= NULL
;
86 macho_symtab_command
<P
>* symtab
= NULL
;
87 macho_dysymtab_command
<P
>* dynamicSymTab
= NULL
;
88 for (uint32_t i
= 0; i
< cmd_count
; ++i
) {
89 if ( cmd
->cmd() == macho_segment_command
<P
>::CMD
) {
90 // update segment/section file offsets
91 macho_segment_command
<P
>* segCmd
= (macho_segment_command
<P
>*)cmd
;
92 segCmd
->set_fileoff(cumulativeFileSize
);
93 macho_section
<P
>* const sectionsStart
= (macho_section
<P
>*)((char*)segCmd
+ sizeof(macho_segment_command
<P
>));
94 macho_section
<P
>* const sectionsEnd
= §ionsStart
[segCmd
->nsects()];
95 for(macho_section
<P
>* sect
= sectionsStart
; sect
< sectionsEnd
; ++sect
) {
96 if ( sect
->offset() != 0 )
97 sect
->set_offset(cumulativeFileSize
+sect
->addr()-segCmd
->vmaddr());
99 if ( strcmp(segCmd
->segname(), "__LINKEDIT") == 0 ) {
100 linkEditSegCmd
= segCmd
;
102 cumulativeFileSize
+= segCmd
->filesize();
104 else if ( cmd
->cmd() == LC_DYLD_INFO_ONLY
) {
105 // zero out all dyld info
106 macho_dyld_info_command
<P
>* dyldInfo
= (macho_dyld_info_command
<P
>*)cmd
;
107 dyldInfo
->set_rebase_off(0);
108 dyldInfo
->set_rebase_size(0);
109 dyldInfo
->set_bind_off(0);
110 dyldInfo
->set_bind_size(0);
111 dyldInfo
->set_weak_bind_off(0);
112 dyldInfo
->set_weak_bind_size(0);
113 dyldInfo
->set_lazy_bind_off(0);
114 dyldInfo
->set_lazy_bind_size(0);
115 dyldInfo
->set_export_off(0);
116 dyldInfo
->set_export_size(0);
118 else if ( cmd
->cmd() == LC_SYMTAB
) {
119 symtab
= (macho_symtab_command
<P
>*)cmd
;
121 else if ( cmd
->cmd() == LC_DYSYMTAB
) {
122 dynamicSymTab
= (macho_dysymtab_command
<P
>*)cmd
;
124 cmd
= (const macho_load_command
<P
>*)(((uint8_t*)cmd
)+cmd
->cmdsize());
127 // rebuild symbol table
128 if ( linkEditSegCmd
== NULL
) {
129 fprintf(stderr
, "__LINKEDIT not found\n");
132 if ( symtab
== NULL
) {
133 fprintf(stderr
, "LC_SYMTAB not found\n");
136 if ( dynamicSymTab
== NULL
) {
137 fprintf(stderr
, "LC_DYSYMTAB not found\n");
140 // copy symbol entries and strings from original cache file to new mapped dylib file
141 const uint32_t newSymTabOffset
= linkEditSegCmd
->fileoff();
142 const uint32_t newIndSymTabOffset
= newSymTabOffset
+ symtab
->nsyms()*sizeof(macho_nlist
<P
>);
143 const uint32_t newStringPoolOffset
= newIndSymTabOffset
+ dynamicSymTab
->nindirectsyms()*sizeof(uint32_t);
144 macho_nlist
<P
>* const newSymTabStart
= (macho_nlist
<P
>*)(((uint8_t*)mh
) + newSymTabOffset
);
145 char* const newStringPoolStart
= (char*)mh
+ newStringPoolOffset
;
146 uint32_t* newIndSymTab
= (uint32_t*)((char*)mh
+ newIndSymTabOffset
);
147 const uint32_t* mergedIndSymTab
= (uint32_t*)((char*)mapped_cache
+ dynamicSymTab
->indirectsymoff());
148 const macho_nlist
<P
>* const mergedSymTabStart
= (macho_nlist
<P
>*)(((uint8_t*)mapped_cache
) + symtab
->symoff());
149 const macho_nlist
<P
>* const mergedSymTabend
= &mergedSymTabStart
[symtab
->nsyms()];
150 const char* mergedStringPoolStart
= (char*)mapped_cache
+ symtab
->stroff();
151 macho_nlist
<P
>* t
= newSymTabStart
;
153 newStringPoolStart
[poolOffset
++] = '\0'; // first pool entry is always empty string
154 for (const macho_nlist
<P
>* s
= mergedSymTabStart
; s
!= mergedSymTabend
; ++s
) {
156 t
->set_n_strx(poolOffset
);
157 strcpy(&newStringPoolStart
[poolOffset
], &mergedStringPoolStart
[s
->n_strx()]);
158 poolOffset
+= (strlen(&newStringPoolStart
[poolOffset
]) + 1);
161 // pointer align string pool size
162 while ( (poolOffset
% sizeof(pint_t
)) != 0 )
164 // copy indirect symbol table
165 memcpy(newIndSymTab
, mergedIndSymTab
, dynamicSymTab
->nindirectsyms()*sizeof(uint32_t));
167 // update load commands
168 symtab
->set_symoff(newSymTabOffset
);
169 symtab
->set_stroff(newStringPoolOffset
);
170 symtab
->set_strsize(poolOffset
);
171 dynamicSymTab
->set_extreloff(0);
172 dynamicSymTab
->set_nextrel(0);
173 dynamicSymTab
->set_locreloff(0);
174 dynamicSymTab
->set_nlocrel(0);
175 dynamicSymTab
->set_indirectsymoff(newIndSymTabOffset
);
176 linkEditSegCmd
->set_filesize(symtab
->stroff()+symtab
->strsize() - linkEditSegCmd
->fileoff());
177 linkEditSegCmd
->set_vmsize( (linkEditSegCmd
->filesize()+4095) & (-4096) );
180 *newSize
= (symtab
->stroff()+symtab
->strsize()+4095) & (-4096);
187 static void make_dirs(const char* file_path
)
189 //printf("make_dirs(%s)\n", file_path);
190 char dirs
[strlen(file_path
)+1];
191 strcpy(dirs
, file_path
);
192 char* lastSlash
= strrchr(dirs
, '/');
193 if ( lastSlash
== NULL
)
196 struct stat stat_buf
;
197 if ( stat(dirs
, &stat_buf
) != 0 ) {
198 const char* afterSlash
= &dirs
[1];
200 while ( (slash
= strchr(afterSlash
, '/')) != NULL
) {
202 ::mkdir(dirs
, S_IRWXU
| S_IRGRP
|S_IXGRP
| S_IROTH
|S_IXOTH
);
203 //printf("mkdir(%s)\n", dirs);
205 afterSlash
= slash
+1;
212 template <typename A
>
213 size_t dylib_maker(const void* mapped_cache
, std::vector
<uint8_t> &dylib_data
, const std::vector
<seg_info
>& segments
) {
214 typedef typename
A::P P
;
216 size_t additionalSize
= 0;
217 for(std::vector
<seg_info
>::const_iterator it
=segments
.begin(); it
!= segments
.end(); ++it
) {
218 additionalSize
+= it
->sizem
;
221 dylib_data
.reserve(dylib_data
.size() + additionalSize
);
223 uint32_t nfat_archs
= 0;
224 uint32_t offsetInFatFile
= 4096;
225 uint8_t *base_ptr
= &dylib_data
.front();
227 #define FH reinterpret_cast<fat_header*>(base_ptr)
228 #define FA reinterpret_cast<fat_arch*>(base_ptr + (8 + (nfat_archs - 1) * sizeof(fat_arch)))
230 if(dylib_data
.size() >= 4096 && OSSwapBigToHostInt32(FH
->magic
) == FAT_MAGIC
) {
231 // have fat header, append new arch to end
232 nfat_archs
= OSSwapBigToHostInt32(FH
->nfat_arch
);
233 offsetInFatFile
= OSSwapBigToHostInt32(FA
->offset
) + OSSwapBigToHostInt32(FA
->size
);
236 dylib_data
.resize(offsetInFatFile
);
237 base_ptr
= &dylib_data
.front();
239 FH
->magic
= OSSwapHostToBigInt32(FAT_MAGIC
);
240 FH
->nfat_arch
= OSSwapHostToBigInt32(++nfat_archs
);
242 FA
->cputype
= 0; // filled in later
243 FA
->cpusubtype
= 0; // filled in later
244 FA
->offset
= OSSwapHostToBigInt32(offsetInFatFile
);
245 FA
->size
= 0; // filled in later
246 FA
->align
= OSSwapHostToBigInt32(12);
248 // Write regular segments into the buffer
249 uint32_t totalSize
= 0;
251 for( std::vector
<seg_info
>::const_iterator it
=segments
.begin(); it
!= segments
.end(); ++it
) {
253 if(strcmp(it
->segName
, "__TEXT") == 0 ) {
254 const macho_header
<P
> *textMH
= reinterpret_cast<macho_header
<P
>*>((uint8_t*)mapped_cache
+it
->offset
);
255 FA
->cputype
= OSSwapHostToBigInt32(textMH
->cputype());
256 FA
->cpusubtype
= OSSwapHostToBigInt32(textMH
->cpusubtype());
258 // if this cputype/subtype already exist in fat header, then return immediately
259 for(uint32_t i
=0; i
< nfat_archs
-1; ++i
) {
260 fat_arch
*afa
= reinterpret_cast<fat_arch
*>(base_ptr
+8)+i
;
262 if( afa
->cputype
== FA
->cputype
263 && afa
->cpusubtype
== FA
->cpusubtype
) {
264 fprintf(stderr
, "arch already exists in fat dylib\n");
265 dylib_data
.resize(offsetInFatFile
);
266 return offsetInFatFile
;
271 //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
272 std::copy(((uint8_t*)mapped_cache
)+it
->offset
, ((uint8_t*)mapped_cache
)+it
->offset
+it
->sizem
, std::back_inserter(dylib_data
));
273 base_ptr
= &dylib_data
.front();
274 totalSize
+= it
->sizem
;
277 FA
->size
= OSSwapHostToBigInt32(totalSize
);
280 uint64_t newSize
= dylib_data
.size();
281 optimize_linkedit
<A
>(((macho_header
<P
>*)(base_ptr
+offsetInFatFile
)), mapped_cache
, &newSize
);
283 // update fat header with new file size
284 dylib_data
.resize(offsetInFatFile
+newSize
);
285 base_ptr
= &dylib_data
.front();
286 FA
->size
= OSSwapHostToBigInt32(newSize
);
289 return offsetInFatFile
;
293 extern int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path
, const char* extraction_root_path
,
294 void (^progress
)(unsigned current
, unsigned total
))
297 if (stat(shared_cache_file_path
, &statbuf
)) {
298 fprintf(stderr
, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path
);
302 int cache_fd
= open(shared_cache_file_path
, O_RDONLY
);
304 fprintf(stderr
, "Error: failed to open shared cache file at %s\n", shared_cache_file_path
);
308 void* mapped_cache
= mmap(NULL
, statbuf
.st_size
, PROT_READ
, MAP_PRIVATE
, cache_fd
, 0);
309 if (mapped_cache
== MAP_FAILED
) {
310 fprintf(stderr
, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path
, errno
);
316 // instantiate arch specific dylib maker
317 size_t (*dylib_create_func
)(const void*, std::vector
<uint8_t>&, const std::vector
<seg_info
>&) = NULL
;
318 if ( strcmp((char*)mapped_cache
, "dyld_v1 i386") == 0 )
319 dylib_create_func
= dylib_maker
<x86
>;
320 else if ( strcmp((char*)mapped_cache
, "dyld_v1 x86_64") == 0 )
321 dylib_create_func
= dylib_maker
<x86_64
>;
322 else if ( strcmp((char*)mapped_cache
, "dyld_v1 ppc") == 0 )
323 dylib_create_func
= dylib_maker
<ppc
>;
324 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv5") == 0 )
325 dylib_create_func
= dylib_maker
<arm
>;
326 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv6") == 0 )
327 dylib_create_func
= dylib_maker
<arm
>;
328 else if ( strcmp((char*)mapped_cache
, "dyld_v1 armv7") == 0 )
329 dylib_create_func
= dylib_maker
<arm
>;
331 fprintf(stderr
, "Error: unrecognized dyld shared cache magic.\n");
332 munmap(mapped_cache
, statbuf
.st_size
);
336 // iterate through all images in cache and build map of dylibs and segments
337 __block NameToSegments map
;
338 dyld_shared_cache_iterate_segments_with_slide(mapped_cache
,
339 ^(const char* dylib
, const char* segName
, uint64_t offset
, uint64_t sizem
,
340 uint64_t mappedddress
, uint64_t slide
) {
341 map
[dylib
].push_back(seg_info(segName
, offset
, sizem
));
344 // for each dylib instantiate a dylib file
345 dispatch_group_t group
= dispatch_group_create();
346 dispatch_semaphore_t sema
= dispatch_semaphore_create(4);
347 dispatch_queue_t process_queue
= dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW
, 0);
348 dispatch_queue_t writer_queue
= dispatch_queue_create("dyld writer queue", 0);
350 __block
int cumulativeResult
= 0;
351 __block
unsigned count
= 0;
353 for ( NameToSegments::iterator it
= map
.begin(); it
!= map
.end(); ++it
) {
354 dispatch_group_async(group
, process_queue
, ^{
355 dispatch_semaphore_wait(sema
, DISPATCH_TIME_FOREVER
);
357 char dylib_path
[PATH_MAX
];
358 strcpy(dylib_path
, extraction_root_path
);
359 strcat(dylib_path
, "/");
360 strcat(dylib_path
, it
->first
);
362 //printf("%s with %lu segments\n", dylib_path, segments.size());
363 // make sure all directories in this path exist
364 make_dirs(dylib_path
);
366 // open file, create if does not already exist
367 int fd
= ::open(dylib_path
, O_CREAT
| O_EXLOCK
| O_RDWR
, 0644);
369 fprintf(stderr
, "can't open or create dylib file %s, errnor=%d\n", dylib_path
, errno
);
370 cumulativeResult
= -1;
375 if (fstat(fd
, &statbuf
)) {
376 fprintf(stderr
, "Error: stat failed for dyld file %s, errnor=%d\n", dylib_path
, errno
);
378 cumulativeResult
= -1;
382 std::vector
<uint8_t> *vec
= new std::vector
<uint8_t>(statbuf
.st_size
);
383 if(pread(fd
, &vec
->front(), vec
->size(), 0) != (long)vec
->size()) {
384 fprintf(stderr
, "can't read dylib file %s, errnor=%d\n", dylib_path
, errno
);
386 cumulativeResult
= -1;
390 const size_t offset
= dylib_create_func(mapped_cache
, *vec
, it
->second
);
392 dispatch_group_async(group
, writer_queue
, ^{
393 progress(count
++, map
.size());
395 if(offset
!= vec
->size()) {
396 //Write out the first page, and everything after offset
397 if( pwrite(fd
, &vec
->front(), 4096, 0) == -1
398 || pwrite(fd
, &vec
->front() + offset
, vec
->size() - offset
, offset
) == -1) {
399 fprintf(stderr
, "error writing, errnor=%d\n", errno
);
400 cumulativeResult
= -1;
406 dispatch_semaphore_signal(sema
);
411 dispatch_group_wait(group
, DISPATCH_TIME_FOREVER
);
412 dispatch_release(group
);
413 dispatch_release(writer_queue
);
415 munmap(mapped_cache
, statbuf
.st_size
);
416 return cumulativeResult
;
421 int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path
, const char* extraction_root_path
)
423 return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path
, extraction_root_path
,
424 ^(unsigned , unsigned) {} );
429 int main(int argc
, const char* argv
[])
432 fprintf(stderr
, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
436 int result
= dyld_shared_cache_extract_dylibs_progress(argv
[1], argv
[2], ^(unsigned c
, unsigned total
) { printf("%d/%d\n", c
, total
); } );
437 fprintf(stderr
, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result
);