dyld-551.3.tar.gz
[apple/dyld.git] / dyld3 / MachOParser.cpp
1 /*
2 * Copyright (c) 2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24
25
26 #include <stdint.h>
27 #include <string.h>
28 #include <assert.h>
29 #include <uuid/uuid.h>
30 #include <fcntl.h>
31 #include <errno.h>
32 #include <unistd.h>
33 #include <sys/uio.h>
34 #include <sys/param.h>
35 #include <sys/sysctl.h>
36 #include <sys/resource.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <sys/mman.h>
40 #include <rootless.h>
41 #include <dirent.h>
42 #include <mach/mach.h>
43 #include <mach/machine.h>
44 #include <mach-o/loader.h>
45 #include <mach-o/nlist.h>
46 #include <mach-o/fat.h>
47 #include <mach-o/reloc.h>
48 #include <mach-o/dyld_priv.h>
49 #include <CommonCrypto/CommonDigest.h>
50
51 #if !DYLD_IN_PROCESS
52 #include <dlfcn.h>
53 #endif
54
55 #include "MachOParser.h"
56 #include "Logging.h"
57 #include "CodeSigningTypes.h"
58 #include "DyldSharedCache.h"
59 #include "Trie.hpp"
60
61 #if DYLD_IN_PROCESS
62 #include "APIs.h"
63 #else
64 #include "StringUtils.h"
65 #endif
66
67
68
69 #ifndef EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE
70 #define EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE 0x02
71 #endif
72
73 #ifndef CPU_SUBTYPE_ARM64_E
74 #define CPU_SUBTYPE_ARM64_E 2
75 #endif
76
77 #ifndef LC_BUILD_VERSION
78 #define LC_BUILD_VERSION 0x32 /* build for platform min OS version */
79
/*
 * The build_version_command contains the min OS version on which this
 * binary was built to run for its platform.  The lists of known platform
 * and tool values follow the struct definitions below.
 */
struct build_version_command {
    uint32_t    cmd;        /* LC_BUILD_VERSION */
    uint32_t    cmdsize;    /* sizeof(struct build_version_command) plus */
                            /* ntools * sizeof(struct build_tool_version) */
    uint32_t    platform;   /* platform */
    uint32_t    minos;      /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
    uint32_t    sdk;        /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
    uint32_t    ntools;     /* number of tool entries following this */
};
94
/* Describes one build tool; the `tool` field takes the TOOL_* values defined below. */
struct build_tool_version {
    uint32_t    tool;       /* enum for the tool */
    uint32_t    version;    /* version number of the tool */
};
99
100 /* Known values for the platform field above. */
101 #define PLATFORM_MACOS 1
102 #define PLATFORM_IOS 2
103 #define PLATFORM_TVOS 3
104 #define PLATFORM_WATCHOS 4
105 #define PLATFORM_BRIDGEOS 5
106
107 /* Known values for the tool field above. */
108 #define TOOL_CLANG 1
109 #define TOOL_SWIFT 2
110 #define TOOL_LD 3
111 #endif
112
113
114 namespace dyld3 {
115
116
117 bool FatUtil::isFatFile(const void* fileStart)
118 {
119 const fat_header* fileStartAsFat = (fat_header*)fileStart;
120 return ( fileStartAsFat->magic == OSSwapBigToHostInt32(FAT_MAGIC) );
121 }
122
/// Returns true if (addLHS + addRHS) > b, or if the add overflowed.
/// 32-bit operand variant.
template<typename T>
static bool greaterThanAddOrOverflow(uint32_t addLHS, uint32_t addRHS, T b) {
    if ( addLHS > b )
        return true;
    // the subtraction is only reached when addLHS does not exceed b
    return ( addRHS > (b - addLHS) );
}
128
/// Returns true if (addLHS + addRHS) > b, or if the add overflowed.
/// 64-bit operand variant.
template<typename T>
static bool greaterThanAddOrOverflow(uint64_t addLHS, uint64_t addRHS, T b) {
    if ( addLHS > b )
        return true;
    // the subtraction is only reached when addLHS does not exceed b
    return ( addRHS > (b - addLHS) );
}
134
135 void FatUtil::forEachSlice(Diagnostics& diag, const void* fileContent, size_t fileLen, void (^callback)(uint32_t sliceCpuType, uint32_t sliceCpuSubType, const void* sliceStart, size_t sliceSize, bool& stop))
136 {
137 const fat_header* fh = (fat_header*)fileContent;
138 if ( fh->magic != OSSwapBigToHostInt32(FAT_MAGIC) ) {
139 diag.error("not a fat file");
140 return;
141 }
142
143 if ( OSSwapBigToHostInt32(fh->nfat_arch) > ((4096 - sizeof(fat_header)) / sizeof(fat_arch)) ) {
144 diag.error("fat header too large: %u entries", OSSwapBigToHostInt32(fh->nfat_arch));
145 }
146 const fat_arch* const archs = (fat_arch*)(((char*)fh)+sizeof(fat_header));
147 bool stop = false;
148 for (uint32_t i=0; i < OSSwapBigToHostInt32(fh->nfat_arch); ++i) {
149 uint32_t cpuType = OSSwapBigToHostInt32(archs[i].cputype);
150 uint32_t cpuSubType = OSSwapBigToHostInt32(archs[i].cpusubtype);
151 uint32_t offset = OSSwapBigToHostInt32(archs[i].offset);
152 uint32_t len = OSSwapBigToHostInt32(archs[i].size);
153 if (greaterThanAddOrOverflow(offset, len, fileLen)) {
154 diag.error("slice %d extends beyond end of file", i);
155 return;
156 }
157 callback(cpuType, cpuSubType, (uint8_t*)fileContent+offset, len, stop);
158 if ( stop )
159 break;
160 }
161 }
162
163 #if !DYLD_IN_PROCESS
164 bool FatUtil::isFatFileWithSlice(Diagnostics& diag, const void* fileContent, size_t fileLen, const std::string& archName, size_t& sliceOffset, size_t& sliceLen, bool& missingSlice)
165 {
166 missingSlice = false;
167 if ( !isFatFile(fileContent) )
168 return false;
169
170 __block bool found = false;
171 forEachSlice(diag, fileContent, fileLen, ^(uint32_t sliceCpuType, uint32_t sliceCpuSubType, const void* sliceStart, size_t sliceSize, bool& stop) {
172 std::string sliceArchName = MachOParser::archName(sliceCpuType, sliceCpuSubType);
173 if ( sliceArchName == archName ) {
174 sliceOffset = (char*)sliceStart - (char*)fileContent;
175 sliceLen = sliceSize;
176 found = true;
177 stop = true;
178 }
179 });
180 if ( diag.hasError() )
181 return false;
182
183 if ( !found )
184 missingSlice = true;
185
186 // when looking for x86_64h fallback to x86_64
187 if ( !found && (archName == "x86_64h") )
188 return isFatFileWithSlice(diag, fileContent, fileLen, "x86_64", sliceOffset, sliceLen, missingSlice);
189
190 return found;
191 }
192
193 #endif
194
195 MachOParser::MachOParser(const mach_header* mh, bool dyldCacheIsRaw)
196 {
197 #if DYLD_IN_PROCESS
198 // assume all in-process mach_headers are real loaded images
199 _data = (long)mh;
200 #else
201 if (mh == nullptr)
202 return;
203 _data = (long)mh;
204 if ( (mh->flags & 0x80000000) == 0 ) {
205 // asssume out-of-process mach_header not in a dyld cache are raw mapped files
206 _data |= 1;
207 }
208 // out-of-process mach_header in a dyld cache are not raw, but cache may be raw
209 if ( dyldCacheIsRaw )
210 _data |= 2;
211 #endif
212 }
213
214 const mach_header* MachOParser::header() const
215 {
216 return (mach_header*)(_data & -4);
217 }
218
219 // "raw" means the whole mach-o file was mapped as one contiguous region
220 // not-raw means the the mach-o file was mapped like dyld does - with zero fill expansion
221 bool MachOParser::isRaw() const
222 {
223 return (_data & 1);
224 }
225
226 // A raw dyld cache is when the whole dyld cache file is mapped in one contiguous region
227 // not-raw manes the dyld cache was mapped as it is at runtime with padding between regions
228 bool MachOParser::inRawCache() const
229 {
230 return (_data & 2);
231 }
232
233 uint32_t MachOParser::fileType() const
234 {
235 return header()->filetype;
236 }
237
238 bool MachOParser::inDyldCache() const
239 {
240 return (header()->flags & 0x80000000);
241 }
242
243 bool MachOParser::hasThreadLocalVariables() const
244 {
245 return (header()->flags & MH_HAS_TLV_DESCRIPTORS);
246 }
247
248 Platform MachOParser::platform() const
249 {
250 Platform platform;
251 uint32_t minOS;
252 uint32_t sdk;
253 if ( getPlatformAndVersion(&platform, &minOS, &sdk) )
254 return platform;
255
256 // old binary with no explict load command to mark platform, look at arch
257 switch ( header()->cputype ) {
258 case CPU_TYPE_X86_64:
259 case CPU_TYPE_I386:
260 return Platform::macOS;
261 case CPU_TYPE_ARM64:
262 case CPU_TYPE_ARM:
263 return Platform::iOS;
264 }
265 return Platform::macOS;
266 }
267
268
269 #if !DYLD_IN_PROCESS
270
// Table of known architectures: { name, cputype, cpusubtype }.
// Searched linearly by isArch(), archName(), cpuTypeFromArchName() and
// cpuSubtypeFromArchName().
const MachOParser::ArchInfo MachOParser::_s_archInfos[] = {
    { "x86_64",   CPU_TYPE_X86_64,  CPU_SUBTYPE_X86_64_ALL },
    { "x86_64h",  CPU_TYPE_X86_64,  CPU_SUBTYPE_X86_64_H },
    { "i386",     CPU_TYPE_I386,    CPU_SUBTYPE_I386_ALL },
    { "arm64",    CPU_TYPE_ARM64,   CPU_SUBTYPE_ARM64_ALL },
    { "arm64e",   CPU_TYPE_ARM64,   CPU_SUBTYPE_ARM64_E },
    { "armv7k",   CPU_TYPE_ARM,     CPU_SUBTYPE_ARM_V7K },
    { "armv7s",   CPU_TYPE_ARM,     CPU_SUBTYPE_ARM_V7S },
    { "armv7",    CPU_TYPE_ARM,     CPU_SUBTYPE_ARM_V7 }
};
281
282 bool MachOParser::isValidMachO(Diagnostics& diag, const std::string& archName, Platform platform, const void* fileContent, size_t fileLength, const std::string& pathOpened, bool ignoreMainExecutables)
283 {
284 // must start with mach-o magic value
285 const mach_header* mh = (const mach_header*)fileContent;
286 if ( (mh->magic != MH_MAGIC) && (mh->magic != MH_MAGIC_64) ) {
287 diag.warning("could not use '%s' because it is not a mach-o file", pathOpened.c_str());
288 return false;
289 }
290
291 // must match requested architecture if specified
292 if (!archName.empty() && !isArch(mh, archName)) {
293 // except when looking for x86_64h, fallback to x86_64
294 if ( (archName != "x86_64h") || !isArch(mh, "x86_64") ) {
295 diag.warning("could not use '%s' because it does not contain required architecture %s", pathOpened.c_str(), archName.c_str());
296 return false;
297 }
298 }
299
300 // must be a filetype dyld can load
301 switch ( mh->filetype ) {
302 case MH_EXECUTE:
303 if ( ignoreMainExecutables )
304 return false;
305 break;
306 case MH_DYLIB:
307 case MH_BUNDLE:
308 break;
309 default:
310 diag.warning("could not use '%s' because it is not a dylib, bundle, or executable", pathOpened.c_str());
311 return false;
312 }
313
314 // must be from a file - not in the dyld shared cache
315 if ( mh->flags & 0x80000000 ) {
316 diag.warning("could not use '%s' because the high bit of mach_header flags is reserved for images in dyld cache", pathOpened.c_str());
317 return false;
318 }
319
320 // validate load commands structure
321 MachOParser parser(mh);
322 if ( !parser.validLoadCommands(diag, fileLength) )
323 return false;
324
325 // must match requested platform
326 if ( parser.platform() != platform ) {
327 diag.warning("could not use '%s' because it was built for a different platform", pathOpened.c_str());
328 return false;
329 }
330
331 // cannot be a static executable
332 if ( (mh->filetype == MH_EXECUTE) && !parser.isDynamicExecutable() ) {
333 diag.warning("could not use '%s' because it is a static executable", pathOpened.c_str());
334 return false;
335 }
336
337 // validate dylib loads
338 if ( !parser.validEmbeddedPaths(diag) )
339 return false;
340
341 // validate segments
342 if ( !parser.validSegments(diag, fileLength) )
343 return false;
344
345 // validate LINKEDIT layout
346 if ( !parser.validLinkeditLayout(diag) )
347 return false;
348
349 return true;
350 }
351
352
353 bool MachOParser::validLoadCommands(Diagnostics& diag, size_t fileLen)
354 {
355 // check load command don't exceed file length
356 if ( header()->sizeofcmds + sizeof(mach_header_64) > fileLen ) {
357 diag.warning("load commands exceed length of file");
358 return false;
359 }
360 // walk all load commands and sanity check them
361 Diagnostics walkDiag;
362 LinkEditInfo lePointers;
363 getLinkEditLoadCommands(walkDiag, lePointers);
364 if ( walkDiag.hasError() ) {
365 diag.warning("%s", walkDiag.errorMessage().c_str());
366 return false;
367 }
368
369 // check load commands fit in TEXT segment
370 __block bool overflowText = false;
371 forEachSegment(^(const char* segName, uint32_t segFileOffset, uint32_t segFileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
372 if ( strcmp(segName, "__TEXT") == 0 ) {
373 if ( header()->sizeofcmds + sizeof(mach_header_64) > segFileSize ) {
374 diag.warning("load commands exceed length of __TEXT segment");
375 overflowText = true;
376 }
377 stop = true;
378 }
379 });
380 if ( overflowText )
381 return false;
382
383 return true;
384 }
385
386 bool MachOParser::validEmbeddedPaths(Diagnostics& diag)
387 {
388 __block int index = 1;
389 __block bool allGood = true;
390 __block bool foundInstallName = false;
391 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
392 const dylib_command* dylibCmd;
393 const rpath_command* rpathCmd;
394 switch ( cmd->cmd ) {
395 case LC_ID_DYLIB:
396 foundInstallName = true;
397 // fall through
398 case LC_LOAD_DYLIB:
399 case LC_LOAD_WEAK_DYLIB:
400 case LC_REEXPORT_DYLIB:
401 case LC_LOAD_UPWARD_DYLIB:
402 dylibCmd = (dylib_command*)cmd;
403 if ( dylibCmd->dylib.name.offset > cmd->cmdsize ) {
404 diag.warning("load command #%d name offset (%u) outside its size (%u)", index, dylibCmd->dylib.name.offset, cmd->cmdsize);
405 stop = true;
406 allGood = false;
407 }
408 else {
409 bool foundEnd = false;
410 const char* start = (char*)dylibCmd + dylibCmd->dylib.name.offset;
411 const char* end = (char*)dylibCmd + cmd->cmdsize;
412 for (const char* s=start; s < end; ++s) {
413 if ( *s == '\0' ) {
414 foundEnd = true;
415 break;
416 }
417 }
418 if ( !foundEnd ) {
419 diag.warning("load command #%d string extends beyond end of load command", index);
420 stop = true;
421 allGood = false;
422 }
423 }
424 break;
425 case LC_RPATH:
426 rpathCmd = (rpath_command*)cmd;
427 if ( rpathCmd->path.offset > cmd->cmdsize ) {
428 diag.warning("load command #%d path offset (%u) outside its size (%u)", index, rpathCmd->path.offset, cmd->cmdsize);
429 stop = true;
430 allGood = false;
431 }
432 else {
433 bool foundEnd = false;
434 const char* start = (char*)rpathCmd + rpathCmd->path.offset;
435 const char* end = (char*)rpathCmd + cmd->cmdsize;
436 for (const char* s=start; s < end; ++s) {
437 if ( *s == '\0' ) {
438 foundEnd = true;
439 break;
440 }
441 }
442 if ( !foundEnd ) {
443 diag.warning("load command #%d string extends beyond end of load command", index);
444 stop = true;
445 allGood = false;
446 }
447 }
448 break;
449 }
450 ++index;
451 });
452
453 if ( header()->filetype == MH_DYLIB ) {
454 if ( !foundInstallName ) {
455 diag.warning("MH_DYLIB is missing LC_ID_DYLIB");
456 allGood = false;
457 }
458 }
459 else {
460 if ( foundInstallName ) {
461 diag.warning("LC_ID_DYLIB found in non-MH_DYLIB");
462 allGood = false;
463 }
464 }
465
466 return allGood;
467 }
468
469 bool MachOParser::validSegments(Diagnostics& diag, size_t fileLen)
470 {
471 // check segment load command size
472 __block bool badSegmentLoadCommand = false;
473 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
474 if ( cmd->cmd == LC_SEGMENT_64 ) {
475 const segment_command_64* seg = (segment_command_64*)cmd;
476 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command_64);
477 if ( sectionsSpace < 0 ) {
478 diag.warning("load command size too small for LC_SEGMENT_64");
479 badSegmentLoadCommand = true;
480 stop = true;
481 }
482 else if ( (sectionsSpace % sizeof(section_64)) != 0 ) {
483 diag.warning("segment load command size 0x%X will not fit whole number of sections", cmd->cmdsize);
484 badSegmentLoadCommand = true;
485 stop = true;
486 }
487 else if ( sectionsSpace != (seg->nsects * sizeof(section_64)) ) {
488 diag.warning("load command size 0x%X does not match nsects %d", cmd->cmdsize, seg->nsects);
489 badSegmentLoadCommand = true;
490 stop = true;
491 } else if (greaterThanAddOrOverflow(seg->fileoff, seg->filesize, fileLen)) {
492 diag.warning("segment load command content extends beyond end of file");
493 badSegmentLoadCommand = true;
494 stop = true;
495 } else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
496 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
497 diag.warning("segment filesize exceeds vmsize");
498 badSegmentLoadCommand = true;
499 stop = true;
500 }
501 }
502 else if ( cmd->cmd == LC_SEGMENT ) {
503 const segment_command* seg = (segment_command*)cmd;
504 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command);
505 if ( sectionsSpace < 0 ) {
506 diag.warning("load command size too small for LC_SEGMENT");
507 badSegmentLoadCommand = true;
508 stop = true;
509 }
510 else if ( (sectionsSpace % sizeof(section)) != 0 ) {
511 diag.warning("segment load command size 0x%X will not fit whole number of sections", cmd->cmdsize);
512 badSegmentLoadCommand = true;
513 stop = true;
514 }
515 else if ( sectionsSpace != (seg->nsects * sizeof(section)) ) {
516 diag.warning("load command size 0x%X does not match nsects %d", cmd->cmdsize, seg->nsects);
517 badSegmentLoadCommand = true;
518 stop = true;
519 } else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
520 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
521 diag.warning("segment filesize exceeds vmsize");
522 badSegmentLoadCommand = true;
523 stop = true;
524 }
525 }
526 });
527 if ( badSegmentLoadCommand )
528 return false;
529
530 // check mapping permissions of segments
531 __block bool badPermissions = false;
532 __block bool badSize = false;
533 __block bool hasTEXT = false;
534 __block bool hasLINKEDIT = false;
535 forEachSegment(^(const char* segName, uint32_t segFileOffset, uint32_t segFileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
536 if ( strcmp(segName, "__TEXT") == 0 ) {
537 if ( protections != (VM_PROT_READ|VM_PROT_EXECUTE) ) {
538 diag.warning("__TEXT segment permissions is not 'r-x'");
539 badPermissions = true;
540 stop = true;
541 }
542 hasTEXT = true;
543 }
544 else if ( strcmp(segName, "__LINKEDIT") == 0 ) {
545 if ( protections != VM_PROT_READ ) {
546 diag.warning("__LINKEDIT segment permissions is not 'r--'");
547 badPermissions = true;
548 stop = true;
549 }
550 hasLINKEDIT = true;
551 }
552 else if ( (protections & 0xFFFFFFF8) != 0 ) {
553 diag.warning("%s segment permissions has invalid bits set", segName);
554 badPermissions = true;
555 stop = true;
556 }
557 if (greaterThanAddOrOverflow(segFileOffset, segFileSize, fileLen)) {
558 diag.warning("%s segment content extends beyond end of file", segName);
559 badSize = true;
560 stop = true;
561 }
562 if ( is64() ) {
563 if ( vmAddr+vmSize < vmAddr ) {
564 diag.warning("%s segment vm range wraps", segName);
565 badSize = true;
566 stop = true;
567 }
568 }
569 else {
570 if ( (uint32_t)(vmAddr+vmSize) < (uint32_t)(vmAddr) ) {
571 diag.warning("%s segment vm range wraps", segName);
572 badSize = true;
573 stop = true;
574 }
575 }
576 });
577 if ( badPermissions || badSize )
578 return false;
579 if ( !hasTEXT ) {
580 diag.warning("missing __TEXT segment");
581 return false;
582 }
583 if ( !hasLINKEDIT ) {
584 diag.warning("missing __LINKEDIT segment");
585 return false;
586 }
587
588 // check for overlapping segments
589 __block bool badSegments = false;
590 forEachSegment(^(const char* seg1Name, uint32_t seg1FileOffset, uint32_t seg1FileSize, uint64_t seg1vmAddr, uint64_t seg1vmSize, uint8_t seg1Protections, uint32_t seg1Index, uint64_t seg1SizeOfSections, uint8_t seg1Align, bool& stop1) {
591 uint64_t seg1vmEnd = seg1vmAddr + seg1vmSize;
592 uint32_t seg1FileEnd = seg1FileOffset + seg1FileSize;
593 forEachSegment(^(const char* seg2Name, uint32_t seg2FileOffset, uint32_t seg2FileSize, uint64_t seg2vmAddr, uint64_t seg2vmSize, uint8_t seg2Protections, uint32_t seg2Index, uint64_t seg2SizeOfSections, uint8_t seg2Align, bool& stop2) {
594 if ( seg1Index == seg2Index )
595 return;
596 uint64_t seg2vmEnd = seg2vmAddr + seg2vmSize;
597 uint32_t seg2FileEnd = seg2FileOffset + seg2FileSize;
598 if ( ((seg2vmAddr <= seg1vmAddr) && (seg2vmEnd > seg1vmAddr) && (seg1vmEnd > seg1vmAddr)) || ((seg2vmAddr >= seg1vmAddr) && (seg2vmAddr < seg1vmEnd) && (seg2vmEnd > seg2vmAddr)) ) {
599 diag.warning("segment %s vm range overlaps segment %s", seg1Name, seg2Name);
600 badSegments = true;
601 stop1 = true;
602 stop2 = true;
603 }
604 if ( ((seg2FileOffset <= seg1FileOffset) && (seg2FileEnd > seg1FileOffset) && (seg1FileEnd > seg1FileOffset)) || ((seg2FileOffset >= seg1FileOffset) && (seg2FileOffset < seg1FileEnd) && (seg2FileEnd > seg2FileOffset)) ) {
605 diag.warning("segment %s file content overlaps segment %s", seg1Name, seg2Name);
606 badSegments = true;
607 stop1 = true;
608 stop2 = true;
609 }
610 // check for out of order segments
611 if ( (seg1Index < seg2Index) && !stop1 ) {
612 if ( (seg1vmAddr > seg2vmAddr) || ((seg1FileOffset > seg2FileOffset) && (seg1FileOffset != 0) && (seg2FileOffset != 0)) ){
613 diag.warning("segment load commands out of order with respect to layout for %s and %s", seg1Name, seg2Name);
614 badSegments = true;
615 stop1 = true;
616 stop2 = true;
617 }
618 }
619 });
620 });
621 if ( badSegments )
622 return false;
623
624 // check sections are within segment
625 __block bool badSections = false;
626 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
627 if ( cmd->cmd == LC_SEGMENT_64 ) {
628 const segment_command_64* seg = (segment_command_64*)cmd;
629 const section_64* const sectionsStart = (section_64*)((char*)seg + sizeof(struct segment_command_64));
630 const section_64* const sectionsEnd = &sectionsStart[seg->nsects];
631 for (const section_64* sect=sectionsStart; (sect < sectionsEnd); ++sect) {
632 if ( (int64_t)(sect->size) < 0 ) {
633 diag.warning("section %s size too large 0x%llX", sect->sectname, sect->size);
634 badSections = true;
635 }
636 else if ( sect->addr < seg->vmaddr ) {
637 diag.warning("section %s start address 0x%llX is before containing segment's address 0x%0llX", sect->sectname, sect->addr, seg->vmaddr);
638 badSections = true;
639 }
640 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
641 diag.warning("section %s end address 0x%llX is beyond containing segment's end address 0x%0llX", sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
642 badSections = true;
643 }
644 }
645 }
646 else if ( cmd->cmd == LC_SEGMENT ) {
647 const segment_command* seg = (segment_command*)cmd;
648 const section* const sectionsStart = (section*)((char*)seg + sizeof(struct segment_command));
649 const section* const sectionsEnd = &sectionsStart[seg->nsects];
650 for (const section* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
651 if ( (int64_t)(sect->size) < 0 ) {
652 diag.warning("section %s size too large 0x%X", sect->sectname, sect->size);
653 badSections = true;
654 }
655 else if ( sect->addr < seg->vmaddr ) {
656 diag.warning("section %s start address 0x%X is before containing segment's address 0x%0X", sect->sectname, sect->addr, seg->vmaddr);
657 badSections = true;
658 }
659 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
660 diag.warning("section %s end address 0x%X is beyond containing segment's end address 0x%0X", sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
661 badSections = true;
662 }
663 }
664 }
665 });
666
667 return !badSections;
668 }
669
// Describes one blob of LINKEDIT content; used by validLinkeditLayout() to check
// for overlaps and ordering.
struct LinkEditContent
{
    const char* name;               // human readable name used in diagnostics
    uint32_t    stdOrder;           // sort key for the "standard order" check
    uint32_t    fileOffsetStart;    // file offset where the blob starts
    uint32_t    size;               // size of the blob in bytes
};
677
678
679
680 bool MachOParser::validLinkeditLayout(Diagnostics& diag)
681 {
682 LinkEditInfo leInfo;
683 getLinkEditPointers(diag, leInfo);
684 if ( diag.hasError() )
685 return false;
686 const bool is64Bit = is64();
687 const uint32_t pointerSize = (is64Bit ? 8 : 4);
688
689 // build vector of all blobs in LINKEDIT
690 std::vector<LinkEditContent> blobs;
691 if ( leInfo.dyldInfo != nullptr ) {
692 if ( leInfo.dyldInfo->rebase_size != 0 )
693 blobs.push_back({"rebase opcodes", 1, leInfo.dyldInfo->rebase_off, leInfo.dyldInfo->rebase_size});
694 if ( leInfo.dyldInfo->bind_size != 0 )
695 blobs.push_back({"bind opcodes", 2, leInfo.dyldInfo->bind_off, leInfo.dyldInfo->bind_size});
696 if ( leInfo.dyldInfo->weak_bind_size != 0 )
697 blobs.push_back({"weak bind opcodes", 3, leInfo.dyldInfo->weak_bind_off, leInfo.dyldInfo->weak_bind_size});
698 if ( leInfo.dyldInfo->lazy_bind_size != 0 )
699 blobs.push_back({"lazy bind opcodes", 4, leInfo.dyldInfo->lazy_bind_off, leInfo.dyldInfo->lazy_bind_size});
700 if ( leInfo.dyldInfo->export_size!= 0 )
701 blobs.push_back({"exports trie", 5, leInfo.dyldInfo->export_off, leInfo.dyldInfo->export_size});
702 }
703 if ( leInfo.dynSymTab != nullptr ) {
704 if ( leInfo.dynSymTab->nlocrel != 0 )
705 blobs.push_back({"local relocations", 6, leInfo.dynSymTab->locreloff, static_cast<uint32_t>(leInfo.dynSymTab->nlocrel*sizeof(relocation_info))});
706 if ( leInfo.dynSymTab->nextrel != 0 )
707 blobs.push_back({"external relocations", 11, leInfo.dynSymTab->extreloff, static_cast<uint32_t>(leInfo.dynSymTab->nextrel*sizeof(relocation_info))});
708 if ( leInfo.dynSymTab->nindirectsyms != 0 )
709 blobs.push_back({"indirect symbol table", 12, leInfo.dynSymTab->indirectsymoff, leInfo.dynSymTab->nindirectsyms*4});
710 }
711 if ( leInfo.splitSegInfo != nullptr ) {
712 if ( leInfo.splitSegInfo->datasize != 0 )
713 blobs.push_back({"shared cache info", 6, leInfo.splitSegInfo->dataoff, leInfo.splitSegInfo->datasize});
714 }
715 if ( leInfo.functionStarts != nullptr ) {
716 if ( leInfo.functionStarts->datasize != 0 )
717 blobs.push_back({"function starts", 7, leInfo.functionStarts->dataoff, leInfo.functionStarts->datasize});
718 }
719 if ( leInfo.dataInCode != nullptr ) {
720 if ( leInfo.dataInCode->datasize != 0 )
721 blobs.push_back({"data in code", 8, leInfo.dataInCode->dataoff, leInfo.dataInCode->datasize});
722 }
723 if ( leInfo.symTab != nullptr ) {
724 if ( leInfo.symTab->nsyms != 0 )
725 blobs.push_back({"symbol table", 10, leInfo.symTab->symoff, static_cast<uint32_t>(leInfo.symTab->nsyms*(is64Bit ? sizeof(nlist_64) : sizeof(struct nlist)))});
726 if ( leInfo.symTab->strsize != 0 )
727 blobs.push_back({"symbol table strings", 20, leInfo.symTab->stroff, leInfo.symTab->strsize});
728 }
729 if ( leInfo.codeSig != nullptr ) {
730 if ( leInfo.codeSig->datasize != 0 )
731 blobs.push_back({"code signature", 21, leInfo.codeSig->dataoff, leInfo.codeSig->datasize});
732 }
733
734 // check for bad combinations
735 if ( (leInfo.dyldInfo != nullptr) && (leInfo.dyldInfo->cmd == LC_DYLD_INFO_ONLY) && (leInfo.dynSymTab != nullptr) ) {
736 if ( leInfo.dynSymTab->nlocrel != 0 ) {
737 diag.error("malformed mach-o contains LC_DYLD_INFO_ONLY and local relocations");
738 return false;
739 }
740 if ( leInfo.dynSymTab->nextrel != 0 ) {
741 diag.error("malformed mach-o contains LC_DYLD_INFO_ONLY and external relocations");
742 return false;
743 }
744 }
745 if ( (leInfo.dyldInfo == nullptr) && (leInfo.dynSymTab == nullptr) ) {
746 diag.error("malformed mach-o misssing LC_DYLD_INFO and LC_DYSYMTAB");
747 return false;
748 }
749 if ( blobs.empty() ) {
750 diag.error("malformed mach-o misssing LINKEDIT");
751 return false;
752 }
753
754 // sort vector by file offset and error on overlaps
755 std::sort(blobs.begin(), blobs.end(), [&](const LinkEditContent& a, const LinkEditContent& b) {
756 return a.fileOffsetStart < b.fileOffsetStart;
757 });
758 uint32_t prevEnd = (uint32_t)(leInfo.layout.segments[leInfo.layout.linkeditSegIndex].fileOffset);
759 const char* prevName = "start of LINKEDIT";
760 for (const LinkEditContent& blob : blobs) {
761 if ( blob.fileOffsetStart < prevEnd ) {
762 diag.error("LINKEDIT overlap of %s and %s", prevName, blob.name);
763 return false;
764 }
765 prevEnd = blob.fileOffsetStart + blob.size;
766 prevName = blob.name;
767 }
768 const LinkEditContent& lastBlob = blobs.back();
769 uint32_t linkeditFileEnd = (uint32_t)(leInfo.layout.segments[leInfo.layout.linkeditSegIndex].fileOffset + leInfo.layout.segments[leInfo.layout.linkeditSegIndex].fileSize);
770 if (greaterThanAddOrOverflow(lastBlob.fileOffsetStart, lastBlob.size, linkeditFileEnd)) {
771 diag.error("LINKEDIT content '%s' extends beyond end of segment", lastBlob.name);
772 return false;
773 }
774
775 // sort vector by order and warn on non standard order or mis-alignment
776 std::sort(blobs.begin(), blobs.end(), [&](const LinkEditContent& a, const LinkEditContent& b) {
777 return a.stdOrder < b.stdOrder;
778 });
779 prevEnd = (uint32_t)(leInfo.layout.segments[leInfo.layout.linkeditSegIndex].fileOffset);
780 prevName = "start of LINKEDIT";
781 for (const LinkEditContent& blob : blobs) {
782 if ( ((blob.fileOffsetStart & (pointerSize-1)) != 0) && (blob.stdOrder != 20) ) // ok for "symbol table strings" to be mis-aligned
783 diag.warning("mis-aligned LINKEDIT content '%s'", blob.name);
784 if ( blob.fileOffsetStart < prevEnd ) {
785 diag.warning("LINKEDIT out of order %s", blob.name);
786 }
787 prevEnd = blob.fileOffsetStart;
788 prevName = blob.name;
789 }
790
791 // Check for invalid symbol table sizes
792 if ( leInfo.symTab != nullptr ) {
793 if ( leInfo.symTab->nsyms > 0x10000000 ) {
794 diag.error("malformed mach-o image: symbol table too large");
795 return false;
796 }
797 if ( leInfo.dynSymTab != nullptr ) {
798 // validate indirect symbol table
799 if ( leInfo.dynSymTab->nindirectsyms != 0 ) {
800 if ( leInfo.dynSymTab->nindirectsyms > 0x10000000 ) {
801 diag.error("malformed mach-o image: indirect symbol table too large");
802 return false;
803 }
804 }
805 if ( (leInfo.dynSymTab->nlocalsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->ilocalsym > leInfo.symTab->nsyms) ) {
806 diag.error("malformed mach-o image: indirect symbol table local symbol count exceeds total symbols");
807 return false;
808 }
809 if ( leInfo.dynSymTab->ilocalsym + leInfo.dynSymTab->nlocalsym < leInfo.dynSymTab->ilocalsym ) {
810 diag.error("malformed mach-o image: indirect symbol table local symbol count wraps");
811 return false;
812 }
813 if ( (leInfo.dynSymTab->nextdefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iextdefsym > leInfo.symTab->nsyms) ) {
814 diag.error("malformed mach-o image: indirect symbol table extern symbol count exceeds total symbols");
815 return false;
816 }
817 if ( leInfo.dynSymTab->iextdefsym + leInfo.dynSymTab->nextdefsym < leInfo.dynSymTab->iextdefsym ) {
818 diag.error("malformed mach-o image: indirect symbol table extern symbol count wraps");
819 return false;
820 }
821 if ( (leInfo.dynSymTab->nundefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iundefsym > leInfo.symTab->nsyms) ) {
822 diag.error("malformed mach-o image: indirect symbol table undefined symbol count exceeds total symbols");
823 return false;
824 }
825 if ( leInfo.dynSymTab->iundefsym + leInfo.dynSymTab->nundefsym < leInfo.dynSymTab->iundefsym ) {
826 diag.error("malformed mach-o image: indirect symbol table undefined symbol count wraps");
827 return false;
828 }
829 }
830 }
831
832 return true;
833 }
834
835 bool MachOParser::isArch(const mach_header* mh, const std::string& archName)
836 {
837 for (const ArchInfo& info : _s_archInfos) {
838 if ( archName == info.name ) {
839 return ( (mh->cputype == info.cputype) && ((mh->cpusubtype & ~CPU_SUBTYPE_MASK) == info.cpusubtype) );
840 }
841 }
842 return false;
843 }
844
845
846 std::string MachOParser::archName(uint32_t cputype, uint32_t cpusubtype)
847 {
848 for (const ArchInfo& info : _s_archInfos) {
849 if ( (cputype == info.cputype) && ((cpusubtype & ~CPU_SUBTYPE_MASK) == info.cpusubtype) ) {
850 return info.name;
851 }
852 }
853 return "unknown";
854 }
855
856 uint32_t MachOParser::cpuTypeFromArchName(const std::string& archName)
857 {
858 for (const ArchInfo& info : _s_archInfos) {
859 if ( archName == info.name ) {
860 return info.cputype;
861 }
862 }
863 return 0;
864 }
865
866 uint32_t MachOParser::cpuSubtypeFromArchName(const std::string& archName)
867 {
868 for (const ArchInfo& info : _s_archInfos) {
869 if ( archName == info.name ) {
870 return info.cpusubtype;
871 }
872 }
873 return 0;
874 }
875
876 std::string MachOParser::archName() const
877 {
878 return archName(header()->cputype, header()->cpusubtype);
879 }
880
881 std::string MachOParser::platformName(Platform platform)
882 {
883 switch ( platform ) {
884 case Platform::unknown:
885 return "unknown";
886 case Platform::macOS:
887 return "macOS";
888 case Platform::iOS:
889 return "iOS";
890 case Platform::tvOS:
891 return "tvOS";
892 case Platform::watchOS:
893 return "watchOS";
894 case Platform::bridgeOS:
895 return "bridgeOS";
896 }
897 return "unknown platform";
898 }
899
900 std::string MachOParser::versionString(uint32_t packedVersion)
901 {
902 char buff[64];
903 sprintf(buff, "%d.%d.%d", (packedVersion >> 16), ((packedVersion >> 8) & 0xFF), (packedVersion & 0xFF));
904 return buff;
905 }
906
907 #else
908
909 bool MachOParser::isMachO(Diagnostics& diag, const void* fileContent, size_t mappedLength)
910 {
911 // sanity check length
912 if ( mappedLength < 4096 ) {
913 diag.error("file too short");
914 return false;
915 }
916
917 // must start with mach-o magic value
918 const mach_header* mh = (const mach_header*)fileContent;
919 #if __LP64__
920 const uint32_t requiredMagic = MH_MAGIC_64;
921 #else
922 const uint32_t requiredMagic = MH_MAGIC;
923 #endif
924 if ( mh->magic != requiredMagic ) {
925 diag.error("not a mach-o file");
926 return false;
927 }
928
929 #if __x86_64__
930 const uint32_t requiredCPU = CPU_TYPE_X86_64;
931 #elif __i386__
932 const uint32_t requiredCPU = CPU_TYPE_I386;
933 #elif __arm__
934 const uint32_t requiredCPU = CPU_TYPE_ARM;
935 #elif __arm64__
936 const uint32_t requiredCPU = CPU_TYPE_ARM64;
937 #else
938 #error unsupported architecture
939 #endif
940 if ( mh->cputype != requiredCPU ) {
941 diag.error("wrong cpu type");
942 return false;
943 }
944
945 return true;
946 }
947
948 bool MachOParser::wellFormedMachHeaderAndLoadCommands(const mach_header* mh)
949 {
950 const load_command* startCmds = nullptr;
951 if ( mh->magic == MH_MAGIC_64 )
952 startCmds = (load_command*)((char *)mh + sizeof(mach_header_64));
953 else if ( mh->magic == MH_MAGIC )
954 startCmds = (load_command*)((char *)mh + sizeof(mach_header));
955 else
956 return false; // not a mach-o file, or wrong endianness
957
958 const load_command* const cmdsEnd = (load_command*)((char*)startCmds + mh->sizeofcmds);
959 const load_command* cmd = startCmds;
960 for(uint32_t i = 0; i < mh->ncmds; ++i) {
961 const load_command* nextCmd = (load_command*)((char *)cmd + cmd->cmdsize);
962 if ( (cmd->cmdsize < 8) || (nextCmd > cmdsEnd) || (nextCmd < startCmds)) {
963 return false;
964 }
965 cmd = nextCmd;
966 }
967 return true;
968 }
969
970 #endif
971
972 Platform MachOParser::currentPlatform()
973 {
974 #if TARGET_OS_BRIDGE
975 return Platform::bridgeOS;
976 #elif TARGET_OS_WATCH
977 return Platform::watchOS;
978 #elif TARGET_OS_TV
979 return Platform::tvOS;
980 #elif TARGET_OS_IOS
981 return Platform::iOS;
982 #elif TARGET_OS_MAC
983 return Platform::macOS;
984 #else
985 #error unknown platform
986 #endif
987 }
988
989
990 bool MachOParser::valid(Diagnostics& diag)
991 {
992 #if DYLD_IN_PROCESS
993 // only images loaded by dyld to be parsed
994 const mach_header* inImage = dyld3::dyld_image_header_containing_address(header());
995 if ( inImage != header() ) {
996 diag.error("only dyld loaded images can be parsed by MachOParser");
997 return false;
998 }
999 #else
1000
1001 #endif
1002 return true;
1003 }
1004
1005
1006 void MachOParser::forEachLoadCommand(Diagnostics& diag, void (^callback)(const load_command* cmd, bool& stop)) const
1007 {
1008 bool stop = false;
1009 const load_command* startCmds = nullptr;
1010 if ( header()->magic == MH_MAGIC_64 )
1011 startCmds = (load_command*)((char *)header() + sizeof(mach_header_64));
1012 else if ( header()->magic == MH_MAGIC )
1013 startCmds = (load_command*)((char *)header() + sizeof(mach_header));
1014 else {
1015 diag.error("file does not start with MH_MAGIC[_64]");
1016 return; // not a mach-o file, or wrong endianness
1017 }
1018 const load_command* const cmdsEnd = (load_command*)((char*)startCmds + header()->sizeofcmds);
1019 const load_command* cmd = startCmds;
1020 for(uint32_t i = 0; i < header()->ncmds; ++i) {
1021 const load_command* nextCmd = (load_command*)((char *)cmd + cmd->cmdsize);
1022 if ( cmd->cmdsize < 8 ) {
1023 diag.error("malformed load command #%d, size too small %d", i, cmd->cmdsize);
1024 return;
1025 }
1026 if ( (nextCmd > cmdsEnd) || (nextCmd < startCmds) ) {
1027 diag.error("malformed load command #%d, size too large 0x%X", i, cmd->cmdsize);
1028 return;
1029 }
1030 callback(cmd, stop);
1031 if ( stop )
1032 return;
1033 cmd = nextCmd;
1034 }
1035 }
1036
1037 UUID MachOParser::uuid() const
1038 {
1039 uuid_t uuid;
1040 getUuid(uuid);
1041 return uuid;
1042 }
1043
1044 bool MachOParser::getUuid(uuid_t uuid) const
1045 {
1046 Diagnostics diag;
1047 __block bool found = false;
1048 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1049 if ( cmd->cmd == LC_UUID ) {
1050 const uuid_command* uc = (const uuid_command*)cmd;
1051 memcpy(uuid, uc->uuid, sizeof(uuid_t));
1052 found = true;
1053 stop = true;
1054 }
1055 });
1056 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1057 if ( !found )
1058 bzero(uuid, sizeof(uuid_t));
1059 return found;
1060 }
1061
1062 uint64_t MachOParser::preferredLoadAddress() const
1063 {
1064 __block uint64_t result = 0;
1065 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
1066 if ( strcmp(segName, "__TEXT") == 0 ) {
1067 result = vmAddr;
1068 stop = true;
1069 }
1070 });
1071 return result;
1072 }
1073
1074 bool MachOParser::getPlatformAndVersion(Platform* platform, uint32_t* minOS, uint32_t* sdk) const
1075 {
1076 Diagnostics diag;
1077 __block bool found = false;
1078 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1079 const version_min_command* versCmd;
1080 switch ( cmd->cmd ) {
1081 case LC_VERSION_MIN_IPHONEOS:
1082 versCmd = (version_min_command*)cmd;
1083 *platform = Platform::iOS;
1084 *minOS = versCmd->version;
1085 *sdk = versCmd->sdk;
1086 found = true;
1087 stop = true;
1088 break;
1089 case LC_VERSION_MIN_MACOSX:
1090 versCmd = (version_min_command*)cmd;
1091 *platform = Platform::macOS;
1092 *minOS = versCmd->version;
1093 *sdk = versCmd->sdk;
1094 found = true;
1095 stop = true;
1096 break;
1097 case LC_VERSION_MIN_TVOS:
1098 versCmd = (version_min_command*)cmd;
1099 *platform = Platform::tvOS;
1100 *minOS = versCmd->version;
1101 *sdk = versCmd->sdk;
1102 found = true;
1103 stop = true;
1104 break;
1105 case LC_VERSION_MIN_WATCHOS:
1106 versCmd = (version_min_command*)cmd;
1107 *platform = Platform::watchOS;
1108 *minOS = versCmd->version;
1109 *sdk = versCmd->sdk;
1110 found = true;
1111 stop = true;
1112 break;
1113 case LC_BUILD_VERSION: {
1114 const build_version_command* buildCmd = (build_version_command *)cmd;
1115 *minOS = buildCmd->minos;
1116 *sdk = buildCmd->sdk;
1117
1118 switch(buildCmd->platform) {
1119 /* Known values for the platform field above. */
1120 case PLATFORM_MACOS:
1121 *platform = Platform::macOS;
1122 break;
1123 case PLATFORM_IOS:
1124 *platform = Platform::iOS;
1125 break;
1126 case PLATFORM_TVOS:
1127 *platform = Platform::tvOS;
1128 break;
1129 case PLATFORM_WATCHOS:
1130 *platform = Platform::watchOS;
1131 break;
1132 case PLATFORM_BRIDGEOS:
1133 *platform = Platform::bridgeOS;
1134 break;
1135 }
1136 found = true;
1137 stop = true;
1138 } break;
1139 }
1140 });
1141 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1142 return found;
1143 }
1144
1145
1146 bool MachOParser::isSimulatorBinary() const
1147 {
1148 Platform platform;
1149 uint32_t minOS;
1150 uint32_t sdk;
1151 switch ( header()->cputype ) {
1152 case CPU_TYPE_I386:
1153 case CPU_TYPE_X86_64:
1154 if ( getPlatformAndVersion(&platform, &minOS, &sdk) ) {
1155 return (platform != Platform::macOS);
1156 }
1157 break;
1158 }
1159 return false;
1160 }
1161
1162
1163 bool MachOParser::getDylibInstallName(const char** installName, uint32_t* compatVersion, uint32_t* currentVersion) const
1164 {
1165 Diagnostics diag;
1166 __block bool found = false;
1167 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1168 if ( cmd->cmd == LC_ID_DYLIB ) {
1169 const dylib_command* dylibCmd = (dylib_command*)cmd;
1170 *compatVersion = dylibCmd->dylib.compatibility_version;
1171 *currentVersion = dylibCmd->dylib.current_version;
1172 *installName = (char*)dylibCmd + dylibCmd->dylib.name.offset;
1173 found = true;
1174 stop = true;
1175 }
1176 });
1177 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1178 return found;
1179 }
1180
1181 const char* MachOParser::installName() const
1182 {
1183 assert(header()->filetype == MH_DYLIB);
1184 const char* result;
1185 uint32_t ignoreVersion;
1186 assert(getDylibInstallName(&result, &ignoreVersion, &ignoreVersion));
1187 return result;
1188 }
1189
1190
1191 uint32_t MachOParser::dependentDylibCount() const
1192 {
1193 __block uint32_t count = 0;
1194 forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
1195 ++count;
1196 });
1197 return count;
1198 }
1199
1200 const char* MachOParser::dependentDylibLoadPath(uint32_t depIndex) const
1201 {
1202 __block const char* foundLoadPath = nullptr;
1203 __block uint32_t curDepIndex = 0;
1204 forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
1205 if ( curDepIndex == depIndex ) {
1206 foundLoadPath = loadPath;
1207 stop = true;
1208 }
1209 ++curDepIndex;
1210 });
1211 return foundLoadPath;
1212 }
1213
1214
1215 void MachOParser::forEachDependentDylib(void (^callback)(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop)) const
1216 {
1217 Diagnostics diag;
1218 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1219 switch ( cmd->cmd ) {
1220 case LC_LOAD_DYLIB:
1221 case LC_LOAD_WEAK_DYLIB:
1222 case LC_REEXPORT_DYLIB:
1223 case LC_LOAD_UPWARD_DYLIB: {
1224 const dylib_command* dylibCmd = (dylib_command*)cmd;
1225 assert(dylibCmd->dylib.name.offset < cmd->cmdsize);
1226 const char* loadPath = (char*)dylibCmd + dylibCmd->dylib.name.offset;
1227 callback(loadPath, (cmd->cmd == LC_LOAD_WEAK_DYLIB), (cmd->cmd == LC_REEXPORT_DYLIB), (cmd->cmd == LC_LOAD_UPWARD_DYLIB),
1228 dylibCmd->dylib.compatibility_version, dylibCmd->dylib.current_version, stop);
1229 }
1230 break;
1231 }
1232 });
1233 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1234 }
1235
1236 void MachOParser::forEachRPath(void (^callback)(const char* rPath, bool& stop)) const
1237 {
1238 Diagnostics diag;
1239 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1240 if ( cmd->cmd == LC_RPATH ) {
1241 const char* rpath = (char*)cmd + ((struct rpath_command*)cmd)->path.offset;
1242 callback(rpath, stop);
1243 }
1244 });
1245 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1246 }
1247
1248 /*
1249 struct LayoutInfo {
1250 #if DYLD_IN_PROCESS
1251 uintptr_t slide;
1252 uintptr_t textUnslidVMAddr;
1253 uintptr_t linkeditUnslidVMAddr;
1254 uint32_t linkeditFileOffset;
1255 #else
1256 uint32_t segmentCount;
1257 uint32_t linkeditSegIndex;
1258 struct {
1259 uint64_t mappingOffset;
1260 uint64_t fileOffset;
1261 uint64_t segUnslidAddress;
1262 uint64_t segSize;
1263 } segments[16];
1264 #endif
1265 };
1266 */
1267
1268 #if !DYLD_IN_PROCESS
1269 const uint8_t* MachOParser::getContentForVMAddr(const LayoutInfo& info, uint64_t addr) const
1270 {
1271 for (uint32_t i=0; i < info.segmentCount; ++i) {
1272 if ( (addr >= info.segments[i].segUnslidAddress) && (addr < (info.segments[i].segUnslidAddress+info.segments[i].segSize)) )
1273 return (uint8_t*)header() + info.segments[i].mappingOffset + (addr - info.segments[i].segUnslidAddress);
1274 }
1275 // value is outside this image. could be pointer into another image
1276 if ( inDyldCache() ) {
1277 return (uint8_t*)header() + info.segments[0].mappingOffset + (addr - info.segments[0].segUnslidAddress);
1278 }
1279 assert(0 && "address not found in segment");
1280 return nullptr;
1281 }
1282 #endif
1283
1284 const uint8_t* MachOParser::getLinkEditContent(const LayoutInfo& info, uint32_t fileOffset) const
1285 {
1286 #if DYLD_IN_PROCESS
1287 uint32_t offsetInLinkedit = fileOffset - info.linkeditFileOffset;
1288 uintptr_t linkeditStartAddr = info.linkeditUnslidVMAddr + info.slide;
1289 return (uint8_t*)(linkeditStartAddr + offsetInLinkedit);
1290 #else
1291 uint32_t offsetInLinkedit = fileOffset - (uint32_t)(info.segments[info.linkeditSegIndex].fileOffset);
1292 const uint8_t* linkeditStart = (uint8_t*)header() + info.segments[info.linkeditSegIndex].mappingOffset;
1293 return linkeditStart + offsetInLinkedit;
1294 #endif
1295 }
1296
1297
1298 void MachOParser::getLayoutInfo(LayoutInfo& result) const
1299 {
1300 #if DYLD_IN_PROCESS
1301 // image loaded by dyld, just record the addr and file offset of TEXT and LINKEDIT segments
1302 result.slide = getSlide();
1303 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
1304 if ( strcmp(segName, "__TEXT") == 0 ) {
1305 result.textUnslidVMAddr = (uintptr_t)vmAddr;
1306 }
1307 else if ( strcmp(segName, "__LINKEDIT") == 0 ) {
1308 result.linkeditUnslidVMAddr = (uintptr_t)vmAddr;
1309 result.linkeditFileOffset = fileOffset;
1310 }
1311 });
1312 #else
1313 bool inCache = inDyldCache();
1314 bool intel32 = (header()->cputype == CPU_TYPE_I386);
1315 result.segmentCount = 0;
1316 result.linkeditSegIndex = 0xFFFFFFFF;
1317 __block uint64_t textSegAddr = 0;
1318 __block uint64_t textSegFileOffset = 0;
1319 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
1320 auto& segInfo = result.segments[result.segmentCount];
1321 if ( strcmp(segName, "__TEXT") == 0 ) {
1322 textSegAddr = vmAddr;
1323 textSegFileOffset = fileOffset;
1324 }
1325 __block bool textRelocsAllowed = false;
1326 if ( intel32 ) {
1327 forEachSection(^(const char* curSegName, uint32_t segIndex, uint64_t segVMAddr, const char* sectionName, uint32_t sectFlags,
1328 uint64_t sectAddr, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& sectStop) {
1329 if ( strcmp(curSegName, segName) == 0 ) {
1330 if ( sectFlags & (S_ATTR_EXT_RELOC|S_ATTR_LOC_RELOC) ) {
1331 textRelocsAllowed = true;
1332 sectStop = true;
1333 }
1334 }
1335 });
1336 }
1337 if ( inCache ) {
1338 if ( inRawCache() ) {
1339 // whole cache file mapped somewhere (padding not expanded)
1340 // vmaddrs are useless. only file offset make sense
1341 segInfo.mappingOffset = fileOffset - textSegFileOffset;
1342 }
1343 else {
1344 // cache file was loaded by dyld into shared region
1345 // vmaddrs of segments are correct except for ASLR slide
1346 segInfo.mappingOffset = vmAddr - textSegAddr;
1347 }
1348 }
1349 else {
1350 // individual mach-o file mapped in one region, so mappingOffset == fileOffset
1351 segInfo.mappingOffset = fileOffset;
1352 }
1353 segInfo.fileOffset = fileOffset;
1354 segInfo.fileSize = fileSize;
1355 segInfo.segUnslidAddress = vmAddr;
1356 segInfo.segSize = vmSize;
1357 segInfo.writable = ((protections & VM_PROT_WRITE) == VM_PROT_WRITE);
1358 segInfo.executable = ((protections & VM_PROT_EXECUTE) == VM_PROT_EXECUTE);
1359 segInfo.textRelocsAllowed = textRelocsAllowed;
1360 if ( strcmp(segName, "__LINKEDIT") == 0 ) {
1361 result.linkeditSegIndex = result.segmentCount;
1362 }
1363 ++result.segmentCount;
1364 if ( result.segmentCount > 127 )
1365 stop = true;
1366 });
1367 #endif
1368 }
1369
1370
1371 void MachOParser::forEachSection(void (^callback)(const char* segName, const char* sectionName, uint32_t flags,
1372 const void* content, size_t size, bool illegalSectionSize, bool& stop)) const
1373 {
1374 forEachSection(^(const char* segName, const char* sectionName, uint32_t flags, uint64_t addr,
1375 const void* content, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& stop) {
1376 callback(segName, sectionName, flags, content, (size_t)size, illegalSectionSize, stop);
1377 });
1378 }
1379
1380 void MachOParser::forEachSection(void (^callback)(const char* segName, const char* sectionName, uint32_t flags, uint64_t addr,
1381 const void* content, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2,
1382 bool illegalSectionSize, bool& stop)) const
1383 {
1384 Diagnostics diag;
1385 //fprintf(stderr, "forEachSection() mh=%p\n", header());
1386 LayoutInfo layout;
1387 getLayoutInfo(layout);
1388 forEachSection(^(const char* segName, uint32_t segIndex, uint64_t segVMAddr, const char* sectionName, uint32_t sectFlags,
1389 uint64_t sectAddr, uint64_t sectSize, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& stop) {
1390 #if DYLD_IN_PROCESS
1391 const uint8_t* segContentStart = (uint8_t*)(segVMAddr + layout.slide);
1392 #else
1393 const uint8_t* segContentStart = (uint8_t*)header() + layout.segments[segIndex].mappingOffset;
1394 #endif
1395 const void* contentAddr = segContentStart + (sectAddr - segVMAddr);
1396 callback(segName, sectionName, sectFlags, sectAddr, contentAddr, sectSize, alignP2, reserved1, reserved2, illegalSectionSize, stop);
1397 });
1398
1399 }
1400
1401 // this iterator just walks the segment/section array. It does interpret addresses
1402 void MachOParser::forEachSection(void (^callback)(const char* segName, uint32_t segIndex, uint64_t segVMAddr, const char* sectionName, uint32_t sectFlags,
1403 uint64_t sectAddr, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& stop)) const
1404 {
1405 Diagnostics diag;
1406 //fprintf(stderr, "forEachSection() mh=%p\n", header());
1407 __block uint32_t segIndex = 0;
1408 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1409 if ( cmd->cmd == LC_SEGMENT_64 ) {
1410 const segment_command_64* seg = (segment_command_64*)cmd;
1411 const section_64* const sectionsStart = (section_64*)((char*)seg + sizeof(struct segment_command_64));
1412 const section_64* const sectionsEnd = &sectionsStart[seg->nsects];
1413 for (const section_64* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
1414 const char* sectName = sect->sectname;
1415 char sectNameCopy[20];
1416 if ( sectName[15] != '\0' ) {
1417 strlcpy(sectNameCopy, sectName, 17);
1418 sectName = sectNameCopy;
1419 }
1420 bool illegalSectionSize = (sect->addr < seg->vmaddr) || greaterThanAddOrOverflow(sect->addr, sect->size, seg->vmaddr + seg->filesize);
1421 callback(seg->segname, segIndex, seg->vmaddr, sectName, sect->flags, sect->addr, sect->size, sect->align, sect->reserved1, sect->reserved2, illegalSectionSize, stop);
1422 }
1423 ++segIndex;
1424 }
1425 else if ( cmd->cmd == LC_SEGMENT ) {
1426 const segment_command* seg = (segment_command*)cmd;
1427 const section* const sectionsStart = (section*)((char*)seg + sizeof(struct segment_command));
1428 const section* const sectionsEnd = &sectionsStart[seg->nsects];
1429 for (const section* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
1430 const char* sectName = sect->sectname;
1431 char sectNameCopy[20];
1432 if ( sectName[15] != '\0' ) {
1433 strlcpy(sectNameCopy, sectName, 17);
1434 sectName = sectNameCopy;
1435 }
1436 bool illegalSectionSize = (sect->addr < seg->vmaddr) || greaterThanAddOrOverflow(sect->addr, sect->size, seg->vmaddr + seg->filesize);
1437 callback(seg->segname, segIndex, seg->vmaddr, sectName, sect->flags, sect->addr, sect->size, sect->align, sect->reserved1, sect->reserved2, illegalSectionSize, stop);
1438 }
1439 ++segIndex;
1440 }
1441 });
1442 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1443 }
1444
1445 void MachOParser::forEachGlobalSymbol(Diagnostics& diag, void (^callback)(const char* symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop)) const
1446 {
1447 LinkEditInfo leInfo;
1448 getLinkEditPointers(diag, leInfo);
1449 if ( diag.hasError() )
1450 return;
1451
1452 const bool is64Bit = is64();
1453 if ( leInfo.symTab != nullptr ) {
1454 uint32_t globalsStartIndex = 0;
1455 uint32_t globalsCount = leInfo.symTab->nsyms;
1456 if ( leInfo.dynSymTab != nullptr ) {
1457 globalsStartIndex = leInfo.dynSymTab->iextdefsym;
1458 globalsCount = leInfo.dynSymTab->nextdefsym;
1459 }
1460 uint32_t maxStringOffset = leInfo.symTab->strsize;
1461 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1462 const struct nlist* symbols = (struct nlist*) (getLinkEditContent(leInfo.layout, leInfo.symTab->symoff));
1463 const struct nlist_64* symbols64 = (struct nlist_64*)(getLinkEditContent(leInfo.layout, leInfo.symTab->symoff));
1464 bool stop = false;
1465 for (uint32_t i=0; (i < globalsCount) && !stop; ++i) {
1466 if ( is64Bit ) {
1467 const struct nlist_64& sym = symbols64[globalsStartIndex+i];
1468 if ( sym.n_un.n_strx > maxStringOffset )
1469 continue;
1470 if ( (sym.n_type & N_EXT) && ((sym.n_type & N_TYPE) == N_SECT) && ((sym.n_type & N_STAB) == 0) )
1471 callback(&stringPool[sym.n_un.n_strx], sym.n_value, sym.n_type, sym.n_sect, sym.n_desc, stop);
1472 }
1473 else {
1474 const struct nlist& sym = symbols[globalsStartIndex+i];
1475 if ( sym.n_un.n_strx > maxStringOffset )
1476 continue;
1477 if ( (sym.n_type & N_EXT) && ((sym.n_type & N_TYPE) == N_SECT) && ((sym.n_type & N_STAB) == 0) )
1478 callback(&stringPool[sym.n_un.n_strx], sym.n_value, sym.n_type, sym.n_sect, sym.n_desc, stop);
1479 }
1480 }
1481 }
1482 }
1483
1484 void MachOParser::forEachLocalSymbol(Diagnostics& diag, void (^callback)(const char* symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop)) const
1485 {
1486 LinkEditInfo leInfo;
1487 getLinkEditPointers(diag, leInfo);
1488 if ( diag.hasError() )
1489 return;
1490
1491 const bool is64Bit = is64();
1492 if ( leInfo.symTab != nullptr ) {
1493 uint32_t localsStartIndex = 0;
1494 uint32_t localsCount = leInfo.symTab->nsyms;
1495 if ( leInfo.dynSymTab != nullptr ) {
1496 localsStartIndex = leInfo.dynSymTab->ilocalsym;
1497 localsCount = leInfo.dynSymTab->nlocalsym;
1498 }
1499 uint32_t maxStringOffset = leInfo.symTab->strsize;
1500 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1501 const struct nlist* symbols = (struct nlist*) (getLinkEditContent(leInfo.layout, leInfo.symTab->symoff));
1502 const struct nlist_64* symbols64 = (struct nlist_64*)(getLinkEditContent(leInfo.layout, leInfo.symTab->symoff));
1503 bool stop = false;
1504 for (uint32_t i=0; (i < localsCount) && !stop; ++i) {
1505 if ( is64Bit ) {
1506 const struct nlist_64& sym = symbols64[localsStartIndex+i];
1507 if ( sym.n_un.n_strx > maxStringOffset )
1508 continue;
1509 if ( ((sym.n_type & N_EXT) == 0) && ((sym.n_type & N_TYPE) == N_SECT) && ((sym.n_type & N_STAB) == 0) )
1510 callback(&stringPool[sym.n_un.n_strx], sym.n_value, sym.n_type, sym.n_sect, sym.n_desc, stop);
1511 }
1512 else {
1513 const struct nlist& sym = symbols[localsStartIndex+i];
1514 if ( sym.n_un.n_strx > maxStringOffset )
1515 continue;
1516 if ( ((sym.n_type & N_EXT) == 0) && ((sym.n_type & N_TYPE) == N_SECT) && ((sym.n_type & N_STAB) == 0) )
1517 callback(&stringPool[sym.n_un.n_strx], sym.n_value, sym.n_type, sym.n_sect, sym.n_desc, stop);
1518 }
1519 }
1520 }
1521 }
1522
1523
1524 bool MachOParser::findExportedSymbol(Diagnostics& diag, const char* symbolName, void* extra, FoundSymbol& foundInfo, DependentFinder findDependent) const
1525 {
1526 LinkEditInfo leInfo;
1527 getLinkEditPointers(diag, leInfo);
1528 if ( diag.hasError() )
1529 return false;
1530 if ( leInfo.dyldInfo != nullptr ) {
1531 const uint8_t* trieStart = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->export_off);
1532 const uint8_t* trieEnd = trieStart + leInfo.dyldInfo->export_size;
1533 const uint8_t* node = trieWalk(diag, trieStart, trieEnd, symbolName);
1534 if ( node == nullptr ) {
1535 // symbol not exported from this image. Seach any re-exported dylibs
1536 __block unsigned depIndex = 0;
1537 __block bool foundInReExportedDylib = false;
1538 forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
1539 if ( isReExport && findDependent ) {
1540 const mach_header* depMH;
1541 void* depExtra;
1542 if ( findDependent(depIndex, loadPath, extra, &depMH, &depExtra) ) {
1543 bool depInRawCache = inRawCache() && (depMH->flags & 0x80000000);
1544 MachOParser dep(depMH, depInRawCache);
1545 if ( dep.findExportedSymbol(diag, symbolName, depExtra, foundInfo, findDependent) ) {
1546 stop = true;
1547 foundInReExportedDylib = true;
1548 }
1549 }
1550 else {
1551 fprintf(stderr, "could not find re-exported dylib %s\n", loadPath);
1552 }
1553 }
1554 ++depIndex;
1555 });
1556 return foundInReExportedDylib;
1557 }
1558 const uint8_t* p = node;
1559 const uint64_t flags = read_uleb128(diag, p, trieEnd);
1560 if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
1561 if ( !findDependent )
1562 return false;
1563 // re-export from another dylib, lookup there
1564 const uint64_t ordinal = read_uleb128(diag, p, trieEnd);
1565 const char* importedName = (char*)p;
1566 if ( importedName[0] == '\0' )
1567 importedName = symbolName;
1568 assert(ordinal >= 1);
1569 if (ordinal > dependentDylibCount()) {
1570 diag.error("ordinal %lld out of range for %s", ordinal, symbolName);
1571 return false;
1572 }
1573 uint32_t depIndex = (uint32_t)(ordinal-1);
1574 const mach_header* depMH;
1575 void* depExtra;
1576 if ( findDependent(depIndex, dependentDylibLoadPath(depIndex), extra, &depMH, &depExtra) ) {
1577 bool depInRawCache = inRawCache() && (depMH->flags & 0x80000000);
1578 MachOParser depParser(depMH, depInRawCache);
1579 return depParser.findExportedSymbol(diag, importedName, depExtra, foundInfo, findDependent);
1580 }
1581 else {
1582 diag.error("dependent dylib %lld not found for re-exported symbol %s", ordinal, symbolName);
1583 return false;
1584 }
1585 }
1586 foundInfo.kind = FoundSymbol::Kind::headerOffset;
1587 foundInfo.isThreadLocal = false;
1588 foundInfo.foundInDylib = header();
1589 foundInfo.foundExtra = extra;
1590 foundInfo.value = read_uleb128(diag, p, trieEnd);
1591 foundInfo.resolverFuncOffset = 0;
1592 foundInfo.foundSymbolName = symbolName;
1593 if ( diag.hasError() )
1594 return false;
1595 switch ( flags & EXPORT_SYMBOL_FLAGS_KIND_MASK ) {
1596 case EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
1597 if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
1598 foundInfo.kind = FoundSymbol::Kind::headerOffset;
1599 foundInfo.resolverFuncOffset = (uint32_t)read_uleb128(diag, p, trieEnd);
1600 }
1601 else {
1602 foundInfo.kind = FoundSymbol::Kind::headerOffset;
1603 }
1604 break;
1605 case EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
1606 foundInfo.isThreadLocal = true;
1607 break;
1608 case EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE:
1609 foundInfo.kind = FoundSymbol::Kind::absolute;
1610 break;
1611 default:
1612 diag.error("unsupported exported symbol kind. flags=%llu at node offset=0x%0lX", flags, (long)(node-trieStart));
1613 return false;
1614 }
1615 return true;
1616 }
1617 else {
1618 // this is an old binary (before macOS 10.6), scan the symbol table
1619 foundInfo.foundInDylib = nullptr;
1620 uint64_t baseAddress = preferredLoadAddress();
1621 forEachGlobalSymbol(diag, ^(const char* aSymbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop) {
1622 if ( strcmp(aSymbolName, symbolName) == 0 ) {
1623 foundInfo.kind = FoundSymbol::Kind::headerOffset;
1624 foundInfo.isThreadLocal = false;
1625 foundInfo.foundInDylib = header();
1626 foundInfo.foundExtra = extra;
1627 foundInfo.value = n_value - baseAddress;
1628 foundInfo.resolverFuncOffset = 0;
1629 foundInfo.foundSymbolName = symbolName;
1630 stop = true;
1631 }
1632 });
1633 return (foundInfo.foundInDylib != nullptr);
1634 }
1635 }
1636
1637
1638 void MachOParser::getLinkEditLoadCommands(Diagnostics& diag, LinkEditInfo& result) const
1639 {
1640 result.dyldInfo = nullptr;
1641 result.symTab = nullptr;
1642 result.dynSymTab = nullptr;
1643 result.splitSegInfo = nullptr;
1644 result.functionStarts = nullptr;
1645 result.dataInCode = nullptr;
1646 result.codeSig = nullptr;
1647 __block bool hasUUID = false;
1648 __block bool hasVersion = false;
1649 __block bool hasEncrypt = false;
1650 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1651 switch ( cmd->cmd ) {
1652 case LC_DYLD_INFO:
1653 case LC_DYLD_INFO_ONLY:
1654 if ( cmd->cmdsize != sizeof(dyld_info_command) )
1655 diag.error("LC_DYLD_INFO load command size wrong");
1656 else if ( result.dyldInfo != nullptr )
1657 diag.error("multiple LC_DYLD_INFO load commands");
1658 result.dyldInfo = (dyld_info_command*)cmd;
1659 break;
1660 case LC_SYMTAB:
1661 if ( cmd->cmdsize != sizeof(symtab_command) )
1662 diag.error("LC_SYMTAB load command size wrong");
1663 else if ( result.symTab != nullptr )
1664 diag.error("multiple LC_SYMTAB load commands");
1665 result.symTab = (symtab_command*)cmd;
1666 break;
1667 case LC_DYSYMTAB:
1668 if ( cmd->cmdsize != sizeof(dysymtab_command) )
1669 diag.error("LC_DYSYMTAB load command size wrong");
1670 else if ( result.dynSymTab != nullptr )
1671 diag.error("multiple LC_DYSYMTAB load commands");
1672 result.dynSymTab = (dysymtab_command*)cmd;
1673 break;
1674 case LC_SEGMENT_SPLIT_INFO:
1675 if ( cmd->cmdsize != sizeof(linkedit_data_command) )
1676 diag.error("LC_SEGMENT_SPLIT_INFO load command size wrong");
1677 else if ( result.splitSegInfo != nullptr )
1678 diag.error("multiple LC_SEGMENT_SPLIT_INFO load commands");
1679 result.splitSegInfo = (linkedit_data_command*)cmd;
1680 break;
1681 case LC_FUNCTION_STARTS:
1682 if ( cmd->cmdsize != sizeof(linkedit_data_command) )
1683 diag.error("LC_FUNCTION_STARTS load command size wrong");
1684 else if ( result.functionStarts != nullptr )
1685 diag.error("multiple LC_FUNCTION_STARTS load commands");
1686 result.functionStarts = (linkedit_data_command*)cmd;
1687 break;
1688 case LC_DATA_IN_CODE:
1689 if ( cmd->cmdsize != sizeof(linkedit_data_command) )
1690 diag.error("LC_DATA_IN_CODE load command size wrong");
1691 else if ( result.dataInCode != nullptr )
1692 diag.error("multiple LC_DATA_IN_CODE load commands");
1693 result.dataInCode = (linkedit_data_command*)cmd;
1694 break;
1695 case LC_CODE_SIGNATURE:
1696 if ( cmd->cmdsize != sizeof(linkedit_data_command) )
1697 diag.error("LC_CODE_SIGNATURE load command size wrong");
1698 else if ( result.codeSig != nullptr )
1699 diag.error("multiple LC_CODE_SIGNATURE load commands");
1700 result.codeSig = (linkedit_data_command*)cmd;
1701 break;
1702 case LC_UUID:
1703 if ( cmd->cmdsize != sizeof(uuid_command) )
1704 diag.error("LC_UUID load command size wrong");
1705 else if ( hasUUID )
1706 diag.error("multiple LC_UUID load commands");
1707 hasUUID = true;
1708 break;
1709 case LC_VERSION_MIN_IPHONEOS:
1710 case LC_VERSION_MIN_MACOSX:
1711 case LC_VERSION_MIN_TVOS:
1712 case LC_VERSION_MIN_WATCHOS:
1713 if ( cmd->cmdsize != sizeof(version_min_command) )
1714 diag.error("LC_VERSION_* load command size wrong");
1715 else if ( hasVersion )
1716 diag.error("multiple LC_VERSION_MIN_* load commands");
1717 hasVersion = true;
1718 break;
1719 case LC_BUILD_VERSION:
1720 if ( cmd->cmdsize != (sizeof(build_version_command) + ((build_version_command*)cmd)->ntools * sizeof(build_tool_version)) )
1721 diag.error("LC_BUILD_VERSION load command size wrong");
1722 else if ( hasVersion )
1723 diag.error("multiple LC_BUILD_VERSION load commands");
1724 hasVersion = true;
1725 break;
1726 case LC_ENCRYPTION_INFO:
1727 if ( cmd->cmdsize != sizeof(encryption_info_command) )
1728 diag.error("LC_ENCRYPTION_INFO load command size wrong");
1729 else if ( hasEncrypt )
1730 diag.error("multiple LC_ENCRYPTION_INFO load commands");
1731 else if ( is64() )
1732 diag.error("LC_ENCRYPTION_INFO found in 64-bit mach-o");
1733 hasEncrypt = true;
1734 break;
1735 case LC_ENCRYPTION_INFO_64:
1736 if ( cmd->cmdsize != sizeof(encryption_info_command_64) )
1737 diag.error("LC_ENCRYPTION_INFO_64 load command size wrong");
1738 else if ( hasEncrypt )
1739 diag.error("multiple LC_ENCRYPTION_INFO_64 load commands");
1740 else if ( !is64() )
1741 diag.error("LC_ENCRYPTION_INFO_64 found in 32-bit mach-o");
1742 hasEncrypt = true;
1743 break;
1744 }
1745 });
1746 if ( diag.noError() && (result.dynSymTab != nullptr) && (result.symTab == nullptr) )
1747 diag.error("LC_DYSYMTAB but no LC_SYMTAB load command");
1748
1749 }
1750
1751 void MachOParser::getLinkEditPointers(Diagnostics& diag, LinkEditInfo& result) const
1752 {
1753 getLinkEditLoadCommands(diag, result);
1754 if ( diag.noError() )
1755 getLayoutInfo(result.layout);
1756 }
1757
1758 void MachOParser::forEachSegment(void (^callback)(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop)) const
1759 {
1760 Diagnostics diag;
1761 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1762 if ( cmd->cmd == LC_SEGMENT_64 ) {
1763 const segment_command_64* seg = (segment_command_64*)cmd;
1764 callback(seg->segname, (uint32_t)seg->fileoff, (uint32_t)seg->filesize, seg->vmaddr, seg->vmsize, seg->initprot, stop);
1765 }
1766 else if ( cmd->cmd == LC_SEGMENT ) {
1767 const segment_command* seg = (segment_command*)cmd;
1768 callback(seg->segname, seg->fileoff, seg->filesize, seg->vmaddr, seg->vmsize, seg->initprot, stop);
1769 }
1770 });
1771 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1772 }
1773
1774 const uint8_t* MachOParser::trieWalk(Diagnostics& diag, const uint8_t* start, const uint8_t* end, const char* symbol)
1775 {
1776 uint32_t visitedNodeOffsets[128];
1777 int visitedNodeOffsetCount = 0;
1778 visitedNodeOffsets[visitedNodeOffsetCount++] = 0;
1779 const uint8_t* p = start;
1780 while ( p < end ) {
1781 uint64_t terminalSize = *p++;
1782 if ( terminalSize > 127 ) {
1783 // except for re-export-with-rename, all terminal sizes fit in one byte
1784 --p;
1785 terminalSize = read_uleb128(diag, p, end);
1786 if ( diag.hasError() )
1787 return nullptr;
1788 }
1789 if ( (*symbol == '\0') && (terminalSize != 0) ) {
1790 return p;
1791 }
1792 const uint8_t* children = p + terminalSize;
1793 if ( children > end ) {
1794 diag.error("malformed trie node, terminalSize=0x%llX extends past end of trie\n", terminalSize);
1795 return nullptr;
1796 }
1797 uint8_t childrenRemaining = *children++;
1798 p = children;
1799 uint64_t nodeOffset = 0;
1800 for (; childrenRemaining > 0; --childrenRemaining) {
1801 const char* ss = symbol;
1802 bool wrongEdge = false;
1803 // scan whole edge to get to next edge
1804 // if edge is longer than target symbol name, don't read past end of symbol name
1805 char c = *p;
1806 while ( c != '\0' ) {
1807 if ( !wrongEdge ) {
1808 if ( c != *ss )
1809 wrongEdge = true;
1810 ++ss;
1811 }
1812 ++p;
1813 c = *p;
1814 }
1815 if ( wrongEdge ) {
1816 // advance to next child
1817 ++p; // skip over zero terminator
1818 // skip over uleb128 until last byte is found
1819 while ( (*p & 0x80) != 0 )
1820 ++p;
1821 ++p; // skip over last byte of uleb128
1822 if ( p > end ) {
1823 diag.error("malformed trie node, child node extends past end of trie\n");
1824 return nullptr;
1825 }
1826 }
1827 else {
1828 // the symbol so far matches this edge (child)
1829 // so advance to the child's node
1830 ++p;
1831 nodeOffset = read_uleb128(diag, p, end);
1832 if ( diag.hasError() )
1833 return nullptr;
1834 if ( (nodeOffset == 0) || ( &start[nodeOffset] > end) ) {
1835 diag.error("malformed trie child, nodeOffset=0x%llX out of range\n", nodeOffset);
1836 return nullptr;
1837 }
1838 symbol = ss;
1839 break;
1840 }
1841 }
1842 if ( nodeOffset != 0 ) {
1843 if ( nodeOffset > (end-start) ) {
1844 diag.error("malformed trie child, nodeOffset=0x%llX out of range\n", nodeOffset);
1845 return nullptr;
1846 }
1847 for (int i=0; i < visitedNodeOffsetCount; ++i) {
1848 if ( visitedNodeOffsets[i] == nodeOffset ) {
1849 diag.error("malformed trie child, cycle to nodeOffset=0x%llX\n", nodeOffset);
1850 return nullptr;
1851 }
1852 }
1853 visitedNodeOffsets[visitedNodeOffsetCount++] = (uint32_t)nodeOffset;
1854 if ( visitedNodeOffsetCount >= 128 ) {
1855 diag.error("malformed trie too deep\n");
1856 return nullptr;
1857 }
1858 p = &start[nodeOffset];
1859 }
1860 else
1861 p = end;
1862 }
1863 return nullptr;
1864 }
1865
1866
1867 uint64_t MachOParser::read_uleb128(Diagnostics& diag, const uint8_t*& p, const uint8_t* end)
1868 {
1869 uint64_t result = 0;
1870 int bit = 0;
1871 do {
1872 if ( p == end ) {
1873 diag.error("malformed uleb128");
1874 break;
1875 }
1876 uint64_t slice = *p & 0x7f;
1877
1878 if ( bit > 63 ) {
1879 diag.error("uleb128 too big for uint64");
1880 break;
1881 }
1882 else {
1883 result |= (slice << bit);
1884 bit += 7;
1885 }
1886 }
1887 while (*p++ & 0x80);
1888 return result;
1889 }
1890
1891
1892 int64_t MachOParser::read_sleb128(Diagnostics& diag, const uint8_t*& p, const uint8_t* end)
1893 {
1894 int64_t result = 0;
1895 int bit = 0;
1896 uint8_t byte = 0;
1897 do {
1898 if ( p == end ) {
1899 diag.error("malformed sleb128");
1900 break;
1901 }
1902 byte = *p++;
1903 result |= (((int64_t)(byte & 0x7f)) << bit);
1904 bit += 7;
1905 } while (byte & 0x80);
1906 // sign extend negative numbers
1907 if ( (byte & 0x40) != 0 )
1908 result |= (-1LL) << bit;
1909 return result;
1910 }
1911
1912 bool MachOParser::is64() const
1913 {
1914 #if DYLD_IN_PROCESS
1915 return (sizeof(void*) == 8);
1916 #else
1917 return (header()->magic == MH_MAGIC_64);
1918 #endif
1919 }
1920
1921
1922
1923
1924 bool MachOParser::findClosestSymbol(uint64_t targetUnslidAddress, const char** symbolName, uint64_t* symbolUnslidAddr) const
1925 {
1926 Diagnostics diag;
1927 __block uint64_t closestNValueSoFar = 0;
1928 __block const char* closestNameSoFar = nullptr;
1929 forEachGlobalSymbol(diag, ^(const char* aSymbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop) {
1930 if ( n_value <= targetUnslidAddress ) {
1931 if ( (closestNameSoFar == nullptr) || (closestNValueSoFar < n_value) ) {
1932 closestNValueSoFar = n_value;
1933 closestNameSoFar = aSymbolName;
1934 }
1935 }
1936 });
1937 forEachLocalSymbol(diag, ^(const char* aSymbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop) {
1938 if ( n_value <= targetUnslidAddress ) {
1939 if ( (closestNameSoFar == nullptr) || (closestNValueSoFar < n_value) ) {
1940 closestNValueSoFar = n_value;
1941 closestNameSoFar = aSymbolName;
1942 }
1943 }
1944 });
1945 if ( closestNameSoFar == nullptr ) {
1946 return false;
1947 }
1948
1949 *symbolName = closestNameSoFar;
1950 *symbolUnslidAddr = closestNValueSoFar;
1951 return true;
1952 }
1953
1954
1955 #if DYLD_IN_PROCESS
1956
1957 bool MachOParser::findClosestSymbol(const void* addr, const char** symbolName, const void** symbolAddress) const
1958 {
1959 uint64_t slide = getSlide();
1960 uint64_t symbolUnslidAddr;
1961 if ( findClosestSymbol((uint64_t)addr - slide, symbolName, &symbolUnslidAddr) ) {
1962 *symbolAddress = (const void*)(long)(symbolUnslidAddr + slide);
1963 return true;
1964 }
1965 return false;
1966 }
1967
1968 intptr_t MachOParser::getSlide() const
1969 {
1970 Diagnostics diag;
1971 __block intptr_t slide = 0;
1972 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1973 #if __LP64__
1974 if ( cmd->cmd == LC_SEGMENT_64 ) {
1975 const segment_command_64* seg = (segment_command_64*)cmd;
1976 if ( strcmp(seg->segname, "__TEXT") == 0 ) {
1977 slide = ((uint64_t)header()) - seg->vmaddr;
1978 stop = true;
1979 }
1980 }
1981 #else
1982 if ( cmd->cmd == LC_SEGMENT ) {
1983 const segment_command* seg = (segment_command*)cmd;
1984 if ( strcmp(seg->segname, "__TEXT") == 0 ) {
1985 slide = ((uint32_t)header()) - seg->vmaddr;
1986 stop = true;
1987 }
1988 }
1989 #endif
1990 });
1991 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1992 return slide;
1993 }
1994
1995 // this is only used by dlsym() at runtime. All other binding is done when the closure is built.
1996 bool MachOParser::hasExportedSymbol(const char* symbolName, DependentFinder finder, void** result) const
1997 {
1998 typedef void* (*ResolverFunc)(void);
1999 ResolverFunc resolver;
2000 Diagnostics diag;
2001 FoundSymbol foundInfo;
2002 if ( findExportedSymbol(diag, symbolName, (void*)header(), foundInfo, finder) ) {
2003 switch ( foundInfo.kind ) {
2004 case FoundSymbol::Kind::headerOffset:
2005 *result = (uint8_t*)foundInfo.foundInDylib + foundInfo.value;
2006 break;
2007 case FoundSymbol::Kind::absolute:
2008 *result = (void*)(long)foundInfo.value;
2009 break;
2010 case FoundSymbol::Kind::resolverOffset:
2011 // foundInfo.value contains "stub".
2012 // in dlsym() we want to call resolver function to get final function address
2013 resolver = (ResolverFunc)((uint8_t*)foundInfo.foundInDylib + foundInfo.resolverFuncOffset);
2014 *result = (*resolver)();
2015 break;
2016 }
2017 return true;
2018 }
2019 return false;
2020 }
2021
2022 const char* MachOParser::segmentName(uint32_t targetSegIndex) const
2023 {
2024 __block const char* result = nullptr;
2025 __block uint32_t segIndex = 0;
2026 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
2027 if ( segIndex == targetSegIndex ) {
2028 result = segName;
2029 stop = true;
2030 }
2031 ++segIndex;
2032 });
2033 return result;
2034 }
2035
2036 #else
2037
2038
2039 bool MachOParser::uses16KPages() const
2040 {
2041 return (header()->cputype == CPU_TYPE_ARM64);
2042 }
2043
2044
2045 bool MachOParser::isEncrypted() const
2046 {
2047 __block bool result = false;
2048 Diagnostics diag;
2049 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2050 if ( cmd->cmd == LC_SEGMENT_64 ) {
2051 const segment_command_64* segCmd = (segment_command_64*)cmd;
2052 if ( segCmd->flags & SG_PROTECTED_VERSION_1 ) {
2053 result = true;
2054 stop = true;
2055 }
2056 }
2057 else if ( cmd->cmd == LC_SEGMENT ) {
2058 const segment_command* segCmd = (segment_command*)cmd;
2059 if ( segCmd->flags & SG_PROTECTED_VERSION_1 ) {
2060 result = true;
2061 stop = true;
2062 }
2063 }
2064 else if ( (cmd->cmd == LC_ENCRYPTION_INFO) || (cmd->cmd == LC_ENCRYPTION_INFO_64) ) {
2065 const encryption_info_command* encCmd = (encryption_info_command*)cmd;
2066 if ( encCmd->cryptid != 0 ) {
2067 result = true;
2068 stop = true;
2069 }
2070 }
2071 });
2072 return result;
2073 }
2074
2075 bool MachOParser::hasWeakDefs() const
2076 {
2077 return (header()->flags & (MH_WEAK_DEFINES|MH_BINDS_TO_WEAK));
2078 }
2079
2080 bool MachOParser::hasObjC() const
2081 {
2082 __block bool result = false;
2083 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, const void* content, size_t size, bool illegalSectionSize, bool& stop) {
2084 if ( (strncmp(sectionName, "__objc_imageinfo", 16) == 0) && (strncmp(segmentName, "__DATA", 6) == 0) ) {
2085 result = true;
2086 stop = true;
2087 }
2088 if ( (header()->cputype == CPU_TYPE_I386) && (strcmp(sectionName, "__image_info") == 0) && (strcmp(segmentName, "__OBJC") == 0) ) {
2089 result = true;
2090 stop = true;
2091 }
2092 });
2093 return result;
2094 }
2095
2096 bool MachOParser::hasPlusLoadMethod(Diagnostics& diag) const
2097 {
2098 #if 1
2099 __block bool result = false;
2100 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, uint64_t addr, const void* content, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& stop) {
2101 if ( ( (flags & SECTION_TYPE) == S_CSTRING_LITERALS ) ) {
2102 if (illegalSectionSize) {
2103 diag.error("cstring section %s/%s extends beyond the end of the segment", segmentName, sectionName);
2104 return;
2105 }
2106 const char* s = (char*)content;
2107 const char* end = s + size;
2108 while ( s < end ) {
2109 if ( strcmp(s, "load") == 0 ) {
2110 result = true;
2111 stop = true;
2112 return;
2113 }
2114 while (*s != '\0' )
2115 ++s;
2116 ++s;
2117 }
2118 }
2119 });
2120 return result;
2121 #else
2122 LayoutInfo layout;
2123 getLayoutInfo(layout);
2124
2125 __block bool hasSwift = false;
2126 __block const void* classList = nullptr;
2127 __block size_t classListSize = 0;
2128 __block const void* objcData = nullptr;
2129 __block size_t objcDataSize = 0;
2130 __block const void* objcConstData = nullptr;
2131 __block size_t objcConstDataSize = 0;
2132 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, uint64_t addr, const void* content, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool& stop) {
2133 if ( (strcmp(sectionName, "__objc_classlist") == 0) && (strncmp(segmentName, "__DATA", 6) == 0) ) {
2134 classList = content;
2135 classListSize = size;
2136 }
2137 if ( (strcmp(sectionName, "__objc_imageinfo") == 0) && (strncmp(segmentName, "__DATA", 6) == 0) ) {
2138 const uint32_t* info = (uint32_t*)content;
2139 uint8_t swiftVersion = (info[1] >> 8) & 0xFF;
2140 if ( swiftVersion != 0 )
2141 hasSwift = true;
2142 }
2143 });
2144 if ( classList == nullptr )
2145 return false;
2146 // FIXME: might be objc and swift intermixed
2147 if ( hasSwift )
2148 return true;
2149 const bool p64 = is64();
2150 const uint32_t pointerSize = (p64 ? 8 : 4);
2151 const uint64_t* classArray64 = (uint64_t*)classList;
2152 const uint32_t* classArray32 = (uint32_t*)classList;
2153 const uint32_t classListCount = (uint32_t)(classListSize/pointerSize);
2154 for (uint32_t i=0; i < classListCount; ++i) {
2155 if ( p64 ) {
2156 uint64_t classObjAddr = classArray64[i];
2157 const uint64_t* classObjContent = (uint64_t*)getContentForVMAddr(layout, classObjAddr);
2158 uint64_t classROAddr = classObjContent[4];
2159 uint64_t metaClassObjAddr = classObjContent[0];
2160 const uint64_t* metaClassObjContent = (uint64_t*)getContentForVMAddr(layout, metaClassObjAddr);
2161 uint64_t metaClassROObjAddr = metaClassObjContent[4];
2162 const uint64_t* metaClassROObjContent = (uint64_t*)getContentForVMAddr(layout, metaClassROObjAddr);
2163 uint64_t metaClassMethodListAddr = metaClassROObjContent[4];
2164 if ( metaClassMethodListAddr != 0 ) {
2165 const uint64_t* metaClassMethodListContent = (uint64_t*)getContentForVMAddr(layout, metaClassMethodListAddr);
2166 const uint32_t methodListCount = ((uint32_t*)metaClassMethodListContent)[1];
2167 for (uint32_t m=0; m < methodListCount; ++m) {
2168 uint64_t methodNameAddr = metaClassMethodListContent[m*3+1];
2169 const char* methodNameContent = (char*)getContentForVMAddr(layout, methodNameAddr);
2170 if ( strcmp(methodNameContent, "load") == 0 ) {
2171 return true;
2172 }
2173 }
2174 }
2175 }
2176 else {
2177
2178 }
2179 }
2180
2181 return false;
2182 #endif
2183 }
2184
2185 bool MachOParser::getCDHash(uint8_t cdHash[20])
2186 {
2187 Diagnostics diag;
2188 LinkEditInfo leInfo;
2189 getLinkEditPointers(diag, leInfo);
2190 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2191 return false;
2192
2193 return cdHashOfCodeSignature(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff), leInfo.codeSig->datasize, cdHash);
2194 }
2195
2196 bool MachOParser::usesLibraryValidation() const
2197 {
2198 Diagnostics diag;
2199 LinkEditInfo leInfo;
2200 getLinkEditPointers(diag, leInfo);
2201 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2202 return false;
2203
2204 const CS_CodeDirectory* cd = (const CS_CodeDirectory*)findCodeDirectoryBlob(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff), leInfo.codeSig->datasize);
2205 if ( cd == nullptr )
2206 return false;
2207
2208 // check for CS_REQUIRE_LV in CS_CodeDirectory.flags
2209 return (htonl(cd->flags) & CS_REQUIRE_LV);
2210 }
2211
2212
2213 bool MachOParser::isRestricted() const
2214 {
2215 __block bool result = false;
2216 forEachSection(^(const char* segName, const char* sectionName, uint32_t flags, const void* content, size_t size, bool illegalSectionSize, bool& stop) {
2217 if ( (strcmp(segName, "__RESTRICT") == 0) && (strcmp(sectionName, "__restrict") == 0) ) {
2218 result = true;
2219 stop = true;
2220 }
2221
2222 });
2223 return result;
2224 }
2225
2226 bool MachOParser::hasCodeSignature(uint32_t& fileOffset, uint32_t& size)
2227 {
2228 fileOffset = 0;
2229 size = 0;
2230
2231 // <rdar://problem/13622786> ignore code signatures in macOS binaries built with pre-10.9 tools
2232 Platform platform;
2233 uint32_t minOS;
2234 uint32_t sdk;
2235 if ( getPlatformAndVersion(&platform, &minOS, &sdk) ) {
2236 // if have LC_VERSION_MIN_MACOSX and it says SDK < 10.9, so ignore code signature
2237 if ( (platform == Platform::macOS) && (sdk < 0x000A0900) )
2238 return false;
2239 }
2240 else {
2241 switch ( header()->cputype ) {
2242 case CPU_TYPE_I386:
2243 case CPU_TYPE_X86_64:
2244 // old binary with no LC_VERSION_*, assume intel binaries are old macOS binaries (ignore code signature)
2245 return false;
2246 }
2247 }
2248
2249 Diagnostics diag;
2250 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2251 if ( cmd->cmd == LC_CODE_SIGNATURE ) {
2252 const linkedit_data_command* sigCmd = (linkedit_data_command*)cmd;
2253 fileOffset = sigCmd->dataoff;
2254 size = sigCmd->datasize;
2255 stop = true;
2256 }
2257 });
2258 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2259 return (fileOffset != 0);
2260 }
2261
2262 bool MachOParser::getEntry(uint32_t& offset, bool& usesCRT)
2263 {
2264 Diagnostics diag;
2265 offset = 0;
2266 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2267 if ( cmd->cmd == LC_MAIN ) {
2268 entry_point_command* mainCmd = (entry_point_command*)cmd;
2269 usesCRT = false;
2270 offset = (uint32_t)mainCmd->entryoff;
2271 stop = true;
2272 }
2273 else if ( cmd->cmd == LC_UNIXTHREAD ) {
2274 stop = true;
2275 usesCRT = true;
2276 const uint32_t* regs32 = (uint32_t*)(((char*)cmd) + 16);
2277 const uint64_t* regs64 = (uint64_t*)(((char*)cmd) + 16);
2278 uint64_t startAddress = 0;
2279 switch ( header()->cputype ) {
2280 case CPU_TYPE_I386:
2281 startAddress = regs32[10]; // i386_thread_state_t.eip
2282 break;
2283 case CPU_TYPE_X86_64:
2284 startAddress = regs64[16]; // x86_thread_state64_t.rip
2285 break;
2286 case CPU_TYPE_ARM:
2287 startAddress = regs32[15]; // arm_thread_state_t.__pc
2288 break;
2289 case CPU_TYPE_ARM64:
2290 startAddress = regs64[32]; // arm_thread_state64_t.__pc
2291 break;
2292 }
2293 offset = (uint32_t)(startAddress - preferredLoadAddress());
2294 }
2295 });
2296 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2297 // FIXME: validate offset is into executable segment
2298 return (offset != 0);
2299 }
2300
2301 bool MachOParser::canBePlacedInDyldCache(const std::string& path) const {
2302 std::set<std::string> reasons;
2303 return canBePlacedInDyldCache(path, reasons);
2304 }
2305
2306 bool MachOParser::canBePlacedInDyldCache(const std::string& path, std::set<std::string>& reasons) const
2307 {
2308 bool retval = true;
2309 // only dylibs can go in cache
2310 if ( fileType() != MH_DYLIB ) {
2311 reasons.insert("Not MH_DYLIB");
2312 return false; // cannot continue, installName() will assert() if not a dylib
2313 }
2314
2315 // only dylibs built for /usr/lib or /System/Library can go in cache
2316 const char* dylibName = installName();
2317 if ( (strncmp(dylibName, "/usr/lib/", 9) != 0) && (strncmp(dylibName, "/System/Library/", 16) != 0) ) {
2318 retval = false;
2319 reasons.insert("Not in '/usr/lib/' or '/System/Library/'");
2320 }
2321
2322 // flat namespace files cannot go in cache
2323 if ( (header()->flags & MH_TWOLEVEL) == 0 ) {
2324 retval = false;
2325 reasons.insert("Not built with two level namespaces");
2326 }
2327
2328 // don't put debug variants into dyld cache
2329 if ( endsWith(path, "_profile.dylib") || endsWith(path, "_debug.dylib") || endsWith(path, "_profile") || endsWith(path, "_debug") || endsWith(path, "/CoreADI") ) {
2330 retval = false;
2331 reasons.insert("Variant image");
2332 }
2333
2334 // dylib must have extra info for moving DATA and TEXT segments apart
2335 __block bool hasExtraInfo = false;
2336 __block bool hasDyldInfo = false;
2337 Diagnostics diag;
2338 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2339 if ( cmd->cmd == LC_SEGMENT_SPLIT_INFO )
2340 hasExtraInfo = true;
2341 if ( cmd->cmd == LC_DYLD_INFO_ONLY )
2342 hasDyldInfo = true;
2343 });
2344 if ( !hasExtraInfo ) {
2345 retval = false;
2346 reasons.insert("Missing split seg info");
2347 }
2348 if ( !hasDyldInfo ) {
2349 retval = false;
2350 reasons.insert("Old binary, missing dyld info");
2351 }
2352
2353 // dylib can only depend on other dylibs in the shared cache
2354 __block bool allDepPathsAreGood = true;
2355 forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
2356 if ( (strncmp(loadPath, "/usr/lib/", 9) != 0) && (strncmp(loadPath, "/System/Library/", 16) != 0) ) {
2357 allDepPathsAreGood = false;
2358 stop = true;
2359 }
2360 });
2361 if ( !allDepPathsAreGood ) {
2362 retval = false;
2363 reasons.insert("Depends on cache inelegible dylibs");
2364 }
2365
2366 // dylibs with interposing info cannot be in cache
2367 __block bool hasInterposing = false;
2368 forEachInterposingTuple(diag, ^(uint32_t segIndex, uint64_t replacementSegOffset, uint64_t replaceeSegOffset, uint64_t replacementContent, bool& stop) {
2369 hasInterposing = true;
2370 });
2371 if ( hasInterposing ) {
2372 retval = false;
2373 reasons.insert("Has interposing tuples");
2374 }
2375
2376 return retval;
2377 }
2378
2379 bool MachOParser::isDynamicExecutable() const
2380 {
2381 if ( fileType() != MH_EXECUTE )
2382 return false;
2383
2384 // static executables do not have dyld load command
2385 __block bool hasDyldLoad = false;
2386 Diagnostics diag;
2387 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2388 if ( cmd->cmd == LC_LOAD_DYLINKER ) {
2389 hasDyldLoad = true;
2390 stop = true;
2391 }
2392 });
2393 return hasDyldLoad;
2394 }
2395
2396
2397 bool MachOParser::isSlideable() const
2398 {
2399 if ( header()->filetype == MH_DYLIB )
2400 return true;
2401 if ( header()->filetype == MH_BUNDLE )
2402 return true;
2403 if ( (header()->filetype == MH_EXECUTE) && (header()->flags & MH_PIE) )
2404 return true;
2405
2406 return false;
2407 }
2408
2409
2410
2411 bool MachOParser::hasInitializer(Diagnostics& diag) const
2412 {
2413 __block bool result = false;
2414 forEachInitializer(diag, ^(uint32_t offset) {
2415 result = true;
2416 });
2417 return result;
2418 }
2419
2420 void MachOParser::forEachInitializer(Diagnostics& diag, void (^callback)(uint32_t offset)) const
2421 {
2422 __block uint64_t textSegAddrStart = 0;
2423 __block uint64_t textSegAddrEnd = 0;
2424
2425 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
2426 if ( strcmp(segName, "__TEXT") == 0 ) {
2427 textSegAddrStart = vmAddr;
2428 textSegAddrEnd = vmAddr + vmSize;
2429 stop = true;
2430 }
2431 });
2432 if ( textSegAddrStart == textSegAddrEnd ) {
2433 diag.error("no __TEXT segment");
2434 return;
2435 }
2436
2437 // if dylib linked with -init linker option, that initializer is first
2438 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2439 if ( cmd->cmd == LC_ROUTINES ) {
2440 const routines_command* routines = (routines_command*)cmd;
2441 uint64_t dashInit = routines->init_address;
2442 if ( (textSegAddrStart < dashInit) && (dashInit < textSegAddrEnd) )
2443 callback((uint32_t)(dashInit - textSegAddrStart));
2444 else
2445 diag.error("-init does not point within __TEXT segment");
2446 }
2447 else if ( cmd->cmd == LC_ROUTINES_64 ) {
2448 const routines_command_64* routines = (routines_command_64*)cmd;
2449 uint64_t dashInit = routines->init_address;
2450 if ( (textSegAddrStart < dashInit) && (dashInit < textSegAddrEnd) )
2451 callback((uint32_t)(dashInit - textSegAddrStart));
2452 else
2453 diag.error("-init does not point within __TEXT segment");
2454 }
2455 });
2456
2457 // next any function pointers in mod-init section
2458 bool p64 = is64();
2459 unsigned pointerSize = p64 ? 8 : 4;
2460 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, const void* content, size_t size, bool illegalSectionSize, bool& stop) {
2461 if ( (flags & SECTION_TYPE) == S_MOD_INIT_FUNC_POINTERS ) {
2462 if ( (size % pointerSize) != 0 ) {
2463 diag.error("initializer section %s/%s has bad size", segmentName, sectionName);
2464 stop = true;
2465 return;
2466 }
2467 if ( illegalSectionSize ) {
2468 diag.error("initializer section %s/%s extends beyond the end of the segment", segmentName, sectionName);
2469 stop = true;
2470 return;
2471 }
2472 if ( ((long)content % pointerSize) != 0 ) {
2473 diag.error("initializer section %s/%s is not pointer aligned", segmentName, sectionName);
2474 stop = true;
2475 return;
2476 }
2477 if ( p64 ) {
2478 const uint64_t* initsStart = (uint64_t*)content;
2479 const uint64_t* initsEnd = (uint64_t*)((uint8_t*)content + size);
2480 for (const uint64_t* p=initsStart; p < initsEnd; ++p) {
2481 uint64_t anInit = *p;
2482 if ( (anInit <= textSegAddrStart) || (anInit > textSegAddrEnd) ) {
2483 diag.error("initializer 0x%0llX does not point within __TEXT segment", anInit);
2484 stop = true;
2485 break;
2486 }
2487 callback((uint32_t)(anInit - textSegAddrStart));
2488 }
2489 }
2490 else {
2491 const uint32_t* initsStart = (uint32_t*)content;
2492 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + size);
2493 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2494 uint32_t anInit = *p;
2495 if ( (anInit <= textSegAddrStart) || (anInit > textSegAddrEnd) ) {
2496 diag.error("initializer 0x%0X does not point within __TEXT segment", anInit);
2497 stop = true;
2498 break;
2499 }
2500 callback(anInit - (uint32_t)textSegAddrStart);
2501 }
2502 }
2503 }
2504 });
2505 }
2506
2507 void MachOParser::forEachDOFSection(Diagnostics& diag, void (^callback)(uint32_t offset)) const
2508 {
2509 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, const void* content, size_t size, bool illegalSectionSize, bool& stop) {
2510 if ( ( (flags & SECTION_TYPE) == S_DTRACE_DOF ) && !illegalSectionSize ) {
2511 callback((uint32_t)((uintptr_t)content - (uintptr_t)header()));
2512 }
2513 });
2514 }
2515
2516
2517 uint32_t MachOParser::segmentCount() const
2518 {
2519 __block uint32_t count = 0;
2520 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
2521 ++count;
2522 });
2523 return count;
2524 }
2525
2526 void MachOParser::forEachSegment(void (^callback)(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, uint32_t segIndex, uint64_t sizeOfSections, uint8_t p2align, bool& stop)) const
2527 {
2528 Diagnostics diag;
2529 __block uint32_t segIndex = 0;
2530 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2531 if ( cmd->cmd == LC_SEGMENT_64 ) {
2532 const segment_command_64* segCmd = (segment_command_64*)cmd;
2533 uint64_t sizeOfSections = segCmd->vmsize;
2534 uint8_t p2align = 0;
2535 const section_64* const sectionsStart = (section_64*)((char*)segCmd + sizeof(struct segment_command_64));
2536 const section_64* const sectionsEnd = &sectionsStart[segCmd->nsects];
2537 for (const section_64* sect=sectionsStart; sect < sectionsEnd; ++sect) {
2538 sizeOfSections = sect->addr + sect->size - segCmd->vmaddr;
2539 if ( sect->align > p2align )
2540 p2align = sect->align;
2541 }
2542 callback(segCmd->segname, (uint32_t)segCmd->fileoff, (uint32_t)segCmd->filesize, segCmd->vmaddr, segCmd->vmsize, segCmd->initprot, segIndex, sizeOfSections, p2align, stop);
2543 ++segIndex;
2544 }
2545 else if ( cmd->cmd == LC_SEGMENT ) {
2546 const segment_command* segCmd = (segment_command*)cmd;
2547 uint64_t sizeOfSections = segCmd->vmsize;
2548 uint8_t p2align = 0;
2549 const section* const sectionsStart = (section*)((char*)segCmd + sizeof(struct segment_command));
2550 const section* const sectionsEnd = &sectionsStart[segCmd->nsects];
2551 for (const section* sect=sectionsStart; sect < sectionsEnd; ++sect) {
2552 sizeOfSections = sect->addr + sect->size - segCmd->vmaddr;
2553 if ( sect->align > p2align )
2554 p2align = sect->align;
2555 }
2556 callback(segCmd->segname, (uint32_t)segCmd->fileoff, (uint32_t)segCmd->filesize, segCmd->vmaddr, segCmd->vmsize, segCmd->initprot, segIndex, sizeOfSections, p2align, stop);
2557 ++segIndex;
2558 }
2559 });
2560 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2561 }
2562
2563 void MachOParser::forEachExportedSymbol(Diagnostics diag, void (^handler)(const char* symbolName, uint64_t imageOffset, bool isReExport, bool& stop)) const
2564 {
2565 LinkEditInfo leInfo;
2566 getLinkEditPointers(diag, leInfo);
2567 if ( diag.hasError() )
2568 return;
2569
2570 if ( leInfo.dyldInfo != nullptr ) {
2571 const uint8_t* trieStart = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->export_off);
2572 const uint8_t* trieEnd = trieStart + leInfo.dyldInfo->export_size;
2573 std::vector<ExportInfoTrie::Entry> exports;
2574 if ( !ExportInfoTrie::parseTrie(trieStart, trieEnd, exports) ) {
2575 diag.error("malformed exports trie");
2576 return;
2577 }
2578 bool stop = false;
2579 for (const ExportInfoTrie::Entry& exp : exports) {
2580 bool isReExport = (exp.info.flags & EXPORT_SYMBOL_FLAGS_REEXPORT);
2581 handler(exp.name.c_str(), exp.info.address, isReExport, stop);
2582 if ( stop )
2583 break;
2584 }
2585 }
2586 }
2587
2588 bool MachOParser::invalidRebaseState(Diagnostics& diag, const char* opcodeName, const MachOParser::LinkEditInfo& leInfo,
2589 bool segIndexSet, uint32_t pointerSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type) const
2590 {
2591 if ( !segIndexSet ) {
2592 diag.error("%s missing preceding REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", opcodeName);
2593 return true;
2594 }
2595 if ( segmentIndex >= leInfo.layout.segmentCount ) {
2596 diag.error("%s segment index %d too large", opcodeName, segmentIndex);
2597 return true;
2598 }
2599 if ( segmentOffset > (leInfo.layout.segments[segmentIndex].segSize-pointerSize) ) {
2600 diag.error("%s current segment offset 0x%08llX beyond segment size (0x%08llX)", opcodeName, segmentOffset, leInfo.layout.segments[segmentIndex].segSize);
2601 return true;
2602 }
2603 switch ( type ) {
2604 case REBASE_TYPE_POINTER:
2605 if ( !leInfo.layout.segments[segmentIndex].writable ) {
2606 diag.error("%s pointer rebase is in non-writable segment", opcodeName);
2607 return true;
2608 }
2609 if ( leInfo.layout.segments[segmentIndex].executable ) {
2610 diag.error("%s pointer rebase is in executable segment", opcodeName);
2611 return true;
2612 }
2613 break;
2614 case REBASE_TYPE_TEXT_ABSOLUTE32:
2615 case REBASE_TYPE_TEXT_PCREL32:
2616 if ( !leInfo.layout.segments[segmentIndex].textRelocsAllowed ) {
2617 diag.error("%s text rebase is in segment that does not support text relocations", opcodeName);
2618 return true;
2619 }
2620 if ( leInfo.layout.segments[segmentIndex].writable ) {
2621 diag.error("%s text rebase is in writable segment", opcodeName);
2622 return true;
2623 }
2624 if ( !leInfo.layout.segments[segmentIndex].executable ) {
2625 diag.error("%s pointer rebase is in non-executable segment", opcodeName);
2626 return true;
2627 }
2628 break;
2629 default:
2630 diag.error("%s unknown rebase type %d", opcodeName, type);
2631 return true;
2632 }
2633 return false;
2634 }
2635
2636 void MachOParser::forEachRebase(Diagnostics& diag, void (^handler)(uint32_t segIndex, uint64_t segOffset, uint8_t type, bool& stop)) const
2637 {
2638 LinkEditInfo leInfo;
2639 getLinkEditPointers(diag, leInfo);
2640 if ( diag.hasError() )
2641 return;
2642
2643 if ( leInfo.dyldInfo != nullptr ) {
2644 // work around linker bug that laid down rebase opcodes for lazy pointer section when -bind_at_load used
2645 __block int lpSegIndex = 0;
2646 __block uint64_t lpSegOffsetStart = 0;
2647 __block uint64_t lpSegOffsetEnd = 0;
2648 bool hasWeakBinds = (leInfo.dyldInfo->weak_bind_size != 0);
2649 if ( leInfo.dyldInfo->lazy_bind_size == 0 ) {
2650 __block uint64_t lpAddr = 0;
2651 __block uint64_t lpSize = 0;
2652 forEachSection(^(const char* segName, const char* sectionName, uint32_t flags, uint64_t addr, const void* content, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& sectStop) {
2653 if ( (flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
2654 lpAddr = addr;
2655 lpSize = size;
2656 sectStop = true;
2657 }
2658 });
2659 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& segStop) {
2660 if ( (vmAddr <= lpAddr) && (vmAddr+vmSize >= lpAddr+lpSize) ) {
2661 lpSegOffsetStart = lpAddr - vmAddr;
2662 lpSegOffsetEnd = lpSegOffsetStart + lpSize;
2663 segStop = true;
2664 return;
2665 }
2666 ++lpSegIndex;
2667 });
2668 }
2669 // don't remove rebase if there is a weak-bind at pointer location
2670 bool (^weakBindAt)(uint64_t segOffset) = ^(uint64_t segOffset) {
2671 if ( !hasWeakBinds )
2672 return false;
2673 __block bool result = false;
2674 Diagnostics weakDiag;
2675 forEachWeakDef(weakDiag, ^(bool strongDef, uint32_t dataSegIndex, uint64_t dataSegOffset, uint64_t addend, const char* symbolName, bool& weakStop) {
2676 if ( segOffset == dataSegOffset ) {
2677 result = true;
2678 weakStop = true;
2679 }
2680 });
2681 return result;
2682 };
2683
2684
2685 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
2686 const uint8_t* end = p + leInfo.dyldInfo->rebase_size;
2687 const uint32_t pointerSize = (is64() ? 8 : 4);
2688 uint8_t type = 0;
2689 int segIndex = 0;
2690 uint64_t segOffset = 0;
2691 uint64_t count;
2692 uint64_t skip;
2693 bool segIndexSet = false;
2694 bool stop = false;
2695 while ( !stop && diag.noError() && (p < end) ) {
2696 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
2697 uint8_t opcode = *p & REBASE_OPCODE_MASK;
2698 ++p;
2699 switch (opcode) {
2700 case REBASE_OPCODE_DONE:
2701 stop = true;
2702 break;
2703 case REBASE_OPCODE_SET_TYPE_IMM:
2704 type = immediate;
2705 break;
2706 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
2707 segIndex = immediate;
2708 segOffset = read_uleb128(diag, p, end);
2709 segIndexSet = true;
2710 break;
2711 case REBASE_OPCODE_ADD_ADDR_ULEB:
2712 segOffset += read_uleb128(diag, p, end);
2713 break;
2714 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
2715 segOffset += immediate*pointerSize;
2716 break;
2717 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
2718 for (int i=0; i < immediate; ++i) {
2719 if ( invalidRebaseState(diag, "REBASE_OPCODE_DO_REBASE_IMM_TIMES", leInfo, segIndexSet, pointerSize, segIndex, segOffset, type) )
2720 return;
2721 if ( (segIndex != lpSegIndex) || (segOffset > lpSegOffsetEnd) || (segOffset < lpSegOffsetStart) || weakBindAt(segOffset) )
2722 handler(segIndex, segOffset, type, stop);
2723 segOffset += pointerSize;
2724 }
2725 break;
2726 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
2727 count = read_uleb128(diag, p, end);
2728 for (uint32_t i=0; i < count; ++i) {
2729 if ( invalidRebaseState(diag, "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segIndexSet, pointerSize, segIndex, segOffset, type) )
2730 return;
2731 if ( (segIndex != lpSegIndex) || (segOffset > lpSegOffsetEnd) || (segOffset < lpSegOffsetStart) || weakBindAt(segOffset) )
2732 handler(segIndex, segOffset, type, stop);
2733 segOffset += pointerSize;
2734 }
2735 break;
2736 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
2737 if ( invalidRebaseState(diag, "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segIndexSet, pointerSize, segIndex, segOffset, type) )
2738 return;
2739 handler(segIndex, segOffset, type, stop);
2740 segOffset += read_uleb128(diag, p, end) + pointerSize;
2741 break;
2742 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
2743 count = read_uleb128(diag, p, end);
2744 if ( diag.hasError() )
2745 break;
2746 skip = read_uleb128(diag, p, end);
2747 for (uint32_t i=0; i < count; ++i) {
2748 if ( invalidRebaseState(diag, "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB", leInfo, segIndexSet, pointerSize, segIndex, segOffset, type) )
2749 return;
2750 handler(segIndex, segOffset, type, stop);
2751 segOffset += skip + pointerSize;
2752 }
2753 break;
2754 default:
2755 diag.error("unknown rebase opcode 0x%02X", opcode);
2756 }
2757 }
2758 }
2759 else {
2760 // old binary
2761 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->locreloff);
2762 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nlocrel];
2763 bool stop = false;
2764 const uint8_t relocSize = (is64() ? 3 : 2);
2765 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
2766 if ( reloc->r_length != relocSize ) {
2767 diag.error("local relocation has wrong r_length");
2768 break;
2769 }
2770 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
2771 diag.error("local relocation has wrong r_type");
2772 break;
2773 }
2774 doLocalReloc(diag, reloc->r_address, stop, handler);
2775 }
2776 // then process indirect symbols
2777 forEachIndirectPointer(diag, ^(uint32_t segIndex, uint64_t segOffset, bool bind, int bindLibOrdinal,
2778 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
2779 if ( !bind && !bindLazy )
2780 handler(segIndex, segOffset, REBASE_TYPE_POINTER, indStop);
2781 });
2782 }
2783 }
2784
2785 bool MachOParser::doLocalReloc(Diagnostics& diag, uint32_t r_address, bool& stop, void (^handler)(uint32_t segIndex, uint64_t segOffset, uint8_t type, bool& stop)) const
2786 {
2787 bool firstWritable = (header()->cputype == CPU_TYPE_X86_64);
2788 __block uint64_t relocBaseAddress = 0;
2789 __block bool baseFound = false;
2790 __block uint32_t segIndex = 0;
2791 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool &stopSeg) {
2792 if ( !baseFound ) {
2793 if ( !firstWritable || (protections & VM_PROT_WRITE) ) {
2794 baseFound = true;
2795 relocBaseAddress = vmAddr;
2796 }
2797 }
2798 if ( baseFound && (vmAddr < relocBaseAddress+r_address) && (relocBaseAddress+r_address < vmAddr+vmSize) ) {
2799 uint8_t type = REBASE_TYPE_POINTER;
2800 uint64_t segOffset = relocBaseAddress + r_address - vmAddr;
2801 handler(segIndex, segOffset, type, stop);
2802 stopSeg = true;
2803 }
2804 ++segIndex;
2805 });
2806
2807 return false;
2808 }
2809
2810 int MachOParser::libOrdinalFromDesc(uint16_t n_desc) const
2811 {
2812 // -flat_namespace is always flat lookup
2813 if ( (header()->flags & MH_TWOLEVEL) == 0 )
2814 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
2815
2816 // extract byte from undefined symbol entry
2817 int libIndex = GET_LIBRARY_ORDINAL(n_desc);
2818 switch ( libIndex ) {
2819 case SELF_LIBRARY_ORDINAL:
2820 return BIND_SPECIAL_DYLIB_SELF;
2821
2822 case DYNAMIC_LOOKUP_ORDINAL:
2823 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
2824
2825 case EXECUTABLE_ORDINAL:
2826 return BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
2827 }
2828
2829 return libIndex;
2830 }
2831
2832 bool MachOParser::doExternalReloc(Diagnostics& diag, uint32_t r_address, uint32_t r_symbolnum, LinkEditInfo& leInfo, bool& stop,
2833 void (^handler)(uint32_t dataSegIndex, uint64_t dataSegOffset, uint8_t type, int libOrdinal,
2834 uint64_t addend, const char* symbolName, bool weakImport, bool lazy, bool& stop)) const
2835 {
2836 const bool firstWritable = (header()->cputype == CPU_TYPE_X86_64);
2837 const bool is64Bit = is64();
2838 __block uint64_t relocBaseAddress = 0;
2839 __block bool baseFound = false;
2840 __block uint32_t segIndex = 0;
2841 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool &stopSeg) {
2842 if ( !baseFound ) {
2843 if ( !firstWritable || (protections & VM_PROT_WRITE) ) {
2844 baseFound = true;
2845 relocBaseAddress = vmAddr;
2846 }
2847 }
2848 if ( baseFound && (vmAddr < relocBaseAddress+r_address) && (relocBaseAddress+r_address < vmAddr+vmSize) ) {
2849 uint8_t type = BIND_TYPE_POINTER;
2850 uint64_t segOffset = relocBaseAddress + r_address - vmAddr;
2851 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
2852 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
2853 const struct nlist* symbols32 = (struct nlist*)symbolTable;
2854 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
2855 uint32_t symCount = leInfo.symTab->nsyms;
2856 uint32_t poolSize = leInfo.symTab->strsize;
2857 if ( r_symbolnum < symCount ) {
2858 uint16_t n_desc = is64Bit ? symbols64[r_symbolnum].n_desc : symbols32[r_symbolnum].n_desc;
2859 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
2860 uint32_t strOffset = is64Bit ? symbols64[r_symbolnum].n_un.n_strx : symbols32[r_symbolnum].n_un.n_strx;
2861 if ( strOffset < poolSize ) {
2862 const char* symbolName = stringPool + strOffset;
2863 bool weakImport = (n_desc & N_WEAK_REF);
2864 bool lazy = false;
2865 uint64_t addend = is64Bit ? (*((uint64_t*)((char*)header()+fileOffset+segOffset))) : (*((uint32_t*)((char*)header()+fileOffset+segOffset)));
2866 handler(segIndex, segOffset, type, libOrdinal, addend, symbolName, weakImport, lazy, stop);
2867 stopSeg = true;
2868 }
2869 }
2870 }
2871 ++segIndex;
2872 });
2873
2874 return false;
2875 }
2876
2877 bool MachOParser::invalidBindState(Diagnostics& diag, const char* opcodeName, const LinkEditInfo& leInfo, bool segIndexSet, bool libraryOrdinalSet,
2878 uint32_t dylibCount, int libOrdinal, uint32_t pointerSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, const char* symbolName) const
2879 {
2880 if ( !segIndexSet ) {
2881 diag.error("%s missing preceding BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", opcodeName);
2882 return true;
2883 }
2884 if ( segmentIndex >= leInfo.layout.segmentCount ) {
2885 diag.error("%s segment index %d too large", opcodeName, segmentIndex);
2886 return true;
2887 }
2888 if ( segmentOffset > (leInfo.layout.segments[segmentIndex].segSize-pointerSize) ) {
2889 diag.error("%s current segment offset 0x%08llX beyond segment size (0x%08llX)", opcodeName, segmentOffset, leInfo.layout.segments[segmentIndex].segSize);
2890 return true;
2891 }
2892 if ( symbolName == NULL ) {
2893 diag.error("%s missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", opcodeName);
2894 return true;
2895 }
2896 if ( !libraryOrdinalSet ) {
2897 diag.error("%s missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL", opcodeName);
2898 return true;
2899 }
2900 if ( libOrdinal > (int)dylibCount ) {
2901 diag.error("%s has library ordinal too large (%d) max (%d)", opcodeName, libOrdinal, dylibCount);
2902 return true;
2903 }
2904 if ( libOrdinal < -2 ) {
2905 diag.error("%s has unknown library special ordinal (%d)", opcodeName, libOrdinal);
2906 return true;
2907 }
2908 switch ( type ) {
2909 case BIND_TYPE_POINTER:
2910 if ( !leInfo.layout.segments[segmentIndex].writable ) {
2911 diag.error("%s pointer bind is in non-writable segment", opcodeName);
2912 return true;
2913 }
2914 if ( leInfo.layout.segments[segmentIndex].executable ) {
2915 diag.error("%s pointer bind is in executable segment", opcodeName);
2916 return true;
2917 }
2918 break;
2919 case BIND_TYPE_TEXT_ABSOLUTE32:
2920 case BIND_TYPE_TEXT_PCREL32:
2921 if ( !leInfo.layout.segments[segmentIndex].textRelocsAllowed ) {
2922 diag.error("%s text bind is in segment that does not support text relocations", opcodeName);
2923 return true;
2924 }
2925 if ( leInfo.layout.segments[segmentIndex].writable ) {
2926 diag.error("%s text bind is in writable segment", opcodeName);
2927 return true;
2928 }
2929 if ( !leInfo.layout.segments[segmentIndex].executable ) {
2930 diag.error("%s pointer bind is in non-executable segment", opcodeName);
2931 return true;
2932 }
2933 break;
2934 default:
2935 diag.error("%s unknown bind type %d", opcodeName, type);
2936 return true;
2937 }
2938 return false;
2939 }
2940
2941 void MachOParser::forEachBind(Diagnostics& diag, void (^handler)(uint32_t dataSegIndex, uint64_t dataSegOffset, uint8_t type,
2942 int libOrdinal, uint64_t addend, const char* symbolName, bool weakImport, bool lazy, bool& stop)) const
2943 {
2944 LinkEditInfo leInfo;
2945 getLinkEditPointers(diag, leInfo);
2946 if ( diag.hasError() )
2947 return;
2948 const uint32_t dylibCount = dependentDylibCount();
2949
2950 if ( leInfo.dyldInfo != nullptr ) {
2951 // process bind opcodes
2952 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
2953 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
2954 const uint32_t pointerSize = (is64() ? 8 : 4);
2955 uint8_t type = 0;
2956 uint64_t segmentOffset = 0;
2957 uint8_t segmentIndex = 0;
2958 const char* symbolName = NULL;
2959 int libraryOrdinal = 0;
2960 bool segIndexSet = false;
2961 bool libraryOrdinalSet = false;
2962
2963 int64_t addend = 0;
2964 uint64_t count;
2965 uint64_t skip;
2966 bool weakImport = false;
2967 bool done = false;
2968 bool stop = false;
2969 while ( !done && !stop && diag.noError() && (p < end) ) {
2970 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
2971 uint8_t opcode = *p & BIND_OPCODE_MASK;
2972 ++p;
2973 switch (opcode) {
2974 case BIND_OPCODE_DONE:
2975 done = true;
2976 break;
2977 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
2978 libraryOrdinal = immediate;
2979 libraryOrdinalSet = true;
2980 break;
2981 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
2982 libraryOrdinal = (int)read_uleb128(diag, p, end);
2983 libraryOrdinalSet = true;
2984 break;
2985 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
2986 // the special ordinals are negative numbers
2987 if ( immediate == 0 )
2988 libraryOrdinal = 0;
2989 else {
2990 int8_t signExtended = BIND_OPCODE_MASK | immediate;
2991 libraryOrdinal = signExtended;
2992 }
2993 libraryOrdinalSet = true;
2994 break;
2995 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
2996 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
2997 symbolName = (char*)p;
2998 while (*p != '\0')
2999 ++p;
3000 ++p;
3001 break;
3002 case BIND_OPCODE_SET_TYPE_IMM:
3003 type = immediate;
3004 break;
3005 case BIND_OPCODE_SET_ADDEND_SLEB:
3006 addend = read_sleb128(diag, p, end);
3007 break;
3008 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
3009 segmentIndex = immediate;
3010 segmentOffset = read_uleb128(diag, p, end);
3011 segIndexSet = true;
3012 break;
3013 case BIND_OPCODE_ADD_ADDR_ULEB:
3014 segmentOffset += read_uleb128(diag, p, end);
3015 break;
3016 case BIND_OPCODE_DO_BIND:
3017 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3018 return;
3019 handler(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, weakImport, false, stop);
3020 segmentOffset += pointerSize;
3021 break;
3022 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
3023 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3024 return;
3025 handler(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, weakImport, false, stop);
3026 segmentOffset += read_uleb128(diag, p, end) + pointerSize;
3027 break;
3028 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
3029 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3030 return;
3031 handler(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, weakImport, false, stop);
3032 segmentOffset += immediate*pointerSize + pointerSize;
3033 break;
3034 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
3035 count = read_uleb128(diag, p, end);
3036 skip = read_uleb128(diag, p, end);
3037 for (uint32_t i=0; i < count; ++i) {
3038 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3039 return;
3040 handler(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, weakImport, false, stop);
3041 segmentOffset += skip + pointerSize;
3042 }
3043 break;
3044 default:
3045 diag.error("bad bind opcode 0x%02X", *p);
3046 }
3047 }
3048 if ( diag.hasError() || stop )
3049 return;
3050 // process lazy bind opcodes
3051 if ( leInfo.dyldInfo->lazy_bind_size != 0 ) {
3052 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
3053 end = p + leInfo.dyldInfo->lazy_bind_size;
3054 type = BIND_TYPE_POINTER;
3055 segmentOffset = 0;
3056 segmentIndex = 0;
3057 symbolName = NULL;
3058 libraryOrdinal = 0;
3059 segIndexSet = false;
3060 libraryOrdinalSet= false;
3061 addend = 0;
3062 weakImport = false;
3063 stop = false;
3064 while ( !stop && diag.noError() && (p < end) ) {
3065 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
3066 uint8_t opcode = *p & BIND_OPCODE_MASK;
3067 ++p;
3068 switch (opcode) {
3069 case BIND_OPCODE_DONE:
3070 // this opcode marks the end of each lazy pointer binding
3071 break;
3072 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
3073 libraryOrdinal = immediate;
3074 libraryOrdinalSet = true;
3075 break;
3076 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
3077 libraryOrdinal = (int)read_uleb128(diag, p, end);
3078 libraryOrdinalSet = true;
3079 break;
3080 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
3081 // the special ordinals are negative numbers
3082 if ( immediate == 0 )
3083 libraryOrdinal = 0;
3084 else {
3085 int8_t signExtended = BIND_OPCODE_MASK | immediate;
3086 libraryOrdinal = signExtended;
3087 }
3088 libraryOrdinalSet = true;
3089 break;
3090 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
3091 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
3092 symbolName = (char*)p;
3093 while (*p != '\0')
3094 ++p;
3095 ++p;
3096 break;
3097 case BIND_OPCODE_SET_ADDEND_SLEB:
3098 addend = read_sleb128(diag, p, end);
3099 break;
3100 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
3101 segmentIndex = immediate;
3102 segmentOffset = read_uleb128(diag, p, end);
3103 segIndexSet = true;
3104 break;
3105 case BIND_OPCODE_DO_BIND:
3106 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3107 return;
3108 handler(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, weakImport, true, stop);
3109 segmentOffset += pointerSize;
3110 break;
3111 case BIND_OPCODE_SET_TYPE_IMM:
3112 case BIND_OPCODE_ADD_ADDR_ULEB:
3113 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
3114 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
3115 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
3116 default:
3117 diag.error("bad lazy bind opcode 0x%02X", opcode);
3118 break;
3119 }
3120 }
3121 }
3122 }
3123 else {
3124 // old binary, first process relocation
3125 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->extreloff);
3126 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nextrel];
3127 bool stop = false;
3128 const uint8_t relocSize = (is64() ? 3 : 2);
3129 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
3130 if ( reloc->r_length != relocSize ) {
3131 diag.error("external relocation has wrong r_length");
3132 break;
3133 }
3134 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
3135 diag.error("external relocation has wrong r_type");
3136 break;
3137 }
3138 doExternalReloc(diag, reloc->r_address, reloc->r_symbolnum, leInfo, stop, handler);
3139 }
3140 // then process indirect symbols
3141 forEachIndirectPointer(diag, ^(uint32_t segIndex, uint64_t segOffset, bool bind, int bindLibOrdinal,
3142 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
3143 if ( bind )
3144 handler(segIndex, segOffset, (selfModifyingStub ? BIND_TYPE_IMPORT_JMP_REL32 : BIND_TYPE_POINTER), bindLibOrdinal, 0, bindSymbolName, bindWeakImport, bindLazy, indStop);
3145 });
3146 }
3147 }
3148
3149
3150 void MachOParser::forEachWeakDef(Diagnostics& diag, void (^handler)(bool strongDef, uint32_t dataSegIndex, uint64_t dataSegOffset,
3151 uint64_t addend, const char* symbolName, bool& stop)) const
3152 {
3153 LinkEditInfo leInfo;
3154 getLinkEditPointers(diag, leInfo);
3155 if ( diag.hasError() )
3156 return;
3157
3158 const uint32_t dylibCount = dependentDylibCount();
3159 if ( leInfo.dyldInfo != nullptr ) {
3160 // process weak bind opcodes
3161 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->weak_bind_off);
3162 const uint8_t* end = p + leInfo.dyldInfo->weak_bind_size;
3163 const uint32_t pointerSize = (is64() ? 8 : 4);
3164 uint8_t type = 0;
3165 uint64_t segmentOffset = 0;
3166 uint8_t segmentIndex = 0;
3167 const char* symbolName = NULL;
3168 int64_t addend = 0;
3169 uint64_t count;
3170 uint64_t skip;
3171 bool segIndexSet = false;
3172 bool done = false;
3173 bool stop = false;
3174 while ( !done && !stop && diag.noError() && (p < end) ) {
3175 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
3176 uint8_t opcode = *p & BIND_OPCODE_MASK;
3177 ++p;
3178 switch (opcode) {
3179 case BIND_OPCODE_DONE:
3180 done = true;
3181 break;
3182 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
3183 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
3184 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
3185 diag.error("unexpected dylib ordinal in weak binding info");
3186 return;
3187 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
3188 symbolName = (char*)p;
3189 while (*p != '\0')
3190 ++p;
3191 ++p;
3192 if ( (immediate & BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) != 0 )
3193 handler(true, 0, 0, 0, symbolName, stop);
3194 break;
3195 case BIND_OPCODE_SET_TYPE_IMM:
3196 type = immediate;
3197 break;
3198 case BIND_OPCODE_SET_ADDEND_SLEB:
3199 addend = read_sleb128(diag, p, end);
3200 break;
3201 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
3202 segmentIndex = immediate;
3203 segmentOffset = read_uleb128(diag, p, end);
3204 segIndexSet = true;
3205 break;
3206 case BIND_OPCODE_ADD_ADDR_ULEB:
3207 segmentOffset += read_uleb128(diag, p, end);
3208 break;
3209 case BIND_OPCODE_DO_BIND:
3210 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, true, dylibCount, -2, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3211 return;
3212 handler(false, segmentIndex, segmentOffset, addend, symbolName, stop);
3213 segmentOffset += pointerSize;
3214 break;
3215 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
3216 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, true, dylibCount, -2, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3217 return;
3218 handler(false, segmentIndex, segmentOffset, addend, symbolName, stop);
3219 segmentOffset += read_uleb128(diag, p, end) + pointerSize;
3220 break;
3221 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
3222 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, true, dylibCount, -2, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3223 return;
3224 handler(false, segmentIndex, segmentOffset, addend, symbolName, stop);
3225 segmentOffset += immediate*pointerSize + pointerSize;
3226 break;
3227 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
3228 count = read_uleb128(diag, p, end);
3229 skip = read_uleb128(diag, p, end);
3230 for (uint32_t i=0; i < count; ++i) {
3231 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, true, dylibCount, -2, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3232 return;
3233 handler(false, segmentIndex, segmentOffset, addend, symbolName, stop);
3234 segmentOffset += skip + pointerSize;
3235 }
3236 break;
3237 default:
3238 diag.error("bad weak bind opcode 0x%02X", *p);
3239 }
3240 }
3241 if ( diag.hasError() || stop )
3242 return;
3243 }
3244 else {
3245 // old binary
3246 //assert(0 && "weak defs not supported for old binaries yet");
3247 }
3248 }
3249
3250
3251
3252 void MachOParser::forEachIndirectPointer(Diagnostics& diag, void (^handler)(uint32_t dataSegIndex, uint64_t dataSegOffset, bool bind, int bindLibOrdinal,
3253 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& stop)) const
3254 {
3255 LinkEditInfo leInfo;
3256 getLinkEditPointers(diag, leInfo);
3257 if ( diag.hasError() )
3258 return;
3259
3260 // find lazy and non-lazy pointer sections
3261 const bool is64Bit = is64();
3262 const uint32_t* const indirectSymbolTable = (uint32_t*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->indirectsymoff);
3263 const uint32_t indirectSymbolTableCount = leInfo.dynSymTab->nindirectsyms;
3264 const uint32_t pointerSize = is64Bit ? 8 : 4;
3265 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
3266 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
3267 const struct nlist* symbols32 = (struct nlist*)symbolTable;
3268 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
3269 uint32_t symCount = leInfo.symTab->nsyms;
3270 uint32_t poolSize = leInfo.symTab->strsize;
3271 __block bool stop = false;
3272 forEachSection(^(const char* segName, const char* sectionName, uint32_t flags, uint64_t addr, const void* content,
3273 uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& sectionStop) {
3274 uint8_t sectionType = (flags & SECTION_TYPE);
3275 if ( (sectionType != S_LAZY_SYMBOL_POINTERS) && (sectionType != S_NON_LAZY_SYMBOL_POINTERS) && (sectionType != S_SYMBOL_STUBS) )
3276 return;
3277 bool selfModifyingStub = (sectionType == S_SYMBOL_STUBS) && (flags & S_ATTR_SELF_MODIFYING_CODE) && (reserved2 == 5) && (header()->cputype == CPU_TYPE_I386);
3278 if ( (flags & S_ATTR_SELF_MODIFYING_CODE) && !selfModifyingStub ) {
3279 diag.error("S_ATTR_SELF_MODIFYING_CODE section type only valid in old i386 binaries");
3280 sectionStop = true;
3281 return;
3282 }
3283 uint32_t elementSize = selfModifyingStub ? reserved2 : pointerSize;
3284 uint32_t elementCount = (uint32_t)(size/elementSize);
3285 if (greaterThanAddOrOverflow(reserved1, elementCount, indirectSymbolTableCount)) {
3286 diag.error("section %s overflows indirect symbol table", sectionName);
3287 sectionStop = true;
3288 return;
3289 }
3290 __block uint32_t index = 0;
3291 __block uint32_t segIndex = 0;
3292 __block uint64_t sectionSegOffset;
3293 forEachSegment(^(const char* segmentName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool &segStop) {
3294 if ( (vmAddr <= addr) && (addr < vmAddr+vmSize) ) {
3295 sectionSegOffset = addr - vmAddr;
3296 segIndex = index;
3297 segStop = true;
3298 }
3299 ++index;
3300 });
3301
3302 for (int i=0; (i < elementCount) && !stop; ++i) {
3303 uint32_t symNum = indirectSymbolTable[reserved1 + i];
3304 if ( symNum == INDIRECT_SYMBOL_ABS )
3305 continue;
3306 uint64_t segOffset = sectionSegOffset+i*elementSize;
3307 if ( symNum == INDIRECT_SYMBOL_LOCAL ) {
3308 handler(segIndex, segOffset, false, 0, "", false, false, false, stop);
3309 continue;
3310 }
3311 if ( symNum > symCount ) {
3312 diag.error("indirect symbol[%d] = %d which is invalid symbol index", reserved1 + i, symNum);
3313 sectionStop = true;
3314 return;
3315 }
3316 uint16_t n_desc = is64Bit ? symbols64[symNum].n_desc : symbols32[symNum].n_desc;
3317 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
3318 uint32_t strOffset = is64Bit ? symbols64[symNum].n_un.n_strx : symbols32[symNum].n_un.n_strx;
3319 if ( strOffset > poolSize ) {
3320 diag.error("symbol[%d] string offset out of range", reserved1 + i);
3321 sectionStop = true;
3322 return;
3323 }
3324 const char* symbolName = stringPool + strOffset;
3325 bool weakImport = (n_desc & N_WEAK_REF);
3326 bool lazy = (sectionType == S_LAZY_SYMBOL_POINTERS);
3327 handler(segIndex, segOffset, true, libOrdinal, symbolName, weakImport, lazy, selfModifyingStub, stop);
3328 }
3329 sectionStop = stop;
3330 });
3331 }
3332
3333 void MachOParser::forEachInterposingTuple(Diagnostics& diag, void (^handler)(uint32_t segIndex, uint64_t replacementSegOffset, uint64_t replaceeSegOffset, uint64_t replacementContent, bool& stop)) const
3334 {
3335 const bool is64Bit = is64();
3336 const unsigned entrySize = is64Bit ? 16 : 8;
3337 const unsigned pointerSize = is64Bit ? 8 : 4;
3338 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, uint64_t addr, c