]> git.saurik.com Git - apple/dyld.git/blame - dyld3/MachOParser.cpp
dyld-519.2.1.tar.gz
[apple/dyld.git] / dyld3 / MachOParser.cpp
CommitLineData
10b92d3b
A
1/*
2 * Copyright (c) 2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24
25
26#include <stdint.h>
27#include <string.h>
28#include <assert.h>
29#include <uuid/uuid.h>
30#include <fcntl.h>
31#include <errno.h>
32#include <unistd.h>
33#include <sys/uio.h>
34#include <sys/param.h>
35#include <sys/sysctl.h>
36#include <sys/resource.h>
37#include <sys/types.h>
38#include <sys/stat.h>
39#include <sys/mman.h>
40#include <rootless.h>
41#include <dirent.h>
42#include <mach/mach.h>
43#include <mach/machine.h>
44#include <mach-o/loader.h>
45#include <mach-o/nlist.h>
46#include <mach-o/fat.h>
47#include <mach-o/reloc.h>
48#include <mach-o/dyld_priv.h>
49#include <CommonCrypto/CommonDigest.h>
50
51#if !DYLD_IN_PROCESS
52#include <dlfcn.h>
53#endif
54
55#include "MachOParser.h"
56#include "Logging.h"
57#include "CodeSigningTypes.h"
58#include "DyldSharedCache.h"
59#include "Trie.hpp"
60
61#if DYLD_IN_PROCESS
62 #include "APIs.h"
63#else
64 #include "StringUtils.h"
65#endif
66
67
68
69#ifndef EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE
70 #define EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE 0x02
71#endif
72
73#ifndef CPU_SUBTYPE_ARM64_E
74 #define CPU_SUBTYPE_ARM64_E 2
75#endif
76
// Fallback definitions for SDKs whose <mach-o/loader.h> predates LC_BUILD_VERSION.
#ifndef LC_BUILD_VERSION
    #define LC_BUILD_VERSION 0x32 /* build for platform min OS version */

    /*
     * A build_version_command records the minimum OS version that the binary
     * was built to run on for its platform.  The known platform and tool
     * values are listed after the structures.
     */
    struct build_version_command {
        uint32_t    cmd;        /* LC_BUILD_VERSION */
        uint32_t    cmdsize;    /* sizeof(struct build_version_command) plus */
                                /* ntools * sizeof(struct build_tool_version) */
        uint32_t    platform;   /* platform */
        uint32_t    minos;      /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
        uint32_t    sdk;        /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
        uint32_t    ntools;     /* number of tool entries following this */
    };

    struct build_tool_version {
        uint32_t    tool;       /* enum for the tool */
        uint32_t    version;    /* version number of the tool */
    };

    /* Known values for the platform field above. */
    #define PLATFORM_MACOS    1
    #define PLATFORM_IOS      2
    #define PLATFORM_TVOS     3
    #define PLATFORM_WATCHOS  4
    #define PLATFORM_BRIDGEOS 5

    /* Known values for the tool field above. */
    #define TOOL_CLANG 1
    #define TOOL_SWIFT 2
    #define TOOL_LD    3
#endif
112
113
114namespace dyld3 {
115
116
117bool FatUtil::isFatFile(const void* fileStart)
118{
119 const fat_header* fileStartAsFat = (fat_header*)fileStart;
120 return ( fileStartAsFat->magic == OSSwapBigToHostInt32(FAT_MAGIC) );
121}
122
/// Returns true if (addLHS + addRHS) > b, or if the add overflowed.
/// The sum is never formed; the bound is tested by subtraction instead.
template<typename T>
static bool greaterThanAddOrOverflow(uint32_t addLHS, uint32_t addRHS, T b) {
    // once the left operand alone exceeds the bound the answer is known, and
    // ruling that out first keeps the subtraction below from underflowing
    if ( addLHS > b )
        return true;
    const T roomLeft = b - addLHS;
    return ( addRHS > roomLeft );
}
128
/// Returns true if (addLHS + addRHS) > b, or if the add overflowed.
/// The sum is never formed; the bound is tested by subtraction instead.
template<typename T>
static bool greaterThanAddOrOverflow(uint64_t addLHS, uint64_t addRHS, T b) {
    // once the left operand alone exceeds the bound the answer is known, and
    // ruling that out first keeps the subtraction below from underflowing
    if ( addLHS > b )
        return true;
    return ( addRHS > (b - addLHS) );
}
134
135void FatUtil::forEachSlice(Diagnostics& diag, const void* fileContent, size_t fileLen, void (^callback)(uint32_t sliceCpuType, uint32_t sliceCpuSubType, const void* sliceStart, size_t sliceSize, bool& stop))
136{
137 const fat_header* fh = (fat_header*)fileContent;
138 if ( fh->magic != OSSwapBigToHostInt32(FAT_MAGIC) ) {
139 diag.error("not a fat file");
140 return;
141 }
142
143 if ( OSSwapBigToHostInt32(fh->nfat_arch) > ((4096 - sizeof(fat_header)) / sizeof(fat_arch)) ) {
144 diag.error("fat header too large: %u entries", OSSwapBigToHostInt32(fh->nfat_arch));
145 }
146 const fat_arch* const archs = (fat_arch*)(((char*)fh)+sizeof(fat_header));
147 bool stop = false;
148 for (uint32_t i=0; i < OSSwapBigToHostInt32(fh->nfat_arch); ++i) {
149 uint32_t cpuType = OSSwapBigToHostInt32(archs[i].cputype);
150 uint32_t cpuSubType = OSSwapBigToHostInt32(archs[i].cpusubtype);
151 uint32_t offset = OSSwapBigToHostInt32(archs[i].offset);
152 uint32_t len = OSSwapBigToHostInt32(archs[i].size);
153 if (greaterThanAddOrOverflow(offset, len, fileLen)) {
154 diag.error("slice %d extends beyond end of file", i);
155 return;
156 }
157 callback(cpuType, cpuSubType, (uint8_t*)fileContent+offset, len, stop);
158 if ( stop )
159 break;
160 }
161}
162
163#if !DYLD_IN_PROCESS
164bool FatUtil::isFatFileWithSlice(Diagnostics& diag, const void* fileContent, size_t fileLen, const std::string& archName, size_t& sliceOffset, size_t& sliceLen, bool& missingSlice)
165{
166 missingSlice = false;
167 if ( !isFatFile(fileContent) )
168 return false;
169
170 __block bool found = false;
171 forEachSlice(diag, fileContent, fileLen, ^(uint32_t sliceCpuType, uint32_t sliceCpuSubType, const void* sliceStart, size_t sliceSize, bool& stop) {
172 std::string sliceArchName = MachOParser::archName(sliceCpuType, sliceCpuSubType);
173 if ( sliceArchName == archName ) {
174 sliceOffset = (char*)sliceStart - (char*)fileContent;
175 sliceLen = sliceSize;
176 found = true;
177 stop = true;
178 }
179 });
180 if ( diag.hasError() )
181 return false;
182
183 if ( !found )
184 missingSlice = true;
185
186 // when looking for x86_64h fallback to x86_64
187 if ( !found && (archName == "x86_64h") )
188 return isFatFileWithSlice(diag, fileContent, fileLen, "x86_64", sliceOffset, sliceLen, missingSlice);
189
190 return found;
191}
192
193#endif
194
195MachOParser::MachOParser(const mach_header* mh, bool dyldCacheIsRaw)
196{
197#if DYLD_IN_PROCESS
198 // assume all in-process mach_headers are real loaded images
199 _data = (long)mh;
200#else
201 if (mh == nullptr)
202 return;
203 _data = (long)mh;
204 if ( (mh->flags & 0x80000000) == 0 ) {
205 // asssume out-of-process mach_header not in a dyld cache are raw mapped files
206 _data |= 1;
207 }
208 else {
209 // out-of-process mach_header in a dyld cache are not raw, but cache may be raw
210 if ( dyldCacheIsRaw )
211 _data |= 2;
212 }
213#endif
214}
215
216const mach_header* MachOParser::header() const
217{
218 return (mach_header*)(_data & -4);
219}
220
221// "raw" means the whole mach-o file was mapped as one contiguous region
222// not-raw means the the mach-o file was mapped like dyld does - with zero fill expansion
223bool MachOParser::isRaw() const
224{
225 return (_data & 1);
226}
227
228// A raw dyld cache is when the whole dyld cache file is mapped in one contiguous region
229// not-raw manes the dyld cache was mapped as it is at runtime with padding between regions
230bool MachOParser::inRawCache() const
231{
232 return (_data & 2);
233}
234
235uint32_t MachOParser::fileType() const
236{
237 return header()->filetype;
238}
239
240bool MachOParser::inDyldCache() const
241{
242 return (header()->flags & 0x80000000);
243}
244
245bool MachOParser::hasThreadLocalVariables() const
246{
247 return (header()->flags & MH_HAS_TLV_DESCRIPTORS);
248}
249
250Platform MachOParser::platform() const
251{
252 Platform platform;
253 uint32_t minOS;
254 uint32_t sdk;
255 if ( getPlatformAndVersion(&platform, &minOS, &sdk) )
256 return platform;
257
258 // old binary with no explict load command to mark platform, look at arch
259 switch ( header()->cputype ) {
260 case CPU_TYPE_X86_64:
261 case CPU_TYPE_I386:
262 return Platform::macOS;
263 case CPU_TYPE_ARM64:
264 case CPU_TYPE_ARM:
265 return Platform::iOS;
266 }
267 return Platform::macOS;
268}
269
270
271#if !DYLD_IN_PROCESS
272
273const MachOParser::ArchInfo MachOParser::_s_archInfos[] = {
274 { "x86_64", CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL },
275 { "x86_64h", CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_H },
276 { "i386", CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL },
277 { "arm64", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL },
278 { "arm64e", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_E },
279 { "armv7k", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7K },
280 { "armv7s", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S },
281 { "armv7", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7 }
282};
283
284bool MachOParser::isValidMachO(Diagnostics& diag, const std::string& archName, Platform platform, const void* fileContent, size_t fileLength, const std::string& pathOpened, bool ignoreMainExecutables)
285{
286 // must start with mach-o magic value
287 const mach_header* mh = (const mach_header*)fileContent;
288 if ( (mh->magic != MH_MAGIC) && (mh->magic != MH_MAGIC_64) ) {
289 diag.warning("could not use '%s' because it is not a mach-o file", pathOpened.c_str());
290 return false;
291 }
292
293 // must match requested architecture if specified
294 if (!archName.empty() && !isArch(mh, archName)) {
295 // except when looking for x86_64h, fallback to x86_64
296 if ( (archName != "x86_64h") || !isArch(mh, "x86_64") ) {
297 diag.warning("could not use '%s' because it does not contain required architecture %s", pathOpened.c_str(), archName.c_str());
298 return false;
299 }
300 }
301
302 // must be a filetype dyld can load
303 switch ( mh->filetype ) {
304 case MH_EXECUTE:
305 if ( ignoreMainExecutables )
306 return false;
307 break;
308 case MH_DYLIB:
309 case MH_BUNDLE:
310 break;
311 default:
312 diag.warning("could not use '%s' because it is not a dylib, bundle, or executable", pathOpened.c_str());
313 return false;
314 }
315
316 // must be from a file - not in the dyld shared cache
317 if ( mh->flags & 0x80000000 ) {
318 diag.warning("could not use '%s' because the high bit of mach_header flags is reserved for images in dyld cache", pathOpened.c_str());
319 return false;
320 }
321
322 // validate load commands structure
323 MachOParser parser(mh);
324 if ( !parser.validLoadCommands(diag, fileLength) )
325 return false;
326
327 // must match requested platform
328 if ( parser.platform() != platform ) {
329 diag.warning("could not use '%s' because it was built for a different platform", pathOpened.c_str());
330 return false;
331 }
332
333 // cannot be a static executable
334 if ( (mh->filetype == MH_EXECUTE) && !parser.isDynamicExecutable() ) {
335 diag.warning("could not use '%s' because it is a static executable", pathOpened.c_str());
336 return false;
337 }
338
339 // validate dylib loads
340 if ( !parser.validEmbeddedPaths(diag) )
341 return false;
342
343 // validate segments
344 if ( !parser.validSegments(diag, fileLength) )
345 return false;
346
347 // validate LINKEDIT layout
348 if ( !parser.validLinkeditLayout(diag) )
349 return false;
350
351 return true;
352}
353
354
355bool MachOParser::validLoadCommands(Diagnostics& diag, size_t fileLen)
356{
357 // check load command don't exceed file length
358 if ( header()->sizeofcmds + sizeof(mach_header_64) > fileLen ) {
359 diag.warning("load commands exceed length of file");
360 return false;
361 }
362 // walk all load commands and sanity check them
363 Diagnostics walkDiag;
364 LinkEditInfo lePointers;
365 getLinkEditLoadCommands(walkDiag, lePointers);
366 if ( walkDiag.hasError() ) {
367 diag.warning("%s", walkDiag.errorMessage().c_str());
368 return false;
369 }
370
371 // check load commands fit in TEXT segment
372 __block bool overflowText = false;
373 forEachSegment(^(const char* segName, uint32_t segFileOffset, uint32_t segFileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
374 if ( strcmp(segName, "__TEXT") == 0 ) {
375 if ( header()->sizeofcmds + sizeof(mach_header_64) > segFileSize ) {
376 diag.warning("load commands exceed length of __TEXT segment");
377 overflowText = true;
378 }
379 stop = true;
380 }
381 });
382 if ( overflowText )
383 return false;
384
385 return true;
386}
387
388bool MachOParser::validEmbeddedPaths(Diagnostics& diag)
389{
390 __block int index = 1;
391 __block bool allGood = true;
392 __block bool foundInstallName = false;
393 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
394 const dylib_command* dylibCmd;
395 const rpath_command* rpathCmd;
396 switch ( cmd->cmd ) {
397 case LC_ID_DYLIB:
398 foundInstallName = true;
399 // fall through
400 case LC_LOAD_DYLIB:
401 case LC_LOAD_WEAK_DYLIB:
402 case LC_REEXPORT_DYLIB:
403 case LC_LOAD_UPWARD_DYLIB:
404 dylibCmd = (dylib_command*)cmd;
405 if ( dylibCmd->dylib.name.offset > cmd->cmdsize ) {
406 diag.warning("load command #%d name offset (%u) outside its size (%u)", index, dylibCmd->dylib.name.offset, cmd->cmdsize);
407 stop = true;
408 allGood = false;
409 }
410 else {
411 bool foundEnd = false;
412 const char* start = (char*)dylibCmd + dylibCmd->dylib.name.offset;
413 const char* end = (char*)dylibCmd + cmd->cmdsize;
414 for (const char* s=start; s < end; ++s) {
415 if ( *s == '\0' ) {
416 foundEnd = true;
417 break;
418 }
419 }
420 if ( !foundEnd ) {
421 diag.warning("load command #%d string extends beyond end of load command", index);
422 stop = true;
423 allGood = false;
424 }
425 }
426 break;
427 case LC_RPATH:
428 rpathCmd = (rpath_command*)cmd;
429 if ( rpathCmd->path.offset > cmd->cmdsize ) {
430 diag.warning("load command #%d path offset (%u) outside its size (%u)", index, rpathCmd->path.offset, cmd->cmdsize);
431 stop = true;
432 allGood = false;
433 }
434 else {
435 bool foundEnd = false;
436 const char* start = (char*)rpathCmd + rpathCmd->path.offset;
437 const char* end = (char*)rpathCmd + cmd->cmdsize;
438 for (const char* s=start; s < end; ++s) {
439 if ( *s == '\0' ) {
440 foundEnd = true;
441 break;
442 }
443 }
444 if ( !foundEnd ) {
445 diag.warning("load command #%d string extends beyond end of load command", index);
446 stop = true;
447 allGood = false;
448 }
449 }
450 break;
451 }
452 ++index;
453 });
454
455 if ( header()->filetype == MH_DYLIB ) {
456 if ( !foundInstallName ) {
457 diag.warning("MH_DYLIB is missing LC_ID_DYLIB");
458 allGood = false;
459 }
460 }
461 else {
462 if ( foundInstallName ) {
463 diag.warning("LC_ID_DYLIB found in non-MH_DYLIB");
464 allGood = false;
465 }
466 }
467
468 return allGood;
469}
470
471bool MachOParser::validSegments(Diagnostics& diag, size_t fileLen)
472{
473 // check segment load command size
474 __block bool badSegmentLoadCommand = false;
475 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
476 if ( cmd->cmd == LC_SEGMENT_64 ) {
477 const segment_command_64* seg = (segment_command_64*)cmd;
478 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command_64);
479 if ( sectionsSpace < 0 ) {
480 diag.warning("load command size too small for LC_SEGMENT_64");
481 badSegmentLoadCommand = true;
482 stop = true;
483 }
484 else if ( (sectionsSpace % sizeof(section_64)) != 0 ) {
485 diag.warning("segment load command size 0x%X will not fit whole number of sections", cmd->cmdsize);
486 badSegmentLoadCommand = true;
487 stop = true;
488 }
489 else if ( sectionsSpace != (seg->nsects * sizeof(section_64)) ) {
490 diag.warning("load command size 0x%X does not match nsects %d", cmd->cmdsize, seg->nsects);
491 badSegmentLoadCommand = true;
492 stop = true;
493 } else if (greaterThanAddOrOverflow(seg->fileoff, seg->filesize, fileLen)) {
494 diag.warning("segment load command content extends beyond end of file");
495 badSegmentLoadCommand = true;
496 stop = true;
497 } else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
498 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
499 diag.warning("segment filesize exceeds vmsize");
500 badSegmentLoadCommand = true;
501 stop = true;
502 }
503 }
504 else if ( cmd->cmd == LC_SEGMENT ) {
505 const segment_command* seg = (segment_command*)cmd;
506 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command);
507 if ( sectionsSpace < 0 ) {
508 diag.warning("load command size too small for LC_SEGMENT");
509 badSegmentLoadCommand = true;
510 stop = true;
511 }
512 else if ( (sectionsSpace % sizeof(section)) != 0 ) {
513 diag.warning("segment load command size 0x%X will not fit whole number of sections", cmd->cmdsize);
514 badSegmentLoadCommand = true;
515 stop = true;
516 }
517 else if ( sectionsSpace != (seg->nsects * sizeof(section)) ) {
518 diag.warning("load command size 0x%X does not match nsects %d", cmd->cmdsize, seg->nsects);
519 badSegmentLoadCommand = true;
520 stop = true;
521 } else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
522 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
523 diag.warning("segment filesize exceeds vmsize");
524 badSegmentLoadCommand = true;
525 stop = true;
526 }
527 }
528 });
529 if ( badSegmentLoadCommand )
530 return false;
531
532 // check mapping permissions of segments
533 __block bool badPermissions = false;
534 __block bool badSize = false;
535 __block bool hasTEXT = false;
536 __block bool hasLINKEDIT = false;
537 forEachSegment(^(const char* segName, uint32_t segFileOffset, uint32_t segFileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
538 if ( strcmp(segName, "__TEXT") == 0 ) {
539 if ( protections != (VM_PROT_READ|VM_PROT_EXECUTE) ) {
540 diag.warning("__TEXT segment permissions is not 'r-x'");
541 badPermissions = true;
542 stop = true;
543 }
544 hasTEXT = true;
545 }
546 else if ( strcmp(segName, "__LINKEDIT") == 0 ) {
547 if ( protections != VM_PROT_READ ) {
548 diag.warning("__LINKEDIT segment permissions is not 'r--'");
549 badPermissions = true;
550 stop = true;
551 }
552 hasLINKEDIT = true;
553 }
554 else if ( (protections & 0xFFFFFFF8) != 0 ) {
555 diag.warning("%s segment permissions has invalid bits set", segName);
556 badPermissions = true;
557 stop = true;
558 }
559 if (greaterThanAddOrOverflow(segFileOffset, segFileSize, fileLen)) {
560 diag.warning("%s segment content extends beyond end of file", segName);
561 badSize = true;
562 stop = true;
563 }
564 if ( is64() ) {
565 if ( vmAddr+vmSize < vmAddr ) {
566 diag.warning("%s segment vm range wraps", segName);
567 badSize = true;
568 stop = true;
569 }
570 }
571 else {
572 if ( (uint32_t)(vmAddr+vmSize) < (uint32_t)(vmAddr) ) {
573 diag.warning("%s segment vm range wraps", segName);
574 badSize = true;
575 stop = true;
576 }
577 }
578 });
579 if ( badPermissions || badSize )
580 return false;
581 if ( !hasTEXT ) {
582 diag.warning("missing __TEXT segment");
583 return false;
584 }
585 if ( !hasLINKEDIT ) {
586 diag.warning("missing __LINKEDIT segment");
587 return false;
588 }
589
590 // check for overlapping segments
591 __block bool badSegments = false;
592 forEachSegment(^(const char* seg1Name, uint32_t seg1FileOffset, uint32_t seg1FileSize, uint64_t seg1vmAddr, uint64_t seg1vmSize, uint8_t seg1Protections, uint32_t seg1Index, uint64_t seg1SizeOfSections, uint8_t seg1Align, bool& stop1) {
593 uint64_t seg1vmEnd = seg1vmAddr + seg1vmSize;
594 uint32_t seg1FileEnd = seg1FileOffset + seg1FileSize;
595 forEachSegment(^(const char* seg2Name, uint32_t seg2FileOffset, uint32_t seg2FileSize, uint64_t seg2vmAddr, uint64_t seg2vmSize, uint8_t seg2Protections, uint32_t seg2Index, uint64_t seg2SizeOfSections, uint8_t seg2Align, bool& stop2) {
596 if ( seg1Index == seg2Index )
597 return;
598 uint64_t seg2vmEnd = seg2vmAddr + seg2vmSize;
599 uint32_t seg2FileEnd = seg2FileOffset + seg2FileSize;
600 if ( ((seg2vmAddr <= seg1vmAddr) && (seg2vmEnd > seg1vmAddr) && (seg1vmEnd > seg1vmAddr)) || ((seg2vmAddr >= seg1vmAddr) && (seg2vmAddr < seg1vmEnd) && (seg2vmEnd > seg2vmAddr)) ) {
601 diag.warning("segment %s vm range overlaps segment %s", seg1Name, seg2Name);
602 badSegments = true;
603 stop1 = true;
604 stop2 = true;
605 }
606 if ( ((seg2FileOffset <= seg1FileOffset) && (seg2FileEnd > seg1FileOffset) && (seg1FileEnd > seg1FileOffset)) || ((seg2FileOffset >= seg1FileOffset) && (seg2FileOffset < seg1FileEnd) && (seg2FileEnd > seg2FileOffset)) ) {
607 diag.warning("segment %s file content overlaps segment %s", seg1Name, seg2Name);
608 badSegments = true;
609 stop1 = true;
610 stop2 = true;
611 }
612 // check for out of order segments
613 if ( (seg1Index < seg2Index) && !stop1 ) {
614 if ( (seg1vmAddr > seg2vmAddr) || ((seg1FileOffset > seg2FileOffset) && (seg1FileOffset != 0) && (seg2FileOffset != 0)) ){
615 diag.warning("segment load commands out of order with respect to layout for %s and %s", seg1Name, seg2Name);
616 badSegments = true;
617 stop1 = true;
618 stop2 = true;
619 }
620 }
621 });
622 });
623 if ( badSegments )
624 return false;
625
626 // check sections are within segment
627 __block bool badSections = false;
628 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
629 if ( cmd->cmd == LC_SEGMENT_64 ) {
630 const segment_command_64* seg = (segment_command_64*)cmd;
631 const section_64* const sectionsStart = (section_64*)((char*)seg + sizeof(struct segment_command_64));
632 const section_64* const sectionsEnd = &sectionsStart[seg->nsects];
633 for (const section_64* sect=sectionsStart; (sect < sectionsEnd); ++sect) {
634 if ( (int64_t)(sect->size) < 0 ) {
635 diag.warning("section %s size too large 0x%llX", sect->sectname, sect->size);
636 badSections = true;
637 }
638 else if ( sect->addr < seg->vmaddr ) {
639 diag.warning("section %s start address 0x%llX is before containing segment's address 0x%0llX", sect->sectname, sect->addr, seg->vmaddr);
640 badSections = true;
641 }
642 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
643 diag.warning("section %s end address 0x%llX is beyond containing segment's end address 0x%0llX", sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
644 badSections = true;
645 }
646 }
647 }
648 else if ( cmd->cmd == LC_SEGMENT ) {
649 const segment_command* seg = (segment_command*)cmd;
650 const section* const sectionsStart = (section*)((char*)seg + sizeof(struct segment_command));
651 const section* const sectionsEnd = &sectionsStart[seg->nsects];
652 for (const section* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
653 if ( (int64_t)(sect->size) < 0 ) {
654 diag.warning("section %s size too large 0x%X", sect->sectname, sect->size);
655 badSections = true;
656 }
657 else if ( sect->addr < seg->vmaddr ) {
658 diag.warning("section %s start address 0x%X is before containing segment's address 0x%0X", sect->sectname, sect->addr, seg->vmaddr);
659 badSections = true;
660 }
661 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
662 diag.warning("section %s end address 0x%X is beyond containing segment's end address 0x%0X", sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
663 badSections = true;
664 }
665 }
666 }
667 });
668
669 return !badSections;
670}
671
// Describes one blob of LINKEDIT content.  Instances are brace-initialized
// positionally, so the member order is part of the contract.
struct LinkEditContent
{
    const char*     name;               // label used in diagnostics
    uint32_t        stdOrder;           // rank used when sorting blobs into standard order
    uint32_t        fileOffsetStart;    // file offset at which the blob begins
    uint32_t        size;               // blob size in bytes
};
679
680
681
682bool MachOParser::validLinkeditLayout(Diagnostics& diag)
683{
684 LinkEditInfo leInfo;
685 getLinkEditPointers(diag, leInfo);
686 if ( diag.hasError() )
687 return false;
688 const bool is64Bit = is64();
689 const uint32_t pointerSize = (is64Bit ? 8 : 4);
690
691 // build vector of all blobs in LINKEDIT
692 std::vector<LinkEditContent> blobs;
693 if ( leInfo.dyldInfo != nullptr ) {
694 if ( leInfo.dyldInfo->rebase_size != 0 )
695 blobs.push_back({"rebase opcodes", 1, leInfo.dyldInfo->rebase_off, leInfo.dyldInfo->rebase_size});
696 if ( leInfo.dyldInfo->bind_size != 0 )
697 blobs.push_back({"bind opcodes", 2, leInfo.dyldInfo->bind_off, leInfo.dyldInfo->bind_size});
698 if ( leInfo.dyldInfo->weak_bind_size != 0 )
699 blobs.push_back({"weak bind opcodes", 3, leInfo.dyldInfo->weak_bind_off, leInfo.dyldInfo->weak_bind_size});
700 if ( leInfo.dyldInfo->lazy_bind_size != 0 )
701 blobs.push_back({"lazy bind opcodes", 4, leInfo.dyldInfo->lazy_bind_off, leInfo.dyldInfo->lazy_bind_size});
702 if ( leInfo.dyldInfo->export_size!= 0 )
703 blobs.push_back({"exports trie", 5, leInfo.dyldInfo->export_off, leInfo.dyldInfo->export_size});
704 }
705 if ( leInfo.dynSymTab != nullptr ) {
706 if ( leInfo.dynSymTab->nlocrel != 0 )
707 blobs.push_back({"local relocations", 6, leInfo.dynSymTab->locreloff, static_cast<uint32_t>(leInfo.dynSymTab->nlocrel*sizeof(relocation_info))});
708 if ( leInfo.dynSymTab->nextrel != 0 )
709 blobs.push_back({"external relocations", 11, leInfo.dynSymTab->extreloff, static_cast<uint32_t>(leInfo.dynSymTab->nextrel*sizeof(relocation_info))});
710 if ( leInfo.dynSymTab->nindirectsyms != 0 )
711 blobs.push_back({"indirect symbol table", 12, leInfo.dynSymTab->indirectsymoff, leInfo.dynSymTab->nindirectsyms*4});
712 }
713 if ( leInfo.splitSegInfo != nullptr ) {
714 if ( leInfo.splitSegInfo->datasize != 0 )
715 blobs.push_back({"shared cache info", 6, leInfo.splitSegInfo->dataoff, leInfo.splitSegInfo->datasize});
716 }
717 if ( leInfo.functionStarts != nullptr ) {
718 if ( leInfo.functionStarts->datasize != 0 )
719 blobs.push_back({"function starts", 7, leInfo.functionStarts->dataoff, leInfo.functionStarts->datasize});
720 }
721 if ( leInfo.dataInCode != nullptr ) {
722 if ( leInfo.dataInCode->datasize != 0 )
723 blobs.push_back({"data in code", 8, leInfo.dataInCode->dataoff, leInfo.dataInCode->datasize});
724 }
725 if ( leInfo.symTab != nullptr ) {
726 if ( leInfo.symTab->nsyms != 0 )
727 blobs.push_back({"symbol table", 10, leInfo.symTab->symoff, static_cast<uint32_t>(leInfo.symTab->nsyms*(is64Bit ? sizeof(nlist_64) : sizeof(struct nlist)))});
728 if ( leInfo.symTab->strsize != 0 )
729 blobs.push_back({"symbol table strings", 20, leInfo.symTab->stroff, leInfo.symTab->strsize});
730 }
731 if ( leInfo.codeSig != nullptr ) {
732 if ( leInfo.codeSig->datasize != 0 )
733 blobs.push_back({"code signature", 21, leInfo.codeSig->dataoff, leInfo.codeSig->datasize});
734 }
735
736 // check for bad combinations
737 if ( (leInfo.dyldInfo != nullptr) && (leInfo.dyldInfo->cmd == LC_DYLD_INFO_ONLY) && (leInfo.dynSymTab != nullptr) ) {
738 if ( leInfo.dynSymTab->nlocrel != 0 ) {
739 diag.error("malformed mach-o contains LC_DYLD_INFO_ONLY and local relocations");
740 return false;
741 }
742 if ( leInfo.dynSymTab->nextrel != 0 ) {
743 diag.error("malformed mach-o contains LC_DYLD_INFO_ONLY and external relocations");
744 return false;
745 }
746 }
747 if ( (leInfo.dyldInfo == nullptr) && (leInfo.dynSymTab == nullptr) ) {
748 diag.error("malformed mach-o misssing LC_DYLD_INFO and LC_DYSYMTAB");
749 return false;
750 }
751 if ( blobs.empty() ) {
752 diag.error("malformed mach-o misssing LINKEDIT");
753 return false;
754 }
755
756 // sort vector by file offset and error on overlaps
757 std::sort(blobs.begin(), blobs.end(), [&](const LinkEditContent& a, const LinkEditContent& b) {
758 return a.fileOffsetStart < b.fileOffsetStart;
759 });
760 uint32_t prevEnd = (uint32_t)(leInfo.layout.segments[leInfo.layout.linkeditSegIndex].fileOffset);
761 const char* prevName = "start of LINKEDIT";
762 for (const LinkEditContent& blob : blobs) {
763 if ( blob.fileOffsetStart < prevEnd ) {
764 diag.error("LINKEDIT overlap of %s and %s", prevName, blob.name);
765 return false;
766 }
767 prevEnd = blob.fileOffsetStart + blob.size;
768 prevName = blob.name;
769 }
770 const LinkEditContent& lastBlob = blobs.back();
771 uint32_t linkeditFileEnd = (uint32_t)(leInfo.layout.segments[leInfo.layout.linkeditSegIndex].fileOffset + leInfo.layout.segments[leInfo.layout.linkeditSegIndex].fileSize);
772 if (greaterThanAddOrOverflow(lastBlob.fileOffsetStart, lastBlob.size, linkeditFileEnd)) {
773 diag.error("LINKEDIT content '%s' extends beyond end of segment", lastBlob.name);
774 return false;
775 }
776
777 // sort vector by order and warn on non standard order or mis-alignment
778 std::sort(blobs.begin(), blobs.end(), [&](const LinkEditContent& a, const LinkEditContent& b) {
779 return a.stdOrder < b.stdOrder;
780 });
781 prevEnd = (uint32_t)(leInfo.layout.segments[leInfo.layout.linkeditSegIndex].fileOffset);
782 prevName = "start of LINKEDIT";
783 for (const LinkEditContent& blob : blobs) {
784 if ( ((blob.fileOffsetStart & (pointerSize-1)) != 0) && (blob.stdOrder != 20) ) // ok for "symbol table strings" to be mis-aligned
785 diag.warning("mis-aligned LINKEDIT content '%s'", blob.name);
786 if ( blob.fileOffsetStart < prevEnd ) {
787 diag.warning("LINKEDIT out of order %s", blob.name);
788 }
789 prevEnd = blob.fileOffsetStart;
790 prevName = blob.name;
791 }
792
793 // Check for invalid symbol table sizes
794 if ( leInfo.symTab != nullptr ) {
795 if ( leInfo.symTab->nsyms > 0x10000000 ) {
796 diag.error("malformed mach-o image: symbol table too large");
797 return false;
798 }
799 if ( leInfo.dynSymTab != nullptr ) {
800 // validate indirect symbol table
801 if ( leInfo.dynSymTab->nindirectsyms != 0 ) {
802 if ( leInfo.dynSymTab->nindirectsyms > 0x10000000 ) {
803 diag.error("malformed mach-o image: indirect symbol table too large");
804 return false;
805 }
806 }
807 if ( (leInfo.dynSymTab->nlocalsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->ilocalsym > leInfo.symTab->nsyms) ) {
808 diag.error("malformed mach-o image: indirect symbol table local symbol count exceeds total symbols");
809 return false;
810 }
811 if ( leInfo.dynSymTab->ilocalsym + leInfo.dynSymTab->nlocalsym < leInfo.dynSymTab->ilocalsym ) {
812 diag.error("malformed mach-o image: indirect symbol table local symbol count wraps");
813 return false;
814 }
815 if ( (leInfo.dynSymTab->nextdefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iextdefsym > leInfo.symTab->nsyms) ) {
816 diag.error("malformed mach-o image: indirect symbol table extern symbol count exceeds total symbols");
817 return false;
818 }
819 if ( leInfo.dynSymTab->iextdefsym + leInfo.dynSymTab->nextdefsym < leInfo.dynSymTab->iextdefsym ) {
820 diag.error("malformed mach-o image: indirect symbol table extern symbol count wraps");
821 return false;
822 }
823 if ( (leInfo.dynSymTab->nundefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iundefsym > leInfo.symTab->nsyms) ) {
824 diag.error("malformed mach-o image: indirect symbol table undefined symbol count exceeds total symbols");
825 return false;
826 }
827 if ( leInfo.dynSymTab->iundefsym + leInfo.dynSymTab->nundefsym < leInfo.dynSymTab->iundefsym ) {
828 diag.error("malformed mach-o image: indirect symbol table undefined symbol count wraps");
829 return false;
830 }
831 }
832 }
833
834 return true;
835}
836
837bool MachOParser::isArch(const mach_header* mh, const std::string& archName)
838{
839 for (const ArchInfo& info : _s_archInfos) {
840 if ( archName == info.name ) {
841 return ( (mh->cputype == info.cputype) && ((mh->cpusubtype & ~CPU_SUBTYPE_MASK) == info.cpusubtype) );
842 }
843 }
844 return false;
845}
846
847
848std::string MachOParser::archName(uint32_t cputype, uint32_t cpusubtype)
849{
850 for (const ArchInfo& info : _s_archInfos) {
851 if ( (cputype == info.cputype) && ((cpusubtype & ~CPU_SUBTYPE_MASK) == info.cpusubtype) ) {
852 return info.name;
853 }
854 }
855 return "unknown";
856}
857
858uint32_t MachOParser::cpuTypeFromArchName(const std::string& archName)
859{
860 for (const ArchInfo& info : _s_archInfos) {
861 if ( archName == info.name ) {
862 return info.cputype;
863 }
864 }
865 return 0;
866}
867
868uint32_t MachOParser::cpuSubtypeFromArchName(const std::string& archName)
869{
870 for (const ArchInfo& info : _s_archInfos) {
871 if ( archName == info.name ) {
872 return info.cpusubtype;
873 }
874 }
875 return 0;
876}
877
878std::string MachOParser::archName() const
879{
880 return archName(header()->cputype, header()->cpusubtype);
881}
882
883std::string MachOParser::platformName(Platform platform)
884{
885 switch ( platform ) {
886 case Platform::unknown:
887 return "unknown";
888 case Platform::macOS:
889 return "macOS";
890 case Platform::iOS:
891 return "iOS";
892 case Platform::tvOS:
893 return "tvOS";
894 case Platform::watchOS:
895 return "watchOS";
896 case Platform::bridgeOS:
897 return "bridgeOS";
898 }
899 return "unknown platform";
900}
901
902std::string MachOParser::versionString(uint32_t packedVersion)
903{
904 char buff[64];
905 sprintf(buff, "%d.%d.%d", (packedVersion >> 16), ((packedVersion >> 8) & 0xFF), (packedVersion & 0xFF));
906 return buff;
907}
908
909#else
910
911bool MachOParser::isMachO(Diagnostics& diag, const void* fileContent, size_t mappedLength)
912{
913 // sanity check length
914 if ( mappedLength < 4096 ) {
915 diag.error("file too short");
916 return false;
917 }
918
919 // must start with mach-o magic value
920 const mach_header* mh = (const mach_header*)fileContent;
921#if __LP64__
922 const uint32_t requiredMagic = MH_MAGIC_64;
923#else
924 const uint32_t requiredMagic = MH_MAGIC;
925#endif
926 if ( mh->magic != requiredMagic ) {
927 diag.error("not a mach-o file");
928 return false;
929 }
930
931#if __x86_64__
932 const uint32_t requiredCPU = CPU_TYPE_X86_64;
933#elif __i386__
934 const uint32_t requiredCPU = CPU_TYPE_I386;
935#elif __arm__
936 const uint32_t requiredCPU = CPU_TYPE_ARM;
937#elif __arm64__
938 const uint32_t requiredCPU = CPU_TYPE_ARM64;
939#else
940 #error unsupported architecture
941#endif
942 if ( mh->cputype != requiredCPU ) {
943 diag.error("wrong cpu type");
944 return false;
945 }
946
947 return true;
948}
949
950bool MachOParser::wellFormedMachHeaderAndLoadCommands(const mach_header* mh)
951{
952 const load_command* startCmds = nullptr;
953 if ( mh->magic == MH_MAGIC_64 )
954 startCmds = (load_command*)((char *)mh + sizeof(mach_header_64));
955 else if ( mh->magic == MH_MAGIC )
956 startCmds = (load_command*)((char *)mh + sizeof(mach_header));
957 else
958 return false; // not a mach-o file, or wrong endianness
959
960 const load_command* const cmdsEnd = (load_command*)((char*)startCmds + mh->sizeofcmds);
961 const load_command* cmd = startCmds;
962 for(uint32_t i = 0; i < mh->ncmds; ++i) {
963 const load_command* nextCmd = (load_command*)((char *)cmd + cmd->cmdsize);
964 if ( (cmd->cmdsize < 8) || (nextCmd > cmdsEnd) || (nextCmd < startCmds)) {
965 return false;
966 }
967 cmd = nextCmd;
968 }
969 return true;
970}
971
972#endif
973
// Reports the platform this code was compiled for. The choice is made at compile
// time from the TARGET_OS_* macros; the checks are ordered, the first macro that
// is true wins, and TARGET_OS_MAC is tested last. Compilation fails if none is set.
Platform MachOParser::currentPlatform()
{
#if TARGET_OS_BRIDGE
    return Platform::bridgeOS;
#elif TARGET_OS_WATCH
    return Platform::watchOS;
#elif TARGET_OS_TV
    return Platform::tvOS;
#elif TARGET_OS_IOS
    return Platform::iOS;
#elif TARGET_OS_MAC
    return Platform::macOS;
#else
    #error unknown platform
#endif
}
990
991
992bool MachOParser::valid(Diagnostics& diag)
993{
994#if DYLD_IN_PROCESS
995 // only images loaded by dyld to be parsed
996 const mach_header* inImage = dyld3::dyld_image_header_containing_address(header());
997 if ( inImage != header() ) {
998 diag.error("only dyld loaded images can be parsed by MachOParser");
999 return false;
1000 }
1001#else
1002
1003#endif
1004 return true;
1005}
1006
1007
1008void MachOParser::forEachLoadCommand(Diagnostics& diag, void (^callback)(const load_command* cmd, bool& stop)) const
1009{
1010 bool stop = false;
1011 const load_command* startCmds = nullptr;
1012 if ( header()->magic == MH_MAGIC_64 )
1013 startCmds = (load_command*)((char *)header() + sizeof(mach_header_64));
1014 else if ( header()->magic == MH_MAGIC )
1015 startCmds = (load_command*)((char *)header() + sizeof(mach_header));
1016 else {
1017 diag.error("file does not start with MH_MAGIC[_64]");
1018 return; // not a mach-o file, or wrong endianness
1019 }
1020 const load_command* const cmdsEnd = (load_command*)((char*)startCmds + header()->sizeofcmds);
1021 const load_command* cmd = startCmds;
1022 for(uint32_t i = 0; i < header()->ncmds; ++i) {
1023 const load_command* nextCmd = (load_command*)((char *)cmd + cmd->cmdsize);
1024 if ( cmd->cmdsize < 8 ) {
1025 diag.error("malformed load command #%d, size too small %d", i, cmd->cmdsize);
1026 return;
1027 }
1028 if ( (nextCmd > cmdsEnd) || (nextCmd < startCmds) ) {
1029 diag.error("malformed load command #%d, size too large 0x%X", i, cmd->cmdsize);
1030 return;
1031 }
1032 callback(cmd, stop);
1033 if ( stop )
1034 return;
1035 cmd = nextCmd;
1036 }
1037}
1038
1039UUID MachOParser::uuid() const
1040{
1041 uuid_t uuid;
1042 getUuid(uuid);
1043 return uuid;
1044}
1045
1046bool MachOParser::getUuid(uuid_t uuid) const
1047{
1048 Diagnostics diag;
1049 __block bool found = false;
1050 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1051 if ( cmd->cmd == LC_UUID ) {
1052 const uuid_command* uc = (const uuid_command*)cmd;
1053 memcpy(uuid, uc->uuid, sizeof(uuid_t));
1054 found = true;
1055 stop = true;
1056 }
1057 });
1058 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1059 if ( !found )
1060 bzero(uuid, sizeof(uuid_t));
1061 return found;
1062}
1063
1064uint64_t MachOParser::preferredLoadAddress() const
1065{
1066 __block uint64_t result = 0;
1067 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
1068 if ( strcmp(segName, "__TEXT") == 0 ) {
1069 result = vmAddr;
1070 stop = true;
1071 }
1072 });
1073 return result;
1074}
1075
1076bool MachOParser::getPlatformAndVersion(Platform* platform, uint32_t* minOS, uint32_t* sdk) const
1077{
1078 Diagnostics diag;
1079 __block bool found = false;
1080 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1081 const version_min_command* versCmd;
1082 switch ( cmd->cmd ) {
1083 case LC_VERSION_MIN_IPHONEOS:
1084 versCmd = (version_min_command*)cmd;
1085 *platform = Platform::iOS;
1086 *minOS = versCmd->version;
1087 *sdk = versCmd->sdk;
1088 found = true;
1089 stop = true;
1090 break;
1091 case LC_VERSION_MIN_MACOSX:
1092 versCmd = (version_min_command*)cmd;
1093 *platform = Platform::macOS;
1094 *minOS = versCmd->version;
1095 *sdk = versCmd->sdk;
1096 found = true;
1097 stop = true;
1098 break;
1099 case LC_VERSION_MIN_TVOS:
1100 versCmd = (version_min_command*)cmd;
1101 *platform = Platform::tvOS;
1102 *minOS = versCmd->version;
1103 *sdk = versCmd->sdk;
1104 found = true;
1105 stop = true;
1106 break;
1107 case LC_VERSION_MIN_WATCHOS:
1108 versCmd = (version_min_command*)cmd;
1109 *platform = Platform::watchOS;
1110 *minOS = versCmd->version;
1111 *sdk = versCmd->sdk;
1112 found = true;
1113 stop = true;
1114 break;
1115 case LC_BUILD_VERSION: {
1116 const build_version_command* buildCmd = (build_version_command *)cmd;
1117 *minOS = buildCmd->minos;
1118 *sdk = buildCmd->sdk;
1119
1120 switch(buildCmd->platform) {
1121 /* Known values for the platform field above. */
1122 case PLATFORM_MACOS:
1123 *platform = Platform::macOS;
1124 break;
1125 case PLATFORM_IOS:
1126 *platform = Platform::iOS;
1127 break;
1128 case PLATFORM_TVOS:
1129 *platform = Platform::tvOS;
1130 break;
1131 case PLATFORM_WATCHOS:
1132 *platform = Platform::watchOS;
1133 break;
1134 case PLATFORM_BRIDGEOS:
1135 *platform = Platform::bridgeOS;
1136 break;
1137 }
1138 found = true;
1139 stop = true;
1140 } break;
1141 }
1142 });
1143 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1144 return found;
1145}
1146
1147
1148bool MachOParser::isSimulatorBinary() const
1149{
1150 Platform platform;
1151 uint32_t minOS;
1152 uint32_t sdk;
1153 switch ( header()->cputype ) {
1154 case CPU_TYPE_I386:
1155 case CPU_TYPE_X86_64:
1156 if ( getPlatformAndVersion(&platform, &minOS, &sdk) ) {
1157 return (platform != Platform::macOS);
1158 }
1159 break;
1160 }
1161 return false;
1162}
1163
1164
1165bool MachOParser::getDylibInstallName(const char** installName, uint32_t* compatVersion, uint32_t* currentVersion) const
1166{
1167 Diagnostics diag;
1168 __block bool found = false;
1169 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1170 if ( cmd->cmd == LC_ID_DYLIB ) {
1171 const dylib_command* dylibCmd = (dylib_command*)cmd;
1172 *compatVersion = dylibCmd->dylib.compatibility_version;
1173 *currentVersion = dylibCmd->dylib.current_version;
1174 *installName = (char*)dylibCmd + dylibCmd->dylib.name.offset;
1175 found = true;
1176 stop = true;
1177 }
1178 });
1179 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1180 return found;
1181}
1182
1183const char* MachOParser::installName() const
1184{
1185 assert(header()->filetype == MH_DYLIB);
1186 const char* result;
1187 uint32_t ignoreVersion;
1188 assert(getDylibInstallName(&result, &ignoreVersion, &ignoreVersion));
1189 return result;
1190}
1191
1192
1193uint32_t MachOParser::dependentDylibCount() const
1194{
1195 __block uint32_t count = 0;
1196 forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
1197 ++count;
1198 });
1199 return count;
1200}
1201
1202const char* MachOParser::dependentDylibLoadPath(uint32_t depIndex) const
1203{
1204 __block const char* foundLoadPath = nullptr;
1205 __block uint32_t curDepIndex = 0;
1206 forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
1207 if ( curDepIndex == depIndex ) {
1208 foundLoadPath = loadPath;
1209 stop = true;
1210 }
1211 ++curDepIndex;
1212 });
1213 return foundLoadPath;
1214}
1215
1216
1217void MachOParser::forEachDependentDylib(void (^callback)(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop)) const
1218{
1219 Diagnostics diag;
1220 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1221 switch ( cmd->cmd ) {
1222 case LC_LOAD_DYLIB:
1223 case LC_LOAD_WEAK_DYLIB:
1224 case LC_REEXPORT_DYLIB:
1225 case LC_LOAD_UPWARD_DYLIB: {
1226 const dylib_command* dylibCmd = (dylib_command*)cmd;
1227 assert(dylibCmd->dylib.name.offset < cmd->cmdsize);
1228 const char* loadPath = (char*)dylibCmd + dylibCmd->dylib.name.offset;
1229 callback(loadPath, (cmd->cmd == LC_LOAD_WEAK_DYLIB), (cmd->cmd == LC_REEXPORT_DYLIB), (cmd->cmd == LC_LOAD_UPWARD_DYLIB),
1230 dylibCmd->dylib.compatibility_version, dylibCmd->dylib.current_version, stop);
1231 }
1232 break;
1233 }
1234 });
1235 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1236}
1237
1238void MachOParser::forEachRPath(void (^callback)(const char* rPath, bool& stop)) const
1239{
1240 Diagnostics diag;
1241 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1242 if ( cmd->cmd == LC_RPATH ) {
1243 const char* rpath = (char*)cmd + ((struct rpath_command*)cmd)->path.offset;
1244 callback(rpath, stop);
1245 }
1246 });
1247 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1248}
1249
1250/*
1251 struct LayoutInfo {
1252#if DYLD_IN_PROCESS
1253 uintptr_t slide;
1254 uintptr_t textUnslidVMAddr;
1255 uintptr_t linkeditUnslidVMAddr;
1256 uint32_t linkeditFileOffset;
1257#else
1258 uint32_t segmentCount;
1259 uint32_t linkeditSegIndex;
1260 struct {
1261 uint64_t mappingOffset;
1262 uint64_t fileOffset;
1263 uint64_t segUnslidAddress;
1264 uint64_t segSize;
1265 } segments[16];
1266#endif
1267 };
1268*/
1269
1270#if !DYLD_IN_PROCESS
1271const uint8_t* MachOParser::getContentForVMAddr(const LayoutInfo& info, uint64_t addr) const
1272{
1273 for (uint32_t i=0; i < info.segmentCount; ++i) {
1274 if ( (addr >= info.segments[i].segUnslidAddress) && (addr < (info.segments[i].segUnslidAddress+info.segments[i].segSize)) )
1275 return (uint8_t*)header() + info.segments[i].mappingOffset + (addr - info.segments[i].segUnslidAddress);
1276 }
1277 // value is outside this image. could be pointer into another image
1278 if ( inDyldCache() ) {
1279 return (uint8_t*)header() + info.segments[0].mappingOffset + (addr - info.segments[0].segUnslidAddress);
1280 }
1281 assert(0 && "address not found in segment");
1282 return nullptr;
1283}
1284#endif
1285
1286const uint8_t* MachOParser::getLinkEditContent(const LayoutInfo& info, uint32_t fileOffset) const
1287{
1288#if DYLD_IN_PROCESS
1289 uint32_t offsetInLinkedit = fileOffset - info.linkeditFileOffset;
1290 uintptr_t linkeditStartAddr = info.linkeditUnslidVMAddr + info.slide;
1291 return (uint8_t*)(linkeditStartAddr + offsetInLinkedit);
1292#else
1293 uint32_t offsetInLinkedit = fileOffset - (uint32_t)(info.segments[info.linkeditSegIndex].fileOffset);
1294 const uint8_t* linkeditStart = (uint8_t*)header() + info.segments[info.linkeditSegIndex].mappingOffset;
1295 return linkeditStart + offsetInLinkedit;
1296#endif
1297}
1298
1299
// Fills `result` with what is needed to turn file offsets / vm addresses of this
// image into pointers.
//
// In-process builds record the slide plus the vm address of __TEXT and the vm
// address and file offset of __LINKEDIT. Fields for a segment that does not exist
// are left unassigned.
//
// Other builds record one entry per segment (mapping offset, file offset/size,
// vm address/size, protection flags, whether text relocations are allowed) and
// the index of __LINKEDIT, which stays 0xFFFFFFFF when there is no such segment.
void MachOParser::getLayoutInfo(LayoutInfo& result) const
{
#if DYLD_IN_PROCESS
    // image loaded by dyld, just record the addr and file offset of TEXT and LINKEDIT segments
    result.slide = getSlide();
    forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
        if ( strcmp(segName, "__TEXT") == 0 ) {
            result.textUnslidVMAddr   = (uintptr_t)vmAddr;
        }
        else if ( strcmp(segName, "__LINKEDIT") == 0 ) {
            result.linkeditUnslidVMAddr = (uintptr_t)vmAddr;
            result.linkeditFileOffset   = fileOffset;
        }
    });
#else
    bool inCache = inDyldCache();
    bool intel32 = (header()->cputype == CPU_TYPE_I386);
    result.segmentCount     = 0;
    result.linkeditSegIndex = 0xFFFFFFFF;
    // __TEXT values are remembered so later segments can be expressed relative to it.
    // They stay 0 for segments visited before __TEXT.
    __block uint64_t textSegAddr       = 0;
    __block uint64_t textSegFileOffset = 0;
    forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
        auto& segInfo = result.segments[result.segmentCount];
        if ( strcmp(segName, "__TEXT") == 0 ) {
            textSegAddr       = vmAddr;
            textSegFileOffset = fileOffset;
        }
        // Only i386 images are scanned: a segment is flagged when any of its sections
        // carries S_ATTR_EXT_RELOC or S_ATTR_LOC_RELOC. Sections are matched to the
        // segment by name.
        __block bool textRelocsAllowed = false;
        if ( intel32 ) {
            forEachSection(^(const char* curSegName, uint32_t segIndex, uint64_t segVMAddr, const char* sectionName, uint32_t sectFlags,
                             uint64_t sectAddr, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& sectStop) {
                if ( strcmp(curSegName, segName) == 0 ) {
                    if ( sectFlags & (S_ATTR_EXT_RELOC|S_ATTR_LOC_RELOC) ) {
                        textRelocsAllowed = true;
                        sectStop = true;
                    }
                }
            });
        }
        if ( inCache ) {
            if ( inRawCache() ) {
                // whole cache file mapped somewhere (padding not expanded)
                // vmaddrs are useless. only file offset make sense
                segInfo.mappingOffset = fileOffset - textSegFileOffset;
            }
            else {
                // cache file was loaded by dyld into shared region
                // vmaddrs of segments are correct except for ASLR slide
                segInfo.mappingOffset = vmAddr - textSegAddr;
            }
        }
        else {
            // individual mach-o file mapped in one region, so mappingOffset == fileOffset
            segInfo.mappingOffset = fileOffset;
        }
        segInfo.fileOffset        = fileOffset;
        segInfo.fileSize          = fileSize;
        segInfo.segUnslidAddress  = vmAddr;
        segInfo.segSize           = vmSize;
        segInfo.writable          = ((protections & VM_PROT_WRITE)   == VM_PROT_WRITE);
        segInfo.executable        = ((protections & VM_PROT_EXECUTE) == VM_PROT_EXECUTE);
        segInfo.textRelocsAllowed = textRelocsAllowed;
        if ( strcmp(segName, "__LINKEDIT") == 0 ) {
            result.linkeditSegIndex = result.segmentCount;
        }
        ++result.segmentCount;
        // Stops after 128 segments have been recorded.
        // NOTE(review): this presumes LayoutInfo::segments has at least 128 entries;
        // the array size is declared elsewhere - confirm it matches this cap.
        if ( result.segmentCount > 127 )
            stop = true;
    });
#endif
}
1371
1372
1373void MachOParser::forEachSection(void (^callback)(const char* segName, const char* sectionName, uint32_t flags,
1374 const void* content, size_t size, bool illegalSectionSize, bool& stop)) const
1375{
1376 forEachSection(^(const char* segName, const char* sectionName, uint32_t flags, uint64_t addr,
1377 const void* content, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& stop) {
1378 callback(segName, sectionName, flags, content, (size_t)size, illegalSectionSize, stop);
1379 });
1380}
1381
1382void MachOParser::forEachSection(void (^callback)(const char* segName, const char* sectionName, uint32_t flags, uint64_t addr,
1383 const void* content, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2,
1384 bool illegalSectionSize, bool& stop)) const
1385{
1386 Diagnostics diag;
1387 //fprintf(stderr, "forEachSection() mh=%p\n", header());
1388 LayoutInfo layout;
1389 getLayoutInfo(layout);
1390 forEachSection(^(const char* segName, uint32_t segIndex, uint64_t segVMAddr, const char* sectionName, uint32_t sectFlags,
1391 uint64_t sectAddr, uint64_t sectSize, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& stop) {
1392 #if DYLD_IN_PROCESS
1393 const uint8_t* segContentStart = (uint8_t*)(segVMAddr + layout.slide);
1394 #else
1395 const uint8_t* segContentStart = (uint8_t*)header() + layout.segments[segIndex].mappingOffset;
1396 #endif
1397 const void* contentAddr = segContentStart + (sectAddr - segVMAddr);
1398 callback(segName, sectionName, sectFlags, sectAddr, contentAddr, sectSize, alignP2, reserved1, reserved2, illegalSectionSize, stop);
1399 });
1400
1401}
1402
// This iterator just walks the segment/section array of the LC_SEGMENT and
// LC_SEGMENT_64 load commands. It does not interpret addresses: vmaddr, section
// address and size are handed to the callback exactly as stored.
//
// segIndex counts segment load commands in file order, starting at 0.
// illegalSectionSize is true when the section starts below its segment's vmaddr, or
// when greaterThanAddOrOverflow() reports that addr+size exceeds vmaddr+filesize
// (or overflows).
void MachOParser::forEachSection(void (^callback)(const char* segName, uint32_t segIndex, uint64_t segVMAddr, const char* sectionName, uint32_t sectFlags,
                                                  uint64_t sectAddr, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& stop)) const
{
    Diagnostics diag;
    //fprintf(stderr, "forEachSection() mh=%p\n", header());
    __block uint32_t segIndex = 0;
    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_SEGMENT_64 ) {
            const segment_command_64* seg = (segment_command_64*)cmd;
            // the section records follow the segment command directly
            const section_64* const sectionsStart = (section_64*)((char*)seg + sizeof(struct segment_command_64));
            const section_64* const sectionsEnd   = &sectionsStart[seg->nsects];
            for (const section_64* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
                const char* sectName = sect->sectname;
                char sectNameCopy[20];
                // a name that fills all 16 bytes has no terminator of its own,
                // so a terminated copy is passed to the callback instead
                if ( sectName[15] != '\0' ) {
                    strlcpy(sectNameCopy, sectName, 17);
                    sectName = sectNameCopy;
                }
                bool illegalSectionSize = (sect->addr < seg->vmaddr) || greaterThanAddOrOverflow(sect->addr, sect->size, seg->vmaddr + seg->filesize);
                callback(seg->segname, segIndex, seg->vmaddr, sectName, sect->flags, sect->addr, sect->size, sect->align, sect->reserved1, sect->reserved2, illegalSectionSize, stop);
            }
            ++segIndex;
        }
        else if ( cmd->cmd == LC_SEGMENT ) {
            // same walk for the 32-bit segment/section layout
            const segment_command* seg = (segment_command*)cmd;
            const section* const sectionsStart = (section*)((char*)seg + sizeof(struct segment_command));
            const section* const sectionsEnd   = &sectionsStart[seg->nsects];
            for (const section* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
                const char* sectName = sect->sectname;
                char sectNameCopy[20];
                if ( sectName[15] != '\0' ) {
                    strlcpy(sectNameCopy, sectName, 17);
                    sectName = sectNameCopy;
                }
                bool illegalSectionSize = (sect->addr < seg->vmaddr) || greaterThanAddOrOverflow(sect->addr, sect->size, seg->vmaddr + seg->filesize);
                callback(seg->segname, segIndex, seg->vmaddr, sectName, sect->flags, sect->addr, sect->size, sect->align, sect->reserved1, sect->reserved2, illegalSectionSize, stop);
            }
            ++segIndex;
        }
    });
    diag.assertNoError();   // any malformations in the file should have been caught by earlier validate() call
}
1446
1447void MachOParser::forEachGlobalSymbol(Diagnostics& diag, void (^callback)(const char* symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop)) const
1448{
1449 LinkEditInfo leInfo;
1450 getLinkEditPointers(diag, leInfo);
1451 if ( diag.hasError() )
1452 return;
1453
1454 const bool is64Bit = is64();
1455 if ( leInfo.symTab != nullptr ) {
1456 uint32_t globalsStartIndex = 0;
1457 uint32_t globalsCount = leInfo.symTab->nsyms;
1458 if ( leInfo.dynSymTab != nullptr ) {
1459 globalsStartIndex = leInfo.dynSymTab->iextdefsym;
1460 globalsCount = leInfo.dynSymTab->nextdefsym;
1461 }
1462 uint32_t maxStringOffset = leInfo.symTab->strsize;
1463 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1464 const struct nlist* symbols = (struct nlist*) (getLinkEditContent(leInfo.layout, leInfo.symTab->symoff));
1465 const struct nlist_64* symbols64 = (struct nlist_64*)(getLinkEditContent(leInfo.layout, leInfo.symTab->symoff));
1466 bool stop = false;
1467 for (uint32_t i=0; (i < globalsCount) && !stop; ++i) {
1468 if ( is64Bit ) {
1469 const struct nlist_64& sym = symbols64[globalsStartIndex+i];
1470 if ( sym.n_un.n_strx > maxStringOffset )
1471 continue;
1472 if ( (sym.n_type & N_EXT) && ((sym.n_type & N_TYPE) == N_SECT) && ((sym.n_type & N_STAB) == 0) )
1473 callback(&stringPool[sym.n_un.n_strx], sym.n_value, sym.n_type, sym.n_sect, sym.n_desc, stop);
1474 }
1475 else {
1476 const struct nlist& sym = symbols[globalsStartIndex+i];
1477 if ( sym.n_un.n_strx > maxStringOffset )
1478 continue;
1479 if ( (sym.n_type & N_EXT) && ((sym.n_type & N_TYPE) == N_SECT) && ((sym.n_type & N_STAB) == 0) )
1480 callback(&stringPool[sym.n_un.n_strx], sym.n_value, sym.n_type, sym.n_sect, sym.n_desc, stop);
1481 }
1482 }
1483 }
1484}
1485
1486void MachOParser::forEachLocalSymbol(Diagnostics& diag, void (^callback)(const char* symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop)) const
1487{
1488 LinkEditInfo leInfo;
1489 getLinkEditPointers(diag, leInfo);
1490 if ( diag.hasError() )
1491 return;
1492
1493 const bool is64Bit = is64();
1494 if ( leInfo.symTab != nullptr ) {
1495 uint32_t localsStartIndex = 0;
1496 uint32_t localsCount = leInfo.symTab->nsyms;
1497 if ( leInfo.dynSymTab != nullptr ) {
1498 localsStartIndex = leInfo.dynSymTab->ilocalsym;
1499 localsCount = leInfo.dynSymTab->nlocalsym;
1500 }
1501 uint32_t maxStringOffset = leInfo.symTab->strsize;
1502 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1503 const struct nlist* symbols = (struct nlist*) (getLinkEditContent(leInfo.layout, leInfo.symTab->symoff));
1504 const struct nlist_64* symbols64 = (struct nlist_64*)(getLinkEditContent(leInfo.layout, leInfo.symTab->symoff));
1505 bool stop = false;
1506 for (uint32_t i=0; (i < localsCount) && !stop; ++i) {
1507 if ( is64Bit ) {
1508 const struct nlist_64& sym = symbols64[localsStartIndex+i];
1509 if ( sym.n_un.n_strx > maxStringOffset )
1510 continue;
1511 if ( ((sym.n_type & N_EXT) == 0) && ((sym.n_type & N_TYPE) == N_SECT) && ((sym.n_type & N_STAB) == 0) )
1512 callback(&stringPool[sym.n_un.n_strx], sym.n_value, sym.n_type, sym.n_sect, sym.n_desc, stop);
1513 }
1514 else {
1515 const struct nlist& sym = symbols[localsStartIndex+i];
1516 if ( sym.n_un.n_strx > maxStringOffset )
1517 continue;
1518 if ( ((sym.n_type & N_EXT) == 0) && ((sym.n_type & N_TYPE) == N_SECT) && ((sym.n_type & N_STAB) == 0) )
1519 callback(&stringPool[sym.n_un.n_strx], sym.n_value, sym.n_type, sym.n_sect, sym.n_desc, stop);
1520 }
1521 }
1522 }
1523}
1524
1525
// Looks up `symbolName` among the symbols this image exports.
//
// diag          - receives errors from parsing the LINKEDIT data or the export trie.
// symbolName    - name to look for.
// extra         - opaque value stored into foundInfo.foundExtra and handed to findDependent.
// foundInfo     - out: filled in when the symbol is found.
// findDependent - optional callable used to locate a dependent dylib by index and load
//                 path; needed to follow re-exports.
//
// Images with an LC_DYLD_INFO are searched through their export trie. A name that is
// absent there is searched for, recursively, in the re-exported dependent dylibs, and
// a trie entry flagged as a re-export is resolved in the dylib its ordinal names.
// Images without LC_DYLD_INFO fall back to a scan of the global symbols.
//
// Returns true when the symbol was found.
bool MachOParser::findExportedSymbol(Diagnostics& diag, const char* symbolName, void* extra, FoundSymbol& foundInfo, DependentFinder findDependent) const
{
    LinkEditInfo leInfo;
    getLinkEditPointers(diag, leInfo);
    if ( diag.hasError() )
        return false;
    if ( leInfo.dyldInfo != nullptr ) {
        const uint8_t* trieStart = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->export_off);
        const uint8_t* trieEnd   = trieStart + leInfo.dyldInfo->export_size;
        const uint8_t* node      = trieWalk(diag, trieStart, trieEnd, symbolName);
        if ( node == nullptr ) {
            // symbol not exported from this image. Search any re-exported dylibs
            __block unsigned        depIndex = 0;
            __block bool            foundInReExportedDylib = false;
            forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
                if ( isReExport && findDependent ) {
                    const mach_header* depMH;
                    void*              depExtra;
                    if ( findDependent(depIndex, loadPath, extra, &depMH, &depExtra) ) {
                        // NOTE(review): 0x80000000 is presumably the "dylib is in the shared cache"
                        // header flag - confirm against the loader headers
                        bool depInRawCache = inRawCache() && (depMH->flags & 0x80000000);
                        MachOParser dep(depMH, depInRawCache);
                        if ( dep.findExportedSymbol(diag, symbolName, depExtra, foundInfo, findDependent) ) {
                            stop = true;
                            foundInReExportedDylib = true;
                        }
                    }
                    else {
                        fprintf(stderr, "could not find re-exported dylib %s\n", loadPath);
                    }
                }
                // counts every dependent, not only the re-exported ones
                ++depIndex;
            });
            return foundInReExportedDylib;
        }
        // the node payload starts with a uleb128 flags value
        const uint8_t* p = node;
        const uint64_t flags = read_uleb128(diag, p, trieEnd);
        if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
            if ( !findDependent )
                return false;
            // re-export from another dylib, lookup there
            const uint64_t ordinal = read_uleb128(diag, p, trieEnd);
            // an empty imported name means the symbol keeps its own name in the other dylib
            const char* importedName = (char*)p;
            if ( importedName[0] == '\0' )
                importedName = symbolName;
            // NOTE(review): the ordinal comes from the file; with assertions compiled out an
            // ordinal of 0 passes the range check below and depIndex wraps - worth confirming
            assert(ordinal >= 1);
            if (ordinal > dependentDylibCount()) {
                diag.error("ordinal %lld out of range for %s", ordinal, symbolName);
                return false;
            }
            // ordinals are 1-based, dependent indexes are 0-based
            uint32_t depIndex = (uint32_t)(ordinal-1);
            const mach_header* depMH;
            void*              depExtra;
            if ( findDependent(depIndex, dependentDylibLoadPath(depIndex), extra, &depMH, &depExtra) ) {
                bool depInRawCache = inRawCache() && (depMH->flags & 0x80000000);
                MachOParser depParser(depMH, depInRawCache);
                return depParser.findExportedSymbol(diag, importedName, depExtra, foundInfo, findDependent);
            }
            else {
                diag.error("dependent dylib %lld not found for re-exported symbol %s", ordinal, symbolName);
                return false;
            }
        }
        // regular export: the next uleb128 is the symbol's value
        foundInfo.kind               = FoundSymbol::Kind::headerOffset;
        foundInfo.isThreadLocal      = false;
        foundInfo.foundInDylib       = header();
        foundInfo.foundExtra         = extra;
        foundInfo.value              = read_uleb128(diag, p, trieEnd);
        foundInfo.resolverFuncOffset = 0;
        foundInfo.foundSymbolName    = symbolName;
        if ( diag.hasError() )
            return false;
        switch ( flags & EXPORT_SYMBOL_FLAGS_KIND_MASK ) {
            case EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
                if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
                    // a further uleb128 holds the resolver function offset
                    foundInfo.kind = FoundSymbol::Kind::headerOffset;
                    foundInfo.resolverFuncOffset = (uint32_t)read_uleb128(diag, p, trieEnd);
                }
                else {
                    foundInfo.kind = FoundSymbol::Kind::headerOffset;
                }
                break;
            case EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
                foundInfo.isThreadLocal = true;
                break;
            case EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE:
                foundInfo.kind = FoundSymbol::Kind::absolute;
                break;
            default:
                diag.error("unsupported exported symbol kind. flags=%llu at node offset=0x%0lX", flags, (long)(node-trieStart));
                return false;
        }
        return true;
    }
    else {
        // this is an old binary (before macOS 10.6), scan the symbol table
        foundInfo.foundInDylib = nullptr;
        // symbol values are vm addresses; subtracting the __TEXT address gives an offset from the header
        uint64_t baseAddress = preferredLoadAddress();
        forEachGlobalSymbol(diag, ^(const char* aSymbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop) {
            if ( strcmp(aSymbolName, symbolName) == 0 ) {
                foundInfo.kind               = FoundSymbol::Kind::headerOffset;
                foundInfo.isThreadLocal      = false;
                foundInfo.foundInDylib       = header();
                foundInfo.foundExtra         = extra;
                foundInfo.value              = n_value - baseAddress;
                foundInfo.resolverFuncOffset = 0;
                foundInfo.foundSymbolName    = symbolName;
                stop = true;
            }
        });
        // foundInDylib doubles as the "found" marker for this path
        return (foundInfo.foundInDylib != nullptr);
    }
}
1638
1639
// Collects pointers to the load commands that describe LINKEDIT content and checks
// them for basic consistency.
//
// result - the dyldInfo, symTab, dynSymTab, splitSegInfo, functionStarts, dataInCode
//          and codeSig members are reset to nullptr and then pointed at the matching
//          load command, when present. Other members are not touched here.
// diag   - receives an error when a checked load command has an unexpected cmdsize,
//          when a kind that may appear only once is repeated, when an encryption info
//          command does not match the image's pointer size, or when there is an
//          LC_DYSYMTAB without an LC_SYMTAB.
//
// Each result pointer is assigned even when an error was just reported for that
// command, so callers need to check `diag` before using `result`.
void MachOParser::getLinkEditLoadCommands(Diagnostics& diag, LinkEditInfo& result) const
{
    result.dyldInfo       = nullptr;
    result.symTab         = nullptr;
    result.dynSymTab      = nullptr;
    result.splitSegInfo   = nullptr;
    result.functionStarts = nullptr;
    result.dataInCode     = nullptr;
    result.codeSig        = nullptr;
    // these kinds have no slot in `result`; they are only tracked to detect duplicates
    __block bool hasUUID    = false;
    __block bool hasVersion = false;
    __block bool hasEncrypt = false;
    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        switch ( cmd->cmd ) {
            case LC_DYLD_INFO:
            case LC_DYLD_INFO_ONLY:
                if ( cmd->cmdsize != sizeof(dyld_info_command) )
                    diag.error("LC_DYLD_INFO load command size wrong");
                else if ( result.dyldInfo != nullptr )
                    diag.error("multiple LC_DYLD_INFO load commands");
                result.dyldInfo = (dyld_info_command*)cmd;
                break;
            case LC_SYMTAB:
                if ( cmd->cmdsize != sizeof(symtab_command) )
                    diag.error("LC_SYMTAB load command size wrong");
                else if ( result.symTab != nullptr )
                    diag.error("multiple LC_SYMTAB load commands");
                result.symTab = (symtab_command*)cmd;
                break;
            case LC_DYSYMTAB:
                if ( cmd->cmdsize != sizeof(dysymtab_command) )
                    diag.error("LC_DYSYMTAB load command size wrong");
                else if ( result.dynSymTab != nullptr )
                    diag.error("multiple LC_DYSYMTAB load commands");
                result.dynSymTab = (dysymtab_command*)cmd;
                break;
            case LC_SEGMENT_SPLIT_INFO:
                if ( cmd->cmdsize != sizeof(linkedit_data_command) )
                    diag.error("LC_SEGMENT_SPLIT_INFO load command size wrong");
                else if ( result.splitSegInfo != nullptr )
                    diag.error("multiple LC_SEGMENT_SPLIT_INFO load commands");
                result.splitSegInfo = (linkedit_data_command*)cmd;
                break;
            case LC_FUNCTION_STARTS:
                if ( cmd->cmdsize != sizeof(linkedit_data_command) )
                    diag.error("LC_FUNCTION_STARTS load command size wrong");
                else if ( result.functionStarts != nullptr )
                    diag.error("multiple LC_FUNCTION_STARTS load commands");
                result.functionStarts = (linkedit_data_command*)cmd;
                break;
            case LC_DATA_IN_CODE:
                if ( cmd->cmdsize != sizeof(linkedit_data_command) )
                    diag.error("LC_DATA_IN_CODE load command size wrong");
                else if ( result.dataInCode != nullptr )
                    diag.error("multiple LC_DATA_IN_CODE load commands");
                result.dataInCode = (linkedit_data_command*)cmd;
                break;
            case LC_CODE_SIGNATURE:
                if ( cmd->cmdsize != sizeof(linkedit_data_command) )
                    diag.error("LC_CODE_SIGNATURE load command size wrong");
                else if ( result.codeSig != nullptr )
                    diag.error("multiple LC_CODE_SIGNATURE load commands");
                result.codeSig = (linkedit_data_command*)cmd;
                break;
            case LC_UUID:
                if ( cmd->cmdsize != sizeof(uuid_command) )
                    diag.error("LC_UUID load command size wrong");
                else if ( hasUUID )
                    diag.error("multiple LC_UUID load commands");
                hasUUID = true;
                break;
            case LC_VERSION_MIN_IPHONEOS:
            case LC_VERSION_MIN_MACOSX:
            case LC_VERSION_MIN_TVOS:
            case LC_VERSION_MIN_WATCHOS:
                if ( cmd->cmdsize != sizeof(version_min_command) )
                    diag.error("LC_VERSION_* load command size wrong");
                else if ( hasVersion )
                    diag.error("multiple LC_VERSION_MIN_* load commands");
                hasVersion = true;
                break;
            case LC_BUILD_VERSION:
                // the command is followed by `ntools` build_tool_version records, so its size varies.
                // hasVersion is shared with the LC_VERSION_MIN_* cases above
                if ( cmd->cmdsize != (sizeof(build_version_command) + ((build_version_command*)cmd)->ntools * sizeof(build_tool_version)) )
                    diag.error("LC_BUILD_VERSION load command size wrong");
                else if ( hasVersion )
                    diag.error("multiple LC_BUILD_VERSION load commands");
                hasVersion = true;
                break;
            case LC_ENCRYPTION_INFO:
                if ( cmd->cmdsize != sizeof(encryption_info_command) )
                    diag.error("LC_ENCRYPTION_INFO load command size wrong");
                else if ( hasEncrypt )
                    diag.error("multiple LC_ENCRYPTION_INFO load commands");
                else if ( is64() )
                    diag.error("LC_ENCRYPTION_INFO found in 64-bit mach-o");
                hasEncrypt = true;
                break;
            case LC_ENCRYPTION_INFO_64:
                if ( cmd->cmdsize != sizeof(encryption_info_command_64) )
                    diag.error("LC_ENCRYPTION_INFO_64 load command size wrong");
                else if ( hasEncrypt )
                    diag.error("multiple LC_ENCRYPTION_INFO_64 load commands");
                else if ( !is64() )
                    diag.error("LC_ENCRYPTION_INFO_64 found in 32-bit mach-o");
                hasEncrypt = true;
                break;
        }
    });
    // only reported when nothing else went wrong
    if ( diag.noError() && (result.dynSymTab != nullptr) && (result.symTab == nullptr) )
        diag.error("LC_DYSYMTAB but no LC_SYMTAB load command");

}
1752
1753void MachOParser::getLinkEditPointers(Diagnostics& diag, LinkEditInfo& result) const
1754{
1755 getLinkEditLoadCommands(diag, result);
1756 if ( diag.noError() )
1757 getLayoutInfo(result.layout);
1758}
1759
1760void MachOParser::forEachSegment(void (^callback)(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop)) const
1761{
1762 Diagnostics diag;
1763 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1764 if ( cmd->cmd == LC_SEGMENT_64 ) {
1765 const segment_command_64* seg = (segment_command_64*)cmd;
1766 callback(seg->segname, (uint32_t)seg->fileoff, (uint32_t)seg->filesize, seg->vmaddr, seg->vmsize, seg->initprot, stop);
1767 }
1768 else if ( cmd->cmd == LC_SEGMENT ) {
1769 const segment_command* seg = (segment_command*)cmd;
1770 callback(seg->segname, seg->fileoff, seg->filesize, seg->vmaddr, seg->vmsize, seg->initprot, stop);
1771 }
1772 });
1773 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1774}
1775
1776const uint8_t* MachOParser::trieWalk(Diagnostics& diag, const uint8_t* start, const uint8_t* end, const char* symbol)
1777{
1778 uint32_t visitedNodeOffsets[128];
1779 int visitedNodeOffsetCount = 0;
1780 visitedNodeOffsets[visitedNodeOffsetCount++] = 0;
1781 const uint8_t* p = start;
1782 while ( p < end ) {
1783 uint64_t terminalSize = *p++;
1784 if ( terminalSize > 127 ) {
1785 // except for re-export-with-rename, all terminal sizes fit in one byte
1786 --p;
1787 terminalSize = read_uleb128(diag, p, end);
1788 if ( diag.hasError() )
1789 return nullptr;
1790 }
1791 if ( (*symbol == '\0') && (terminalSize != 0) ) {
1792 return p;
1793 }
1794 const uint8_t* children = p + terminalSize;
1795 if ( children > end ) {
1796 diag.error("malformed trie node, terminalSize=0x%llX extends past end of trie\n", terminalSize);
1797 return nullptr;
1798 }
1799 uint8_t childrenRemaining = *children++;
1800 p = children;
1801 uint64_t nodeOffset = 0;
1802 for (; childrenRemaining > 0; --childrenRemaining) {
1803 const char* ss = symbol;
1804 bool wrongEdge = false;
1805 // scan whole edge to get to next edge
1806 // if edge is longer than target symbol name, don't read past end of symbol name
1807 char c = *p;
1808 while ( c != '\0' ) {
1809 if ( !wrongEdge ) {
1810 if ( c != *ss )
1811 wrongEdge = true;
1812 ++ss;
1813 }
1814 ++p;
1815 c = *p;
1816 }
1817 if ( wrongEdge ) {
1818 // advance to next child
1819 ++p; // skip over zero terminator
1820 // skip over uleb128 until last byte is found
1821 while ( (*p & 0x80) != 0 )
1822 ++p;
1823 ++p; // skip over last byte of uleb128
1824 if ( p > end ) {
1825 diag.error("malformed trie node, child node extends past end of trie\n");
1826 return nullptr;
1827 }
1828 }
1829 else {
1830 // the symbol so far matches this edge (child)
1831 // so advance to the child's node
1832 ++p;
1833 nodeOffset = read_uleb128(diag, p, end);
1834 if ( diag.hasError() )
1835 return nullptr;
1836 if ( (nodeOffset == 0) || ( &start[nodeOffset] > end) ) {
1837 diag.error("malformed trie child, nodeOffset=0x%llX out of range\n", nodeOffset);
1838 return nullptr;
1839 }
1840 symbol = ss;
1841 break;
1842 }
1843 }
1844 if ( nodeOffset != 0 ) {
1845 if ( nodeOffset > (end-start) ) {
1846 diag.error("malformed trie child, nodeOffset=0x%llX out of range\n", nodeOffset);
1847 return nullptr;
1848 }
1849 for (int i=0; i < visitedNodeOffsetCount; ++i) {
1850 if ( visitedNodeOffsets[i] == nodeOffset ) {
1851 diag.error("malformed trie child, cycle to nodeOffset=0x%llX\n", nodeOffset);
1852 return nullptr;
1853 }
1854 }
1855 visitedNodeOffsets[visitedNodeOffsetCount++] = (uint32_t)nodeOffset;
1856 if ( visitedNodeOffsetCount >= 128 ) {
1857 diag.error("malformed trie too deep\n");
1858 return nullptr;
1859 }
1860 p = &start[nodeOffset];
1861 }
1862 else
1863 p = end;
1864 }
1865 return nullptr;
1866}
1867
1868
1869uint64_t MachOParser::read_uleb128(Diagnostics& diag, const uint8_t*& p, const uint8_t* end)
1870{
1871 uint64_t result = 0;
1872 int bit = 0;
1873 do {
1874 if ( p == end ) {
1875 diag.error("malformed uleb128");
1876 break;
1877 }
1878 uint64_t slice = *p & 0x7f;
1879
1880 if ( bit > 63 ) {
1881 diag.error("uleb128 too big for uint64");
1882 break;
1883 }
1884 else {
1885 result |= (slice << bit);
1886 bit += 7;
1887 }
1888 }
1889 while (*p++ & 0x80);
1890 return result;
1891}
1892
1893
1894int64_t MachOParser::read_sleb128(Diagnostics& diag, const uint8_t*& p, const uint8_t* end)
1895{
1896 int64_t result = 0;
1897 int bit = 0;
1898 uint8_t byte = 0;
1899 do {
1900 if ( p == end ) {
1901 diag.error("malformed sleb128");
1902 break;
1903 }
1904 byte = *p++;
1905 result |= (((int64_t)(byte & 0x7f)) << bit);
1906 bit += 7;
1907 } while (byte & 0x80);
1908 // sign extend negative numbers
1909 if ( (byte & 0x40) != 0 )
1910 result |= (-1LL) << bit;
1911 return result;
1912}
1913
1914bool MachOParser::is64() const
1915{
1916#if DYLD_IN_PROCESS
1917 return (sizeof(void*) == 8);
1918#else
1919 return (header()->magic == MH_MAGIC_64);
1920#endif
1921}
1922
1923
1924
1925
1926bool MachOParser::findClosestSymbol(uint64_t targetUnslidAddress, const char** symbolName, uint64_t* symbolUnslidAddr) const
1927{
1928 Diagnostics diag;
1929 __block uint64_t closestNValueSoFar = 0;
1930 __block const char* closestNameSoFar = nullptr;
1931 forEachGlobalSymbol(diag, ^(const char* aSymbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop) {
1932 if ( n_value <= targetUnslidAddress ) {
1933 if ( (closestNameSoFar == nullptr) || (closestNValueSoFar < n_value) ) {
1934 closestNValueSoFar = n_value;
1935 closestNameSoFar = aSymbolName;
1936 }
1937 }
1938 });
1939 forEachLocalSymbol(diag, ^(const char* aSymbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop) {
1940 if ( n_value <= targetUnslidAddress ) {
1941 if ( (closestNameSoFar == nullptr) || (closestNValueSoFar < n_value) ) {
1942 closestNValueSoFar = n_value;
1943 closestNameSoFar = aSymbolName;
1944 }
1945 }
1946 });
1947 if ( closestNameSoFar == nullptr ) {
1948 return false;
1949 }
1950
1951 *symbolName = closestNameSoFar;
1952 *symbolUnslidAddr = closestNValueSoFar;
1953 return true;
1954}
1955
1956
1957#if DYLD_IN_PROCESS
1958
1959bool MachOParser::findClosestSymbol(const void* addr, const char** symbolName, const void** symbolAddress) const
1960{
1961 uint64_t slide = getSlide();
1962 uint64_t symbolUnslidAddr;
1963 if ( findClosestSymbol((uint64_t)addr - slide, symbolName, &symbolUnslidAddr) ) {
1964 *symbolAddress = (const void*)(long)(symbolUnslidAddr + slide);
1965 return true;
1966 }
1967 return false;
1968}
1969
1970intptr_t MachOParser::getSlide() const
1971{
1972 Diagnostics diag;
1973 __block intptr_t slide = 0;
1974 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
1975#if __LP64__
1976 if ( cmd->cmd == LC_SEGMENT_64 ) {
1977 const segment_command_64* seg = (segment_command_64*)cmd;
1978 if ( strcmp(seg->segname, "__TEXT") == 0 ) {
1979 slide = ((uint64_t)header()) - seg->vmaddr;
1980 stop = true;
1981 }
1982 }
1983#else
1984 if ( cmd->cmd == LC_SEGMENT ) {
1985 const segment_command* seg = (segment_command*)cmd;
1986 if ( strcmp(seg->segname, "__TEXT") == 0 ) {
1987 slide = ((uint32_t)header()) - seg->vmaddr;
1988 stop = true;
1989 }
1990 }
1991#endif
1992 });
1993 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
1994 return slide;
1995}
1996
1997// this is only used by dlsym() at runtime. All other binding is done when the closure is built.
1998bool MachOParser::hasExportedSymbol(const char* symbolName, DependentFinder finder, void** result) const
1999{
2000 typedef void* (*ResolverFunc)(void);
2001 ResolverFunc resolver;
2002 Diagnostics diag;
2003 FoundSymbol foundInfo;
2004 if ( findExportedSymbol(diag, symbolName, (void*)header(), foundInfo, finder) ) {
2005 switch ( foundInfo.kind ) {
2006 case FoundSymbol::Kind::headerOffset:
2007 *result = (uint8_t*)foundInfo.foundInDylib + foundInfo.value;
2008 break;
2009 case FoundSymbol::Kind::absolute:
2010 *result = (void*)(long)foundInfo.value;
2011 break;
2012 case FoundSymbol::Kind::resolverOffset:
2013 // foundInfo.value contains "stub".
2014 // in dlsym() we want to call resolver function to get final function address
2015 resolver = (ResolverFunc)((uint8_t*)foundInfo.foundInDylib + foundInfo.resolverFuncOffset);
2016 *result = (*resolver)();
2017 break;
2018 }
2019 return true;
2020 }
2021 return false;
2022}
2023
2024const char* MachOParser::segmentName(uint32_t targetSegIndex) const
2025{
2026 __block const char* result = nullptr;
2027 __block uint32_t segIndex = 0;
2028 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
2029 if ( segIndex == targetSegIndex ) {
2030 result = segName;
2031 stop = true;
2032 }
2033 ++segIndex;
2034 });
2035 return result;
2036}
2037
2038#else
2039
2040
2041bool MachOParser::uses16KPages() const
2042{
2043 return (header()->cputype == CPU_TYPE_ARM64);
2044}
2045
2046
2047bool MachOParser::isEncrypted() const
2048{
2049 __block bool result = false;
2050 Diagnostics diag;
2051 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2052 if ( cmd->cmd == LC_SEGMENT_64 ) {
2053 const segment_command_64* segCmd = (segment_command_64*)cmd;
2054 if ( segCmd->flags & SG_PROTECTED_VERSION_1 ) {
2055 result = true;
2056 stop = true;
2057 }
2058 }
2059 else if ( cmd->cmd == LC_SEGMENT ) {
2060 const segment_command* segCmd = (segment_command*)cmd;
2061 if ( segCmd->flags & SG_PROTECTED_VERSION_1 ) {
2062 result = true;
2063 stop = true;
2064 }
2065 }
2066 else if ( (cmd->cmd == LC_ENCRYPTION_INFO) || (cmd->cmd == LC_ENCRYPTION_INFO_64) ) {
2067 const encryption_info_command* encCmd = (encryption_info_command*)cmd;
2068 if ( encCmd->cryptid != 0 ) {
2069 result = true;
2070 stop = true;
2071 }
2072 }
2073 });
2074 return result;
2075}
2076
2077bool MachOParser::hasWeakDefs() const
2078{
2079 return (header()->flags & (MH_WEAK_DEFINES|MH_BINDS_TO_WEAK));
2080}
2081
2082bool MachOParser::hasObjC() const
2083{
2084 __block bool result = false;
2085 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, const void* content, size_t size, bool illegalSectionSize, bool& stop) {
2086 if ( (strncmp(sectionName, "__objc_imageinfo", 16) == 0) && (strncmp(segmentName, "__DATA", 6) == 0) ) {
2087 result = true;
2088 stop = true;
2089 }
2090 if ( (header()->cputype == CPU_TYPE_I386) && (strcmp(sectionName, "__image_info") == 0) && (strcmp(segmentName, "__OBJC") == 0) ) {
2091 result = true;
2092 stop = true;
2093 }
2094 });
2095 return result;
2096}
2097
2098bool MachOParser::hasPlusLoadMethod(Diagnostics& diag) const
2099{
2100#if 1
2101 __block bool result = false;
2102 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, uint64_t addr, const void* content, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& stop) {
2103 if ( ( (flags & SECTION_TYPE) == S_CSTRING_LITERALS ) ) {
2104 if (illegalSectionSize) {
2105 diag.error("cstring section %s/%s extends beyond the end of the segment", segmentName, sectionName);
2106 return;
2107 }
2108 const char* s = (char*)content;
2109 const char* end = s + size;
2110 while ( s < end ) {
2111 if ( strcmp(s, "load") == 0 ) {
2112 result = true;
2113 stop = true;
2114 return;
2115 }
2116 while (*s != '\0' )
2117 ++s;
2118 ++s;
2119 }
2120 }
2121 });
2122 return result;
2123#else
2124 LayoutInfo layout;
2125 getLayoutInfo(layout);
2126
2127 __block bool hasSwift = false;
2128 __block const void* classList = nullptr;
2129 __block size_t classListSize = 0;
2130 __block const void* objcData = nullptr;
2131 __block size_t objcDataSize = 0;
2132 __block const void* objcConstData = nullptr;
2133 __block size_t objcConstDataSize = 0;
2134 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, uint64_t addr, const void* content, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool& stop) {
2135 if ( (strcmp(sectionName, "__objc_classlist") == 0) && (strncmp(segmentName, "__DATA", 6) == 0) ) {
2136 classList = content;
2137 classListSize = size;
2138 }
2139 if ( (strcmp(sectionName, "__objc_imageinfo") == 0) && (strncmp(segmentName, "__DATA", 6) == 0) ) {
2140 const uint32_t* info = (uint32_t*)content;
2141 uint8_t swiftVersion = (info[1] >> 8) & 0xFF;
2142 if ( swiftVersion != 0 )
2143 hasSwift = true;
2144 }
2145 });
2146 if ( classList == nullptr )
2147 return false;
2148 // FIXME: might be objc and swift intermixed
2149 if ( hasSwift )
2150 return true;
2151 const bool p64 = is64();
2152 const uint32_t pointerSize = (p64 ? 8 : 4);
2153 const uint64_t* classArray64 = (uint64_t*)classList;
2154 const uint32_t* classArray32 = (uint32_t*)classList;
2155 const uint32_t classListCount = (uint32_t)(classListSize/pointerSize);
2156 for (uint32_t i=0; i < classListCount; ++i) {
2157 if ( p64 ) {
2158 uint64_t classObjAddr = classArray64[i];
2159 const uint64_t* classObjContent = (uint64_t*)getContentForVMAddr(layout, classObjAddr);
2160 uint64_t classROAddr = classObjContent[4];
2161 uint64_t metaClassObjAddr = classObjContent[0];
2162 const uint64_t* metaClassObjContent = (uint64_t*)getContentForVMAddr(layout, metaClassObjAddr);
2163 uint64_t metaClassROObjAddr = metaClassObjContent[4];
2164 const uint64_t* metaClassROObjContent = (uint64_t*)getContentForVMAddr(layout, metaClassROObjAddr);
2165 uint64_t metaClassMethodListAddr = metaClassROObjContent[4];
2166 if ( metaClassMethodListAddr != 0 ) {
2167 const uint64_t* metaClassMethodListContent = (uint64_t*)getContentForVMAddr(layout, metaClassMethodListAddr);
2168 const uint32_t methodListCount = ((uint32_t*)metaClassMethodListContent)[1];
2169 for (uint32_t m=0; m < methodListCount; ++m) {
2170 uint64_t methodNameAddr = metaClassMethodListContent[m*3+1];
2171 const char* methodNameContent = (char*)getContentForVMAddr(layout, methodNameAddr);
2172 if ( strcmp(methodNameContent, "load") == 0 ) {
2173 return true;
2174 }
2175 }
2176 }
2177 }
2178 else {
2179
2180 }
2181 }
2182
2183 return false;
2184#endif
2185}
2186
2187bool MachOParser::getCDHash(uint8_t cdHash[20])
2188{
2189 Diagnostics diag;
2190 LinkEditInfo leInfo;
2191 getLinkEditPointers(diag, leInfo);
2192 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2193 return false;
2194
2195 return cdHashOfCodeSignature(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff), leInfo.codeSig->datasize, cdHash);
2196 }
2197
2198bool MachOParser::usesLibraryValidation() const
2199{
2200 Diagnostics diag;
2201 LinkEditInfo leInfo;
2202 getLinkEditPointers(diag, leInfo);
2203 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2204 return false;
2205
2206 const CS_CodeDirectory* cd = (const CS_CodeDirectory*)findCodeDirectoryBlob(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff), leInfo.codeSig->datasize);
2207 if ( cd == nullptr )
2208 return false;
2209
2210 // check for CS_REQUIRE_LV in CS_CodeDirectory.flags
2211 return (htonl(cd->flags) & CS_REQUIRE_LV);
2212 }
2213
2214
2215bool MachOParser::isRestricted() const
2216{
2217 __block bool result = false;
2218 forEachSection(^(const char* segName, const char* sectionName, uint32_t flags, const void* content, size_t size, bool illegalSectionSize, bool& stop) {
2219 if ( (strcmp(segName, "__RESTRICT") == 0) && (strcmp(sectionName, "__restrict") == 0) ) {
2220 result = true;
2221 stop = true;
2222 }
2223
2224 });
2225 return result;
2226}
2227
2228bool MachOParser::hasCodeSignature(uint32_t& fileOffset, uint32_t& size)
2229{
2230 fileOffset = 0;
2231 size = 0;
2232
2233 // <rdar://problem/13622786> ignore code signatures in macOS binaries built with pre-10.9 tools
2234 Platform platform;
2235 uint32_t minOS;
2236 uint32_t sdk;
2237 if ( getPlatformAndVersion(&platform, &minOS, &sdk) ) {
2238 // if have LC_VERSION_MIN_MACOSX and it says SDK < 10.9, so ignore code signature
2239 if ( (platform == Platform::macOS) && (sdk < 0x000A0900) )
2240 return false;
2241 }
2242 else {
2243 switch ( header()->cputype ) {
2244 case CPU_TYPE_I386:
2245 case CPU_TYPE_X86_64:
2246 // old binary with no LC_VERSION_*, assume intel binaries are old macOS binaries (ignore code signature)
2247 return false;
2248 }
2249 }
2250
2251 Diagnostics diag;
2252 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2253 if ( cmd->cmd == LC_CODE_SIGNATURE ) {
2254 const linkedit_data_command* sigCmd = (linkedit_data_command*)cmd;
2255 fileOffset = sigCmd->dataoff;
2256 size = sigCmd->datasize;
2257 stop = true;
2258 }
2259 });
2260 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2261 return (fileOffset != 0);
2262}
2263
2264bool MachOParser::getEntry(uint32_t& offset, bool& usesCRT)
2265{
2266 Diagnostics diag;
2267 offset = 0;
2268 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2269 if ( cmd->cmd == LC_MAIN ) {
2270 entry_point_command* mainCmd = (entry_point_command*)cmd;
2271 usesCRT = false;
2272 offset = (uint32_t)mainCmd->entryoff;
2273 stop = true;
2274 }
2275 else if ( cmd->cmd == LC_UNIXTHREAD ) {
2276 stop = true;
2277 usesCRT = true;
2278 const uint32_t* regs32 = (uint32_t*)(((char*)cmd) + 16);
2279 const uint64_t* regs64 = (uint64_t*)(((char*)cmd) + 16);
2280 uint64_t startAddress = 0;
2281 switch ( header()->cputype ) {
2282 case CPU_TYPE_I386:
2283 startAddress = regs32[10]; // i386_thread_state_t.eip
2284 break;
2285 case CPU_TYPE_X86_64:
2286 startAddress = regs64[16]; // x86_thread_state64_t.rip
2287 break;
2288 case CPU_TYPE_ARM:
2289 startAddress = regs32[15]; // arm_thread_state_t.__pc
2290 break;
2291 case CPU_TYPE_ARM64:
2292 startAddress = regs64[32]; // arm_thread_state64_t.__pc
2293 break;
2294 }
2295 offset = (uint32_t)(startAddress - preferredLoadAddress());
2296 }
2297 });
2298 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2299 // FIXME: validate offset is into executable segment
2300 return (offset != 0);
2301}
2302
2303bool MachOParser::canBePlacedInDyldCache(const std::string& path) const {
2304 std::set<std::string> reasons;
2305 return canBePlacedInDyldCache(path, reasons);
2306}
2307
2308bool MachOParser::canBePlacedInDyldCache(const std::string& path, std::set<std::string>& reasons) const
2309{
2310 bool retval = true;
2311 // only dylibs can go in cache
2312 if ( fileType() != MH_DYLIB ) {
2313 reasons.insert("Not MH_DYLIB");
2314 return false; // cannot continue, installName() will assert() if not a dylib
2315 }
2316
2317 // only dylibs built for /usr/lib or /System/Library can go in cache
2318 const char* dylibName = installName();
2319 if ( (strncmp(dylibName, "/usr/lib/", 9) != 0) && (strncmp(dylibName, "/System/Library/", 16) != 0) ) {
2320 retval = false;
2321 reasons.insert("Not in '/usr/lib/' or '/System/Library/'");
2322 }
2323
2324 // flat namespace files cannot go in cache
2325 if ( (header()->flags & MH_TWOLEVEL) == 0 ) {
2326 retval = false;
2327 reasons.insert("Not built with two level namespaces");
2328 }
2329
2330 // don't put debug variants into dyld cache
2331 if ( endsWith(path, "_profile.dylib") || endsWith(path, "_debug.dylib") || endsWith(path, "_profile") || endsWith(path, "_debug") || endsWith(path, "/CoreADI") ) {
2332 retval = false;
2333 reasons.insert("Variant image");
2334 }
2335
2336 // dylib must have extra info for moving DATA and TEXT segments apart
2337 __block bool hasExtraInfo = false;
2338 __block bool hasDyldInfo = false;
2339 Diagnostics diag;
2340 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2341 if ( cmd->cmd == LC_SEGMENT_SPLIT_INFO )
2342 hasExtraInfo = true;
2343 if ( cmd->cmd == LC_DYLD_INFO_ONLY )
2344 hasDyldInfo = true;
2345 });
2346 if ( !hasExtraInfo ) {
2347 retval = false;
2348 reasons.insert("Missing split seg info");
2349 }
2350 if ( !hasDyldInfo ) {
2351 retval = false;
2352 reasons.insert("Old binary, missing dyld info");
2353 }
2354
2355 // dylib can only depend on other dylibs in the shared cache
2356 __block bool allDepPathsAreGood = true;
2357 forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
2358 if ( (strncmp(loadPath, "/usr/lib/", 9) != 0) && (strncmp(loadPath, "/System/Library/", 16) != 0) ) {
2359 allDepPathsAreGood = false;
2360 stop = true;
2361 }
2362 });
2363 if ( !allDepPathsAreGood ) {
2364 retval = false;
2365 reasons.insert("Depends on cache inelegible dylibs");
2366 }
2367
2368 // dylibs with interposing info cannot be in cache
2369 __block bool hasInterposing = false;
2370 forEachInterposingTuple(diag, ^(uint32_t segIndex, uint64_t replacementSegOffset, uint64_t replaceeSegOffset, uint64_t replacementContent, bool& stop) {
2371 hasInterposing = true;
2372 });
2373 if ( hasInterposing ) {
2374 retval = false;
2375 reasons.insert("Has interposing tuples");
2376 }
2377
2378 return retval;
2379}
2380
2381bool MachOParser::isDynamicExecutable() const
2382{
2383 if ( fileType() != MH_EXECUTE )
2384 return false;
2385
2386 // static executables do not have dyld load command
2387 __block bool hasDyldLoad = false;
2388 Diagnostics diag;
2389 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2390 if ( cmd->cmd == LC_LOAD_DYLINKER ) {
2391 hasDyldLoad = true;
2392 stop = true;
2393 }
2394 });
2395 return hasDyldLoad;
2396}
2397
2398
2399bool MachOParser::isSlideable() const
2400{
2401 if ( header()->filetype == MH_DYLIB )
2402 return true;
2403 if ( header()->filetype == MH_BUNDLE )
2404 return true;
2405 if ( (header()->filetype == MH_EXECUTE) && (header()->flags & MH_PIE) )
2406 return true;
2407
2408 return false;
2409}
2410
2411
2412
2413bool MachOParser::hasInitializer(Diagnostics& diag) const
2414{
2415 __block bool result = false;
2416 forEachInitializer(diag, ^(uint32_t offset) {
2417 result = true;
2418 });
2419 return result;
2420}
2421
2422void MachOParser::forEachInitializer(Diagnostics& diag, void (^callback)(uint32_t offset)) const
2423{
2424 __block uint64_t textSegAddrStart = 0;
2425 __block uint64_t textSegAddrEnd = 0;
2426
2427 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
2428 if ( strcmp(segName, "__TEXT") == 0 ) {
2429 textSegAddrStart = vmAddr;
2430 textSegAddrEnd = vmAddr + vmSize;
2431 stop = true;
2432 }
2433 });
2434 if ( textSegAddrStart == textSegAddrEnd ) {
2435 diag.error("no __TEXT segment");
2436 return;
2437 }
2438
2439 // if dylib linked with -init linker option, that initializer is first
2440 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2441 if ( cmd->cmd == LC_ROUTINES ) {
2442 const routines_command* routines = (routines_command*)cmd;
2443 uint64_t dashInit = routines->init_address;
2444 if ( (textSegAddrStart < dashInit) && (dashInit < textSegAddrEnd) )
2445 callback((uint32_t)(dashInit - textSegAddrStart));
2446 else
2447 diag.error("-init does not point within __TEXT segment");
2448 }
2449 else if ( cmd->cmd == LC_ROUTINES_64 ) {
2450 const routines_command_64* routines = (routines_command_64*)cmd;
2451 uint64_t dashInit = routines->init_address;
2452 if ( (textSegAddrStart < dashInit) && (dashInit < textSegAddrEnd) )
2453 callback((uint32_t)(dashInit - textSegAddrStart));
2454 else
2455 diag.error("-init does not point within __TEXT segment");
2456 }
2457 });
2458
2459 // next any function pointers in mod-init section
2460 bool p64 = is64();
2461 unsigned pointerSize = p64 ? 8 : 4;
2462 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, const void* content, size_t size, bool illegalSectionSize, bool& stop) {
2463 if ( (flags & SECTION_TYPE) == S_MOD_INIT_FUNC_POINTERS ) {
2464 if ( (size % pointerSize) != 0 ) {
2465 diag.error("initializer section %s/%s has bad size", segmentName, sectionName);
2466 stop = true;
2467 return;
2468 }
2469 if ( illegalSectionSize ) {
2470 diag.error("initializer section %s/%s extends beyond the end of the segment", segmentName, sectionName);
2471 stop = true;
2472 return;
2473 }
2474 if ( ((long)content % pointerSize) != 0 ) {
2475 diag.error("initializer section %s/%s is not pointer aligned", segmentName, sectionName);
2476 stop = true;
2477 return;
2478 }
2479 if ( p64 ) {
2480 const uint64_t* initsStart = (uint64_t*)content;
2481 const uint64_t* initsEnd = (uint64_t*)((uint8_t*)content + size);
2482 for (const uint64_t* p=initsStart; p < initsEnd; ++p) {
2483 uint64_t anInit = *p;
2484 if ( (anInit <= textSegAddrStart) || (anInit > textSegAddrEnd) ) {
2485 diag.error("initializer 0x%0llX does not point within __TEXT segment", anInit);
2486 stop = true;
2487 break;
2488 }
2489 callback((uint32_t)(anInit - textSegAddrStart));
2490 }
2491 }
2492 else {
2493 const uint32_t* initsStart = (uint32_t*)content;
2494 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + size);
2495 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2496 uint32_t anInit = *p;
2497 if ( (anInit <= textSegAddrStart) || (anInit > textSegAddrEnd) ) {
2498 diag.error("initializer 0x%0X does not point within __TEXT segment", anInit);
2499 stop = true;
2500 break;
2501 }
2502 callback(anInit - (uint32_t)textSegAddrStart);
2503 }
2504 }
2505 }
2506 });
2507}
2508
2509void MachOParser::forEachDOFSection(Diagnostics& diag, void (^callback)(uint32_t offset)) const
2510{
2511 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, const void* content, size_t size, bool illegalSectionSize, bool& stop) {
2512 if ( ( (flags & SECTION_TYPE) == S_DTRACE_DOF ) && !illegalSectionSize ) {
2513 callback((uint32_t)((uintptr_t)content - (uintptr_t)header()));
2514 }
2515 });
2516}
2517
2518
2519uint32_t MachOParser::segmentCount() const
2520{
2521 __block uint32_t count = 0;
2522 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
2523 ++count;
2524 });
2525 return count;
2526}
2527
2528void MachOParser::forEachSegment(void (^callback)(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, uint32_t segIndex, uint64_t sizeOfSections, uint8_t p2align, bool& stop)) const
2529{
2530 Diagnostics diag;
2531 __block uint32_t segIndex = 0;
2532 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2533 if ( cmd->cmd == LC_SEGMENT_64 ) {
2534 const segment_command_64* segCmd = (segment_command_64*)cmd;
2535 uint64_t sizeOfSections = segCmd->vmsize;
2536 uint8_t p2align = 0;
2537 const section_64* const sectionsStart = (section_64*)((char*)segCmd + sizeof(struct segment_command_64));
2538 const section_64* const sectionsEnd = &sectionsStart[segCmd->nsects];
2539 for (const section_64* sect=sectionsStart; sect < sectionsEnd; ++sect) {
2540 sizeOfSections = sect->addr + sect->size - segCmd->vmaddr;
2541 if ( sect->align > p2align )
2542 p2align = sect->align;
2543 }
2544 callback(segCmd->segname, (uint32_t)segCmd->fileoff, (uint32_t)segCmd->filesize, segCmd->vmaddr, segCmd->vmsize, segCmd->initprot, segIndex, sizeOfSections, p2align, stop);
2545 ++segIndex;
2546 }
2547 else if ( cmd->cmd == LC_SEGMENT ) {
2548 const segment_command* segCmd = (segment_command*)cmd;
2549 uint64_t sizeOfSections = segCmd->vmsize;
2550 uint8_t p2align = 0;
2551 const section* const sectionsStart = (section*)((char*)segCmd + sizeof(struct segment_command));
2552 const section* const sectionsEnd = &sectionsStart[segCmd->nsects];
2553 for (const section* sect=sectionsStart; sect < sectionsEnd; ++sect) {
2554 sizeOfSections = sect->addr + sect->size - segCmd->vmaddr;
2555 if ( sect->align > p2align )
2556 p2align = sect->align;
2557 }
2558 callback(segCmd->segname, (uint32_t)segCmd->fileoff, (uint32_t)segCmd->filesize, segCmd->vmaddr, segCmd->vmsize, segCmd->initprot, segIndex, sizeOfSections, p2align, stop);
2559 ++segIndex;
2560 }
2561 });
2562 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2563}
2564
2565void MachOParser::forEachExportedSymbol(Diagnostics diag, void (^handler)(const char* symbolName, uint64_t imageOffset, bool isReExport, bool& stop)) const
2566{
2567 LinkEditInfo leInfo;
2568 getLinkEditPointers(diag, leInfo);
2569 if ( diag.hasError() )
2570 return;
2571
2572 if ( leInfo.dyldInfo != nullptr ) {
2573 const uint8_t* trieStart = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->export_off);
2574 const uint8_t* trieEnd = trieStart + leInfo.dyldInfo->export_size;
2575 std::vector<ExportInfoTrie::Entry> exports;
2576 if ( !ExportInfoTrie::parseTrie(trieStart, trieEnd, exports) ) {
2577 diag.error("malformed exports trie");
2578 return;
2579 }
2580 bool stop = false;
2581 for (const ExportInfoTrie::Entry& exp : exports) {
2582 bool isReExport = (exp.info.flags & EXPORT_SYMBOL_FLAGS_REEXPORT);
2583 handler(exp.name.c_str(), exp.info.address, isReExport, stop);
2584 if ( stop )
2585 break;
2586 }
2587 }
2588}
2589
2590bool MachOParser::invalidRebaseState(Diagnostics& diag, const char* opcodeName, const MachOParser::LinkEditInfo& leInfo,
2591 bool segIndexSet, uint32_t pointerSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type) const
2592{
2593 if ( !segIndexSet ) {
2594 diag.error("%s missing preceding REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", opcodeName);
2595 return true;
2596 }
2597 if ( segmentIndex >= leInfo.layout.segmentCount ) {
2598 diag.error("%s segment index %d too large", opcodeName, segmentIndex);
2599 return true;
2600 }
2601 if ( segmentOffset > (leInfo.layout.segments[segmentIndex].segSize-pointerSize) ) {
2602 diag.error("%s current segment offset 0x%08llX beyond segment size (0x%08llX)", opcodeName, segmentOffset, leInfo.layout.segments[segmentIndex].segSize);
2603 return true;
2604 }
2605 switch ( type ) {
2606 case REBASE_TYPE_POINTER:
2607 if ( !leInfo.layout.segments[segmentIndex].writable ) {
2608 diag.error("%s pointer rebase is in non-writable segment", opcodeName);
2609 return true;
2610 }
2611 if ( leInfo.layout.segments[segmentIndex].executable ) {
2612 diag.error("%s pointer rebase is in executable segment", opcodeName);
2613 return true;
2614 }
2615 break;
2616 case REBASE_TYPE_TEXT_ABSOLUTE32:
2617 case REBASE_TYPE_TEXT_PCREL32:
2618 if ( !leInfo.layout.segments[segmentIndex].textRelocsAllowed ) {
2619 diag.error("%s text rebase is in segment that does not support text relocations", opcodeName);
2620 return true;
2621 }
2622 if ( leInfo.layout.segments[segmentIndex].writable ) {
2623 diag.error("%s text rebase is in writable segment", opcodeName);
2624 return true;
2625 }
2626 if ( !leInfo.layout.segments[segmentIndex].executable ) {
2627 diag.error("%s pointer rebase is in non-executable segment", opcodeName);
2628 return true;
2629 }
2630 break;
2631 default:
2632 diag.error("%s unknown rebase type %d", opcodeName, type);
2633 return true;
2634 }
2635 return false;
2636}
2637
2638void MachOParser::forEachRebase(Diagnostics& diag, void (^handler)(uint32_t segIndex, uint64_t segOffset, uint8_t type, bool& stop)) const
2639{
2640 LinkEditInfo leInfo;
2641 getLinkEditPointers(diag, leInfo);
2642 if ( diag.hasError() )
2643 return;
2644
2645 if ( leInfo.dyldInfo != nullptr ) {
2646 // work around linker bug that laid down rebase opcodes for lazy pointer section when -bind_at_load used
2647 __block int lpSegIndex = 0;
2648 __block uint64_t lpSegOffsetStart = 0;
2649 __block uint64_t lpSegOffsetEnd = 0;
2650 bool hasWeakBinds = (leInfo.dyldInfo->weak_bind_size != 0);
2651 if ( leInfo.dyldInfo->lazy_bind_size == 0 ) {
2652 __block uint64_t lpAddr = 0;
2653 __block uint64_t lpSize = 0;
2654 forEachSection(^(const char* segName, const char* sectionName, uint32_t flags, uint64_t addr, const void* content, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& sectStop) {
2655 if ( (flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
2656 lpAddr = addr;
2657 lpSize = size;
2658 sectStop = true;
2659 }
2660 });
2661 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& segStop) {
2662 if ( (vmAddr <= lpAddr) && (vmAddr+vmSize >= lpAddr+lpSize) ) {
2663 lpSegOffsetStart = lpAddr - vmAddr;
2664 lpSegOffsetEnd = lpSegOffsetStart + lpSize;
2665 segStop = true;
2666 return;
2667 }
2668 ++lpSegIndex;
2669 });
2670 }
2671 // don't remove rebase if there is a weak-bind at pointer location
2672 bool (^weakBindAt)(uint64_t segOffset) = ^(uint64_t segOffset) {
2673 if ( !hasWeakBinds )
2674 return false;
2675 __block bool result = false;
2676 Diagnostics weakDiag;
2677 forEachWeakDef(weakDiag, ^(bool strongDef, uint32_t dataSegIndex, uint64_t dataSegOffset, uint64_t addend, const char* symbolName, bool& weakStop) {
2678 if ( segOffset == dataSegOffset ) {
2679 result = true;
2680 weakStop = true;
2681 }
2682 });
2683 return result;
2684 };
2685
2686
2687 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
2688 const uint8_t* end = p + leInfo.dyldInfo->rebase_size;
2689 const uint32_t pointerSize = (is64() ? 8 : 4);
2690 uint8_t type = 0;
2691 int segIndex = 0;
2692 uint64_t segOffset = 0;
2693 uint64_t count;
2694 uint64_t skip;
2695 bool segIndexSet = false;
2696 bool stop = false;
2697 while ( !stop && diag.noError() && (p < end) ) {
2698 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
2699 uint8_t opcode = *p & REBASE_OPCODE_MASK;
2700 ++p;
2701 switch (opcode) {
2702 case REBASE_OPCODE_DONE:
2703 stop = true;
2704 break;
2705 case REBASE_OPCODE_SET_TYPE_IMM:
2706 type = immediate;
2707 break;
2708 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
2709 segIndex = immediate;
2710 segOffset = read_uleb128(diag, p, end);
2711 segIndexSet = true;
2712 break;
2713 case REBASE_OPCODE_ADD_ADDR_ULEB:
2714 segOffset += read_uleb128(diag, p, end);
2715 break;
2716 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
2717 segOffset += immediate*pointerSize;
2718 break;
2719 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
2720 for (int i=0; i < immediate; ++i) {
2721 if ( invalidRebaseState(diag, "REBASE_OPCODE_DO_REBASE_IMM_TIMES", leInfo, segIndexSet, pointerSize, segIndex, segOffset, type) )
2722 return;
2723 if ( (segIndex != lpSegIndex) || (segOffset > lpSegOffsetEnd) || (segOffset < lpSegOffsetStart) || weakBindAt(segOffset) )
2724 handler(segIndex, segOffset, type, stop);
2725 segOffset += pointerSize;
2726 }
2727 break;
2728 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
2729 count = read_uleb128(diag, p, end);
2730 for (uint32_t i=0; i < count; ++i) {
2731 if ( invalidRebaseState(diag, "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segIndexSet, pointerSize, segIndex, segOffset, type) )
2732 return;
2733 if ( (segIndex != lpSegIndex) || (segOffset > lpSegOffsetEnd) || (segOffset < lpSegOffsetStart) || weakBindAt(segOffset) )
2734 handler(segIndex, segOffset, type, stop);
2735 segOffset += pointerSize;
2736 }
2737 break;
2738 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
2739 if ( invalidRebaseState(diag, "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segIndexSet, pointerSize, segIndex, segOffset, type) )
2740 return;
2741 handler(segIndex, segOffset, type, stop);
2742 segOffset += read_uleb128(diag, p, end) + pointerSize;
2743 break;
2744 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
2745 count = read_uleb128(diag, p, end);
2746 if ( diag.hasError() )
2747 break;
2748 skip = read_uleb128(diag, p, end);
2749 for (uint32_t i=0; i < count; ++i) {
2750 if ( invalidRebaseState(diag, "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB", leInfo, segIndexSet, pointerSize, segIndex, segOffset, type) )
2751 return;
2752 handler(segIndex, segOffset, type, stop);
2753 segOffset += skip + pointerSize;
2754 }
2755 break;
2756 default:
2757 diag.error("unknown rebase opcode 0x%02X", opcode);
2758 }
2759 }
2760 }
2761 else {
2762 // old binary
2763 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->locreloff);
2764 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nlocrel];
2765 bool stop = false;
2766 const uint8_t relocSize = (is64() ? 3 : 2);
2767 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
2768 if ( reloc->r_length != relocSize ) {
2769 diag.error("local relocation has wrong r_length");
2770 break;
2771 }
2772 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
2773 diag.error("local relocation has wrong r_type");
2774 break;
2775 }
2776 doLocalReloc(diag, reloc->r_address, stop, handler);
2777 }
2778 // then process indirect symbols
2779 forEachIndirectPointer(diag, ^(uint32_t segIndex, uint64_t segOffset, bool bind, int bindLibOrdinal,
2780 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
2781 if ( !bind && !bindLazy )
2782 handler(segIndex, segOffset, REBASE_TYPE_POINTER, indStop);
2783 });
2784 }
2785}
2786
2787bool MachOParser::doLocalReloc(Diagnostics& diag, uint32_t r_address, bool& stop, void (^handler)(uint32_t segIndex, uint64_t segOffset, uint8_t type, bool& stop)) const
2788{
2789 bool firstWritable = (header()->cputype == CPU_TYPE_X86_64);
2790 __block uint64_t relocBaseAddress = 0;
2791 __block bool baseFound = false;
2792 __block uint32_t segIndex = 0;
2793 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool &stopSeg) {
2794 if ( !baseFound ) {
2795 if ( !firstWritable || (protections & VM_PROT_WRITE) ) {
2796 baseFound = true;
2797 relocBaseAddress = vmAddr;
2798 }
2799 }
2800 if ( baseFound && (vmAddr < relocBaseAddress+r_address) && (relocBaseAddress+r_address < vmAddr+vmSize) ) {
2801 uint8_t type = REBASE_TYPE_POINTER;
2802 uint64_t segOffset = relocBaseAddress + r_address - vmAddr;
2803 handler(segIndex, segOffset, type, stop);
2804 stopSeg = true;
2805 }
2806 ++segIndex;
2807 });
2808
2809 return false;
2810}
2811
2812int MachOParser::libOrdinalFromDesc(uint16_t n_desc) const
2813{
2814 // -flat_namespace is always flat lookup
2815 if ( (header()->flags & MH_TWOLEVEL) == 0 )
2816 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
2817
2818 // extract byte from undefined symbol entry
2819 int libIndex = GET_LIBRARY_ORDINAL(n_desc);
2820 switch ( libIndex ) {
2821 case SELF_LIBRARY_ORDINAL:
2822 return BIND_SPECIAL_DYLIB_SELF;
2823
2824 case DYNAMIC_LOOKUP_ORDINAL:
2825 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
2826
2827 case EXECUTABLE_ORDINAL:
2828 return BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
2829 }
2830
2831 return libIndex;
2832}
2833
2834bool MachOParser::doExternalReloc(Diagnostics& diag, uint32_t r_address, uint32_t r_symbolnum, LinkEditInfo& leInfo, bool& stop,
2835 void (^handler)(uint32_t dataSegIndex, uint64_t dataSegOffset, uint8_t type, int libOrdinal,
2836 uint64_t addend, const char* symbolName, bool weakImport, bool lazy, bool& stop)) const
2837{
2838 const bool firstWritable = (header()->cputype == CPU_TYPE_X86_64);
2839 const bool is64Bit = is64();
2840 __block uint64_t relocBaseAddress = 0;
2841 __block bool baseFound = false;
2842 __block uint32_t segIndex = 0;
2843 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool &stopSeg) {
2844 if ( !baseFound ) {
2845 if ( !firstWritable || (protections & VM_PROT_WRITE) ) {
2846 baseFound = true;
2847 relocBaseAddress = vmAddr;
2848 }
2849 }
2850 if ( baseFound && (vmAddr < relocBaseAddress+r_address) && (relocBaseAddress+r_address < vmAddr+vmSize) ) {
2851 uint8_t type = BIND_TYPE_POINTER;
2852 uint64_t segOffset = relocBaseAddress + r_address - vmAddr;
2853 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
2854 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
2855 const struct nlist* symbols32 = (struct nlist*)symbolTable;
2856 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
2857 uint32_t symCount = leInfo.symTab->nsyms;
2858 uint32_t poolSize = leInfo.symTab->strsize;
2859 if ( r_symbolnum < symCount ) {
2860 uint16_t n_desc = is64Bit ? symbols64[r_symbolnum].n_desc : symbols32[r_symbolnum].n_desc;
2861 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
2862 uint32_t strOffset = is64Bit ? symbols64[r_symbolnum].n_un.n_strx : symbols32[r_symbolnum].n_un.n_strx;
2863 if ( strOffset < poolSize ) {
2864 const char* symbolName = stringPool + strOffset;
2865 bool weakImport = (n_desc & N_WEAK_REF);
2866 bool lazy = false;
2867 uint64_t addend = is64Bit ? (*((uint64_t*)((char*)header()+fileOffset+segOffset))) : (*((uint32_t*)((char*)header()+fileOffset+segOffset)));
2868 handler(segIndex, segOffset, type, libOrdinal, addend, symbolName, weakImport, lazy, stop);
2869 stopSeg = true;
2870 }
2871 }
2872 }
2873 ++segIndex;
2874 });
2875
2876 return false;
2877}
2878
2879bool MachOParser::invalidBindState(Diagnostics& diag, const char* opcodeName, const LinkEditInfo& leInfo, bool segIndexSet, bool libraryOrdinalSet,
2880 uint32_t dylibCount, int libOrdinal, uint32_t pointerSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, const char* symbolName) const
2881{
2882 if ( !segIndexSet ) {
2883 diag.error("%s missing preceding BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", opcodeName);
2884 return true;
2885 }
2886 if ( segmentIndex >= leInfo.layout.segmentCount ) {
2887 diag.error("%s segment index %d too large", opcodeName, segmentIndex);
2888 return true;
2889 }
2890 if ( segmentOffset > (leInfo.layout.segments[segmentIndex].segSize-pointerSize) ) {
2891 diag.error("%s current segment offset 0x%08llX beyond segment size (0x%08llX)", opcodeName, segmentOffset, leInfo.layout.segments[segmentIndex].segSize);
2892 return true;
2893 }
2894 if ( symbolName == NULL ) {
2895 diag.error("%s missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", opcodeName);
2896 return true;
2897 }
2898 if ( !libraryOrdinalSet ) {
2899 diag.error("%s missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL", opcodeName);
2900 return true;
2901 }
2902 if ( libOrdinal > (int)dylibCount ) {
2903 diag.error("%s has library ordinal too large (%d) max (%d)", opcodeName, libOrdinal, dylibCount);
2904 return true;
2905 }
2906 if ( libOrdinal < -2 ) {
2907 diag.error("%s has unknown library special ordinal (%d)", opcodeName, libOrdinal);
2908 return true;
2909 }
2910 switch ( type ) {
2911 case BIND_TYPE_POINTER:
2912 if ( !leInfo.layout.segments[segmentIndex].writable ) {
2913 diag.error("%s pointer bind is in non-writable segment", opcodeName);
2914 return true;
2915 }
2916 if ( leInfo.layout.segments[segmentIndex].executable ) {
2917 diag.error("%s pointer bind is in executable segment", opcodeName);
2918 return true;
2919 }
2920 break;
2921 case BIND_TYPE_TEXT_ABSOLUTE32:
2922 case BIND_TYPE_TEXT_PCREL32:
2923 if ( !leInfo.layout.segments[segmentIndex].textRelocsAllowed ) {
2924 diag.error("%s text bind is in segment that does not support text relocations", opcodeName);
2925 return true;
2926 }
2927 if ( leInfo.layout.segments[segmentIndex].writable ) {
2928 diag.error("%s text bind is in writable segment", opcodeName);
2929 return true;
2930 }
2931 if ( !leInfo.layout.segments[segmentIndex].executable ) {
2932 diag.error("%s pointer bind is in non-executable segment", opcodeName);
2933 return true;
2934 }
2935 break;
2936 default:
2937 diag.error("%s unknown bind type %d", opcodeName, type);
2938 return true;
2939 }
2940 return false;
2941}
2942
2943void MachOParser::forEachBind(Diagnostics& diag, void (^handler)(uint32_t dataSegIndex, uint64_t dataSegOffset, uint8_t type,
2944 int libOrdinal, uint64_t addend, const char* symbolName, bool weakImport, bool lazy, bool& stop)) const
2945{
2946 LinkEditInfo leInfo;
2947 getLinkEditPointers(diag, leInfo);
2948 if ( diag.hasError() )
2949 return;
2950 const uint32_t dylibCount = dependentDylibCount();
2951
2952 if ( leInfo.dyldInfo != nullptr ) {
2953 // process bind opcodes
2954 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
2955 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
2956 const uint32_t pointerSize = (is64() ? 8 : 4);
2957 uint8_t type = 0;
2958 uint64_t segmentOffset = 0;
2959 uint8_t segmentIndex = 0;
2960 const char* symbolName = NULL;
2961 int libraryOrdinal = 0;
2962 bool segIndexSet = false;
2963 bool libraryOrdinalSet = false;
2964
2965 int64_t addend = 0;
2966 uint64_t count;
2967 uint64_t skip;
2968 bool weakImport = false;
2969 bool done = false;
2970 bool stop = false;
2971 while ( !done && !stop && diag.noError() && (p < end) ) {
2972 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
2973 uint8_t opcode = *p & BIND_OPCODE_MASK;
2974 ++p;
2975 switch (opcode) {
2976 case BIND_OPCODE_DONE:
2977 done = true;
2978 break;
2979 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
2980 libraryOrdinal = immediate;
2981 libraryOrdinalSet = true;
2982 break;
2983 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
2984 libraryOrdinal = (int)read_uleb128(diag, p, end);
2985 libraryOrdinalSet = true;
2986 break;
2987 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
2988 // the special ordinals are negative numbers
2989 if ( immediate == 0 )
2990 libraryOrdinal = 0;
2991 else {
2992 int8_t signExtended = BIND_OPCODE_MASK | immediate;
2993 libraryOrdinal = signExtended;
2994 }
2995 libraryOrdinalSet = true;
2996 break;
2997 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
2998 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
2999 symbolName = (char*)p;
3000 while (*p != '\0')
3001 ++p;
3002 ++p;
3003 break;
3004 case BIND_OPCODE_SET_TYPE_IMM:
3005 type = immediate;
3006 break;
3007 case BIND_OPCODE_SET_ADDEND_SLEB:
3008 addend = read_sleb128(diag, p, end);
3009 break;
3010 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
3011 segmentIndex = immediate;
3012 segmentOffset = read_uleb128(diag, p, end);
3013 segIndexSet = true;
3014 break;
3015 case BIND_OPCODE_ADD_ADDR_ULEB:
3016 segmentOffset += read_uleb128(diag, p, end);
3017 break;
3018 case BIND_OPCODE_DO_BIND:
3019 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3020 return;
3021 handler(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, weakImport, false, stop);
3022 segmentOffset += pointerSize;
3023 break;
3024 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
3025 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3026 return;
3027 handler(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, weakImport, false, stop);
3028 segmentOffset += read_uleb128(diag, p, end) + pointerSize;
3029 break;
3030 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
3031 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3032 return;
3033 handler(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, weakImport, false, stop);
3034 segmentOffset += immediate*pointerSize + pointerSize;
3035 break;
3036 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
3037 count = read_uleb128(diag, p, end);
3038 skip = read_uleb128(diag, p, end);
3039 for (uint32_t i=0; i < count; ++i) {
3040 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3041 return;
3042 handler(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, weakImport, false, stop);
3043 segmentOffset += skip + pointerSize;
3044 }
3045 break;
3046 default:
3047 diag.error("bad bind opcode 0x%02X", *p);
3048 }
3049 }
3050 if ( diag.hasError() || stop )
3051 return;
3052 // process lazy bind opcodes
3053 if ( leInfo.dyldInfo->lazy_bind_size != 0 ) {
3054 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
3055 end = p + leInfo.dyldInfo->lazy_bind_size;
3056 type = BIND_TYPE_POINTER;
3057 segmentOffset = 0;
3058 segmentIndex = 0;
3059 symbolName = NULL;
3060 libraryOrdinal = 0;
3061 segIndexSet = false;
3062 libraryOrdinalSet= false;
3063 addend = 0;
3064 weakImport = false;
3065 stop = false;
3066 while ( !stop && diag.noError() && (p < end) ) {
3067 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
3068 uint8_t opcode = *p & BIND_OPCODE_MASK;
3069 ++p;
3070 switch (opcode) {
3071 case BIND_OPCODE_DONE:
3072 // this opcode marks the end of each lazy pointer binding
3073 break;
3074 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
3075 libraryOrdinal = immediate;
3076 libraryOrdinalSet = true;
3077 break;
3078 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
3079 libraryOrdinal = (int)read_uleb128(diag, p, end);
3080 libraryOrdinalSet = true;
3081 break;
3082 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
3083 // the special ordinals are negative numbers
3084 if ( immediate == 0 )
3085 libraryOrdinal = 0;
3086 else {
3087 int8_t signExtended = BIND_OPCODE_MASK | immediate;
3088 libraryOrdinal = signExtended;
3089 }
3090 libraryOrdinalSet = true;
3091 break;
3092 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
3093 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
3094 symbolName = (char*)p;
3095 while (*p != '\0')
3096 ++p;
3097 ++p;
3098 break;
3099 case BIND_OPCODE_SET_ADDEND_SLEB:
3100 addend = read_sleb128(diag, p, end);
3101 break;
3102 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
3103 segmentIndex = immediate;
3104 segmentOffset = read_uleb128(diag, p, end);
3105 segIndexSet = true;
3106 break;
3107 case BIND_OPCODE_DO_BIND:
3108 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3109 return;
3110 handler(segmentIndex, segmentOffset, type, libraryOrdinal, addend, symbolName, weakImport, true, stop);
3111 segmentOffset += pointerSize;
3112 break;
3113 case BIND_OPCODE_SET_TYPE_IMM:
3114 case BIND_OPCODE_ADD_ADDR_ULEB:
3115 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
3116 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
3117 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
3118 default:
3119 diag.error("bad lazy bind opcode 0x%02X", opcode);
3120 break;
3121 }
3122 }
3123 }
3124 }
3125 else {
3126 // old binary, first process relocation
3127 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->extreloff);
3128 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nextrel];
3129 bool stop = false;
3130 const uint8_t relocSize = (is64() ? 3 : 2);
3131 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
3132 if ( reloc->r_length != relocSize ) {
3133 diag.error("external relocation has wrong r_length");
3134 break;
3135 }
3136 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
3137 diag.error("external relocation has wrong r_type");
3138 break;
3139 }
3140 doExternalReloc(diag, reloc->r_address, reloc->r_symbolnum, leInfo, stop, handler);
3141 }
3142 // then process indirect symbols
3143 forEachIndirectPointer(diag, ^(uint32_t segIndex, uint64_t segOffset, bool bind, int bindLibOrdinal,
3144 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
3145 if ( bind )
3146 handler(segIndex, segOffset, (selfModifyingStub ? BIND_TYPE_IMPORT_JMP_REL32 : BIND_TYPE_POINTER), bindLibOrdinal, 0, bindSymbolName, bindWeakImport, bindLazy, indStop);
3147 });
3148 }
3149}
3150
3151
3152void MachOParser::forEachWeakDef(Diagnostics& diag, void (^handler)(bool strongDef, uint32_t dataSegIndex, uint64_t dataSegOffset,
3153 uint64_t addend, const char* symbolName, bool& stop)) const
3154{
3155 LinkEditInfo leInfo;
3156 getLinkEditPointers(diag, leInfo);
3157 if ( diag.hasError() )
3158 return;
3159
3160 const uint32_t dylibCount = dependentDylibCount();
3161 if ( leInfo.dyldInfo != nullptr ) {
3162 // process weak bind opcodes
3163 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->weak_bind_off);
3164 const uint8_t* end = p + leInfo.dyldInfo->weak_bind_size;
3165 const uint32_t pointerSize = (is64() ? 8 : 4);
3166 uint8_t type = 0;
3167 uint64_t segmentOffset = 0;
3168 uint8_t segmentIndex = 0;
3169 const char* symbolName = NULL;
3170 int64_t addend = 0;
3171 uint64_t count;
3172 uint64_t skip;
3173 bool segIndexSet = false;
3174 bool done = false;
3175 bool stop = false;
3176 while ( !done && !stop && diag.noError() && (p < end) ) {
3177 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
3178 uint8_t opcode = *p & BIND_OPCODE_MASK;
3179 ++p;
3180 switch (opcode) {
3181 case BIND_OPCODE_DONE:
3182 done = true;
3183 break;
3184 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
3185 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
3186 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
3187 diag.error("unexpected dylib ordinal in weak binding info");
3188 return;
3189 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
3190 symbolName = (char*)p;
3191 while (*p != '\0')
3192 ++p;
3193 ++p;
3194 if ( (immediate & BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) != 0 )
3195 handler(true, 0, 0, 0, symbolName, stop);
3196 break;
3197 case BIND_OPCODE_SET_TYPE_IMM:
3198 type = immediate;
3199 break;
3200 case BIND_OPCODE_SET_ADDEND_SLEB:
3201 addend = read_sleb128(diag, p, end);
3202 break;
3203 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
3204 segmentIndex = immediate;
3205 segmentOffset = read_uleb128(diag, p, end);
3206 segIndexSet = true;
3207 break;
3208 case BIND_OPCODE_ADD_ADDR_ULEB:
3209 segmentOffset += read_uleb128(diag, p, end);
3210 break;
3211 case BIND_OPCODE_DO_BIND:
3212 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, true, dylibCount, -2, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3213 return;
3214 handler(false, segmentIndex, segmentOffset, addend, symbolName, stop);
3215 segmentOffset += pointerSize;
3216 break;
3217 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
3218 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, true, dylibCount, -2, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3219 return;
3220 handler(false, segmentIndex, segmentOffset, addend, symbolName, stop);
3221 segmentOffset += read_uleb128(diag, p, end) + pointerSize;
3222 break;
3223 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
3224 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, true, dylibCount, -2, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3225 return;
3226 handler(false, segmentIndex, segmentOffset, addend, symbolName, stop);
3227 segmentOffset += immediate*pointerSize + pointerSize;
3228 break;
3229 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
3230 count = read_uleb128(diag, p, end);
3231 skip = read_uleb128(diag, p, end);
3232 for (uint32_t i=0; i < count; ++i) {
3233 if ( invalidBindState(diag, "BIND_OPCODE_DO_BIND", leInfo, segIndexSet, true, dylibCount, -2, pointerSize, segmentIndex, segmentOffset, type, symbolName) )
3234 return;
3235 handler(false, segmentIndex, segmentOffset, addend, symbolName, stop);
3236 segmentOffset += skip + pointerSize;
3237 }
3238 break;
3239 default:
3240 diag.error("bad weak bind opcode 0x%02X", *p);
3241 }
3242 }
3243 if ( diag.hasError() || stop )
3244 return;
3245 }
3246 else {
3247 // old binary
3248 //assert(0 && "weak defs not supported for old binaries yet");
3249 }
3250}
3251
3252
3253
3254void MachOParser::forEachIndirectPointer(Diagnostics& diag, void (^handler)(uint32_t dataSegIndex, uint64_t dataSegOffset, bool bind, int bindLibOrdinal,
3255 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& stop)) const
3256{
3257 LinkEditInfo leInfo;
3258 getLinkEditPointers(diag, leInfo);
3259 if ( diag.hasError() )
3260 return;
3261
3262 // find lazy and non-lazy pointer sections
3263 const bool is64Bit = is64();
3264 const uint32_t* const indirectSymbolTable = (uint32_t*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->indirectsymoff);
3265 const uint32_t indirectSymbolTableCount = leInfo.dynSymTab->nindirectsyms;
3266 const uint32_t pointerSize = is64Bit ? 8 : 4;
3267 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
3268 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
3269 const struct nlist* symbols32 = (struct nlist*)symbolTable;
3270 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
3271 uint32_t symCount = leInfo.symTab->nsyms;
3272 uint32_t poolSize = leInfo.symTab->strsize;
3273 __block bool stop = false;
3274 forEachSection(^(const char* segName, const char* sectionName, uint32_t flags, uint64_t addr, const void* content,
3275 uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& sectionStop) {
3276 uint8_t sectionType = (flags & SECTION_TYPE);
3277 if ( (sectionType != S_LAZY_SYMBOL_POINTERS) && (sectionType != S_NON_LAZY_SYMBOL_POINTERS) && (sectionType != S_SYMBOL_STUBS) )
3278 return;
3279 bool selfModifyingStub = (sectionType == S_SYMBOL_STUBS) && (flags & S_ATTR_SELF_MODIFYING_CODE) && (reserved2 == 5) && (header()->cputype == CPU_TYPE_I386);
3280 if ( (flags & S_ATTR_SELF_MODIFYING_CODE) && !selfModifyingStub ) {
3281 diag.error("S_ATTR_SELF_MODIFYING_CODE section type only valid in old i386 binaries");
3282 sectionStop = true;
3283 return;
3284 }
3285 uint32_t elementSize = selfModifyingStub ? reserved2 : pointerSize;
3286 uint32_t elementCount = (uint32_t)(size/elementSize);
3287 if (greaterThanAddOrOverflow(reserved1, elementCount, indirectSymbolTableCount)) {
3288 diag.error("section %s overflows indirect symbol table", sectionName);
3289 sectionStop = true;
3290 return;
3291 }
3292 __block uint32_t index = 0;
3293 __block uint32_t segIndex = 0;
3294 __block uint64_t sectionSegOffset;
3295 forEachSegment(^(const char* segmentName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool &segStop) {
3296 if ( (vmAddr <= addr) && (addr < vmAddr+vmSize) ) {
3297 sectionSegOffset = addr - vmAddr;
3298 segIndex = index;
3299 segStop = true;
3300 }
3301 ++index;
3302 });
3303
3304 for (int i=0; (i < elementCount) && !stop; ++i) {
3305 uint32_t symNum = indirectSymbolTable[reserved1 + i];
3306 if ( symNum == INDIRECT_SYMBOL_ABS )
3307 continue;
3308 uint64_t segOffset = sectionSegOffset+i*elementSize;
3309 if ( symNum == INDIRECT_SYMBOL_LOCAL ) {
3310 handler(segIndex, segOffset, false, 0, "", false, false, false, stop);
3311 continue;
3312 }
3313 if ( symNum > symCount ) {
3314 diag.error("indirect symbol[%d] = %d which is invalid symbol index", reserved1 + i, symNum);
3315 sectionStop = true;
3316 return;
3317 }
3318 uint16_t n_desc = is64Bit ? symbols64[symNum].n_desc : symbols32[symNum].n_desc;
3319 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
3320 uint32_t strOffset = is64Bit ? symbols64[symNum].n_un.n_strx : symbols32[symNum].n_un.n_strx;
3321 if ( strOffset > poolSize ) {
3322 diag.error("symbol[%d] string offset out of range", reserved1 + i);
3323 sectionStop = true;
3324 return;
3325 }
3326 const char* symbolName = stringPool + strOffset;
3327 bool weakImport = (n_desc & N_WEAK_REF);
3328 bool lazy = (sectionType == S_LAZY_SYMBOL_POINTERS);
3329 handler(segIndex, segOffset, true, libOrdinal, symbolName, weakImport, lazy, selfModifyingStub, stop);
3330 }
3331 sectionStop = stop;
3332 });
3333}
3334
3335void MachOParser::forEachInterposingTuple(Diagnostics& diag, void (^handler)(uint32_t segIndex, uint64_t replacementSegOffset, uint64_t replaceeSegOffset, uint64_t replacementContent, bool& stop)) const
3336{
3337 const bool is64Bit = is64();
3338 const unsigned entrySize = is64Bit ? 16 : 8;
3339 const unsigned pointerSize = is64Bit ? 8 : 4;
3340 forEachSection(^(const char* segmentName, const char* sectionName, uint32_t flags, uint64_t addr, const void* content, uint64_t size, uint32_t alignP2, uint32_t reserved1, uint32_t reserved2, bool illegalSectionSize, bool& secStop) {
3341 if ( ((flags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(sectionName, "__interpose") == 0) && (strcmp(segmentName, "__DATA") == 0)) ) {
3342 if ( (size % entrySize) != 0 ) {
3343 diag.error("interposing section %s/%s has bad size", segmentName, sectionName);
3344 secStop = true;
3345 return;
3346 }
3347 if ( illegalSectionSize ) {
3348 diag.error("interposing section %s/%s extends beyond the end of the segment", segmentName, sectionName);
3349 secStop = true;
3350 return;
3351 }
3352 if ( ((long)content % pointerSize) != 0 ) {
3353 diag.error("interposing section %s/%s is not pointer aligned", segmentName, sectionName);
3354 secStop = true;
3355 return;
3356 }
3357 __block uint32_t sectionSegIndex = 0;
3358 __block uint64_t sectionSegOffset = 0;
3359 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, uint32_t segIndex, uint64_t sizeOfSections, uint8_t p2align, bool& segStop) {
3360 if ( (vmAddr <= addr) && (addr < vmAddr+vmSize) ) {
3361 sectionSegIndex = segIndex;
3362 sectionSegOffset = addr - vmAddr;
3363 segStop = true;
3364 }
3365 });
3366 if ( sectionSegIndex == 0 ) {
3367 diag.error("interposing section %s/%s is not in a segment", segmentName, sectionName);
3368 secStop = true;
3369 return;
3370 }
3371 uint32_t offset = 0;
3372 bool tupleStop = false;
3373 for (int i=0; i < (size/entrySize); ++i) {
3374 uint64_t replacementContent = is64Bit ? (*(uint64_t*)((char*)content + offset)) : (*(uint32_t*)((char*)content + offset));
3375 handler(sectionSegIndex, sectionSegOffset+offset, sectionSegOffset+offset+pointerSize, replacementContent, tupleStop);
3376 offset += entrySize;
3377 if ( tupleStop )
3378 break;
3379 }
3380 }
3381 });
3382}
3383
3384
3385const void* MachOParser::content(uint64_t vmOffset)
3386{
3387 __block const void* result = nullptr;
3388 __block uint32_t firstSegFileOffset = 0;
3389 __block uint64_t firstSegVmAddr = 0;
3390 if ( isRaw() ) {
3391 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, uint32_t segIndex, uint64_t sizeOfSections, uint8_t p2align, bool &stop) {
3392 if ( firstSegFileOffset == 0) {
3393 if ( fileSize == 0 )
3394 return; // skip __PAGEZERO
3395 firstSegFileOffset = fileOffset;
3396 firstSegVmAddr = vmAddr;
3397 }
3398 uint64_t segVmOffset = vmAddr - firstSegVmAddr;
3399 if ( (vmOffset >= segVmOffset) && (vmOffset < segVmOffset+vmSize) ) {
3400 result = (char*)(header()) + (fileOffset - firstSegFileOffset) + (vmOffset - segVmOffset);
3401 stop = true;
3402 }
3403 });
3404 }
3405 else if ( inRawCache() ) {
3406 forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, uint32_t segIndex, uint64_t sizeOfSections, uint8_t p2align, bool &stop) {
3407 if ( firstSegFileOffset == 0 ) {
3408 firstSegFileOffset = fileOffset;
3409 firstSegVmAddr = vmAddr;
3410 }
3411 uint64_t segVmOffset = vmAddr - firstSegVmAddr;
3412 if ( (vmOffset >= segVmOffset) && (vmOffset < segVmOffset+vmSize) ) {
3413 result = (char*)(header()) + (fileOffset - firstSegFileOffset) + (vmOffset - segVmOffset);
3414 stop = true;
3415 }
3416 });
3417 }
3418 else {
3419 // non-raw cache is easy
3420 result = (char*)(header()) + vmOffset;
3421 }
3422 return result;
3423}
3424
3425#endif // !DYLD_IN_PROCESS
3426
3427bool MachOParser::isFairPlayEncrypted(uint32_t& textOffset, uint32_t& size)
3428{
3429 textOffset = 0;
3430 size = 0;
3431 Diagnostics diag;
3432 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
3433 if ( (cmd->cmd == LC_ENCRYPTION_INFO) || (cmd->cmd == LC_ENCRYPTION_INFO_64) ) {
3434 const encryption_info_command* encCmd = (encryption_info_command*)cmd;
3435 if ( encCmd->cryptid == 1 ) {
3436 // Note: cryptid is 0 in just-built apps. The iTunes App Store sets cryptid to 1
3437 textOffset = encCmd->cryptoff;
3438 size = encCmd->cryptsize;
3439 }
3440 stop = true;
3441 }
3442 });
3443 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
3444 return (textOffset != 0);
3445}
3446
3447bool MachOParser::cdHashOfCodeSignature(const void* codeSigStart, size_t codeSignLen, uint8_t cdHash[20])
3448{
3449 const CS_CodeDirectory* cd = (const CS_CodeDirectory*)findCodeDirectoryBlob(codeSigStart, codeSignLen);
3450 if ( cd == nullptr )
3451 return false;
3452
3453 uint32_t cdLength = htonl(cd->length);
3454 if ( cd->hashType == CS_HASHTYPE_SHA256 ) {
3455 uint8_t digest[CC_SHA256_DIGEST_LENGTH];
3456 CC_SHA256(cd, cdLength, digest);
3457 // cd-hash of sigs that use SHA256 is the first 20 bytes of the SHA256 of the code digest
3458 memcpy(cdHash, digest, 20);
3459 return true;
3460 }
3461 else if ( cd->hashType == CS_HASHTYPE_SHA1 ) {
3462 // compute hash directly into return buffer
3463 CC_SHA1(cd, cdLength, cdHash);
3464 return true;
3465 }
3466
3467 return false;
3468}
3469
3470const void* MachOParser::findCodeDirectoryBlob(const void* codeSigStart, size_t codeSignLen)
3471{
3472 // verify min length of overall code signature
3473 if ( codeSignLen < sizeof(CS_SuperBlob) )
3474 return nullptr;
3475
3476 // verify magic at start
3477 const CS_SuperBlob* codeSuperBlob = (CS_SuperBlob*)codeSigStart;
3478 if ( codeSuperBlob->magic != htonl(CSMAGIC_EMBEDDED_SIGNATURE) )
3479 return nullptr;
3480
3481 // verify count of sub-blobs not too large
3482 uint32_t subBlobCount = htonl(codeSuperBlob->count);
3483 if ( (codeSignLen-sizeof(CS_SuperBlob))/sizeof(CS_BlobIndex) < subBlobCount )
3484 return nullptr;
3485
3486 // walk each sub blob, looking at ones with type CSSLOT_CODEDIRECTORY
3487 for (uint32_t i=0; i < subBlobCount; ++i) {
3488 if ( codeSuperBlob->index[i].type != htonl(CSSLOT_CODEDIRECTORY) )
3489 continue;
3490 uint32_t cdOffset = htonl(codeSuperBlob->index[i].offset);
3491 // verify offset is not out of range
3492 if ( cdOffset > (codeSignLen - sizeof(CS_CodeDirectory)) )
3493 return nullptr;
3494 const CS_CodeDirectory* cd = (CS_CodeDirectory*)((uint8_t*)codeSuperBlob + cdOffset);
3495 uint32_t cdLength = htonl(cd->length);
3496 // verify code directory length not out of range
3497 if ( cdLength > (codeSignLen - cdOffset) )
3498 return nullptr;
3499 if ( cd->magic == htonl(CSMAGIC_CODEDIRECTORY) )
3500 return cd;
3501 }
3502 return nullptr;
3503}
3504
3505
3506
3507
3508} // namespace dyld3
3509