dyld-625.13.tar.gz
[apple/dyld.git] / dyld3 / MachOAnalyzer.cpp
1 /*
2 * Copyright (c) 2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 #include <sys/types.h>
25 #include <mach/mach.h>
26 #include <assert.h>
27 #include <limits.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <mach-o/reloc.h>
32 #include <mach-o/nlist.h>
33 #include <TargetConditionals.h>
34
35 #include "MachOAnalyzer.h"
36 #include "CodeSigningTypes.h"
37 #include "Array.h"
38
39 #include <stdio.h>
40
41
// Values for BIND_OPCODE_THREADED and its two sub-opcodes.  Each one is
// defined here only if the headers included above have not already defined it.
#if !defined(BIND_OPCODE_THREADED)
    #define BIND_OPCODE_THREADED 0xD0
#endif

#if !defined(BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB)
    #define BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB 0x00
#endif

#if !defined(BIND_SUBOPCODE_THREADED_APPLY)
    #define BIND_SUBOPCODE_THREADED_APPLY 0x01
#endif
53
54
55 namespace dyld3 {
56
57
58 const MachOAnalyzer* MachOAnalyzer::validMainExecutable(Diagnostics& diag, const mach_header* mh, const char* path, uint64_t sliceLength, const char* reqArchName, Platform reqPlatform)
59 {
60 const MachOAnalyzer* result = (const MachOAnalyzer*)mh;
61 if ( !result->validMachOForArchAndPlatform(diag, (size_t)sliceLength, path, reqArchName, reqPlatform) )
62 return nullptr;
63 if ( !result->isDynamicExecutable() )
64 return nullptr;
65
66 return result;
67 }
68
69
70 closure::LoadedFileInfo MachOAnalyzer::load(Diagnostics& diag, const closure::FileSystem& fileSystem, const char* path, const char* reqArchName, Platform reqPlatform)
71 {
72 closure::LoadedFileInfo info;
73 char realerPath[MAXPATHLEN];
74 if (!fileSystem.loadFile(path, info, realerPath, ^(const char *format, ...) {
75 va_list list;
76 va_start(list, format);
77 diag.error(format, list);
78 va_end(list);
79 })) {
80 return closure::LoadedFileInfo();
81 }
82
83 // if fat, remap just slice needed
84 bool fatButMissingSlice;
85 const FatFile* fh = (FatFile*)info.fileContent;
86 uint64_t sliceOffset = info.sliceOffset;
87 uint64_t sliceLen = info.sliceLen;
88 if ( fh->isFatFileWithSlice(diag, info.fileContentLen, reqArchName, sliceOffset, sliceLen, fatButMissingSlice) ) {
89 if ( (sliceOffset & 0xFFF) != 0 ) {
90 // slice not page aligned
91 if ( strncmp((char*)info.fileContent + sliceOffset, "!<arch>", 7) == 0 )
92 diag.error("file is static library");
93 else
94 diag.error("slice is not page aligned");
95 fileSystem.unloadFile(info);
96 return closure::LoadedFileInfo();
97 }
98 else {
99 // unmap anything before slice
100 fileSystem.unloadPartialFile(info, sliceOffset, sliceLen);
101 // Update the info to keep track of the new slice offset.
102 info.sliceOffset = sliceOffset;
103 info.sliceLen = sliceLen;
104 }
105 }
106 else if ( fatButMissingSlice ) {
107 diag.error("missing required arch %s in %s", reqArchName, path);
108 fileSystem.unloadFile(info);
109 return closure::LoadedFileInfo();
110 }
111
112 const MachOAnalyzer* mh = (MachOAnalyzer*)info.fileContent;
113
114 // validate is mach-o of requested arch and platform
115 if ( !mh->validMachOForArchAndPlatform(diag, (size_t)info.sliceLen, path, reqArchName, reqPlatform) ) {
116 fileSystem.unloadFile(info);
117 return closure::LoadedFileInfo();
118 }
119
120 // if has zero-fill expansion, re-map
121 mh = mh->remapIfZeroFill(diag, fileSystem, info);
122
123 // on error, remove mappings and return nullptr
124 if ( diag.hasError() ) {
125 fileSystem.unloadFile(info);
126 return closure::LoadedFileInfo();
127 }
128
129 // now that LINKEDIT is at expected offset, finish validation
130 mh->validLinkedit(diag, path);
131
132 // on error, remove mappings and return nullptr
133 if ( diag.hasError() ) {
134 fileSystem.unloadFile(info);
135 return closure::LoadedFileInfo();
136 }
137
138 return info;
139 }
140
#if DEBUG
// Debug-only check used by the cache builder to verify segment moves are valid.
// Runs the LINKEDIT validation followed by the segment validation; any problem
// is reported through `diag`.
void MachOAnalyzer::validateDyldCacheDylib(Diagnostics& diag, const char* path) const
{
    // no real file length is available here, so segments are checked against 0xffffffff
    const size_t assumedFileLen = 0xffffffff;

    this->validLinkedit(diag, path);
    this->validSegments(diag, path, assumedFileLen);
}
#endif
149
150 uint64_t MachOAnalyzer::mappedSize() const
151 {
152 const uint32_t pageSize = uses16KPages() ? 0x4000 : 0x1000;
153 __block uint64_t textSegVmAddr = 0;
154 __block uint64_t vmSpaceRequired = 0;
155 forEachSegment(^(const SegmentInfo& info, bool& stop) {
156 if ( strcmp(info.segName, "__TEXT") == 0 ) {
157 textSegVmAddr = info.vmAddr;
158 }
159 else if ( strcmp(info.segName, "__LINKEDIT") == 0 ) {
160 vmSpaceRequired = info.vmAddr + ((info.vmSize + (pageSize-1)) & (-pageSize)) - textSegVmAddr;
161 stop = true;
162 }
163 });
164
165 return vmSpaceRequired;
166 }
167
168 bool MachOAnalyzer::validMachOForArchAndPlatform(Diagnostics& diag, size_t sliceLength, const char* path, const char* reqArchName, Platform reqPlatform) const
169 {
170 // must start with mach-o magic value
171 if ( (this->magic != MH_MAGIC) && (this->magic != MH_MAGIC_64) ) {
172 diag.error("could not use '%s' because it is not a mach-o file, 0x%08X", path, this->magic);
173 return false;
174 }
175
176 // must match requested architecture, if specified
177 if ( reqArchName != nullptr ) {
178 if ( !this->isArch(reqArchName)) {
179 // except when looking for x86_64h, fallback to x86_64
180 if ( (strcmp(reqArchName, "x86_64h") != 0) || !this->isArch("x86_64") ) {
181 #if SUPPORT_ARCH_arm64e
182 // except when looking for arm64e, fallback to arm64
183 if ( (strcmp(reqArchName, "arm64e") != 0) || !this->isArch("arm64") ) {
184 #endif
185 diag.error("could not use '%s' because it does not contain required architecture %s", path, reqArchName);
186 return false;
187 #if SUPPORT_ARCH_arm64e
188 }
189 #endif
190 }
191 }
192 }
193
194 // must be a filetype dyld can load
195 switch ( this->filetype ) {
196 case MH_EXECUTE:
197 case MH_DYLIB:
198 case MH_BUNDLE:
199 break;
200 default:
201 diag.error("could not use '%s' because it is not a dylib, bundle, or executable", path);
202 return false;
203 }
204
205 // validate load commands structure
206 if ( !this->validLoadCommands(diag, path, sliceLength) ) {
207 return false;
208 }
209
210 // filter out static executables
211 if ( (this->filetype == MH_EXECUTE) && !isDynamicExecutable() ) {
212 diag.error("could not use '%s' because it is a static executable", path);
213 return false;
214 }
215
216 // must match requested platform (do this after load commands are validated)
217 if ( !this->supportsPlatform(reqPlatform) ) {
218 diag.error("could not use '%s' because it was built for a different platform", path);
219 return false;
220 }
221
222 // validate dylib loads
223 if ( !validEmbeddedPaths(diag, path) )
224 return false;
225
226 // validate segments
227 if ( !validSegments(diag, path, sliceLength) )
228 return false;
229
230 // validate entry
231 if ( this->filetype == MH_EXECUTE ) {
232 if ( !validMain(diag, path) )
233 return false;
234 }
235
236 // further validations done in validLinkedit()
237
238 return true;
239 }
240
241 bool MachOAnalyzer::validLinkedit(Diagnostics& diag, const char* path) const
242 {
243 // validate LINKEDIT layout
244 if ( !validLinkeditLayout(diag, path) )
245 return false;
246
247 if ( hasChainedFixups() ) {
248 if ( !validChainedFixupsInfo(diag, path) )
249 return false;
250 }
251 else {
252 // validate rebasing info
253 if ( !validRebaseInfo(diag, path) )
254 return false;
255
256 // validate binding info
257 if ( !validBindInfo(diag, path) )
258 return false;
259 }
260
261 return true;
262 }
263
264 bool MachOAnalyzer::validLoadCommands(Diagnostics& diag, const char* path, size_t fileLen) const
265 {
266 // check load command don't exceed file length
267 if ( this->sizeofcmds + sizeof(mach_header_64) > fileLen ) {
268 diag.error("in '%s' load commands exceed length of file", path);
269 return false;
270 }
271
272 // walk all load commands and sanity check them
273 Diagnostics walkDiag;
274 forEachLoadCommand(walkDiag, ^(const load_command* cmd, bool& stop) {});
275 if ( walkDiag.hasError() ) {
276 #if BUILDING_CACHE_BUILDER
277 diag.error("in '%s' %s", path, walkDiag.errorMessage().c_str());
278 #else
279 diag.error("in '%s' %s", path, walkDiag.errorMessage());
280 #endif
281 return false;
282 }
283
284 // check load commands fit in TEXT segment
285 __block bool foundTEXT = false;
286 forEachSegment(^(const SegmentInfo& info, bool& stop) {
287 if ( strcmp(info.segName, "__TEXT") == 0 ) {
288 foundTEXT = true;
289 if ( this->sizeofcmds + sizeof(mach_header_64) > info.fileSize ) {
290 diag.error("in '%s' load commands exceed length of __TEXT segment", path);
291 }
292 if ( info.fileOffset != 0 ) {
293 diag.error("in '%s' __TEXT segment not start of mach-o", path);
294 }
295 stop = true;
296 }
297 });
298 if ( !diag.noError() && !foundTEXT ) {
299 diag.error("in '%s' __TEXT segment not found", path);
300 return false;
301 }
302
303 return true;
304 }
305
306 const MachOAnalyzer* MachOAnalyzer::remapIfZeroFill(Diagnostics& diag, const closure::FileSystem& fileSystem, closure::LoadedFileInfo& info) const
307 {
308 uint64_t vmSpaceRequired;
309 auto hasZeroFill = [this, &vmSpaceRequired]() {
310 __block bool hasZeroFill = false;
311 __block uint64_t textSegVmAddr = 0;
312 forEachSegment(^(const SegmentInfo& segmentInfo, bool& stop) {
313 if ( strcmp(segmentInfo.segName, "__TEXT") == 0 ) {
314 textSegVmAddr = segmentInfo.vmAddr;
315 }
316 else if ( strcmp(segmentInfo.segName, "__LINKEDIT") == 0 ) {
317 uint64_t vmOffset = segmentInfo.vmAddr - textSegVmAddr;
318 // A zero fill page in the __DATA segment means the file offset of __LINKEDIT is less than its vm offset
319 if ( segmentInfo.fileOffset != vmOffset )
320 hasZeroFill = true;
321 vmSpaceRequired = segmentInfo.vmAddr + segmentInfo.vmSize - textSegVmAddr;
322 stop = true;
323 }
324 });
325 return hasZeroFill;
326 };
327
328 if (hasZeroFill()) {
329 vm_address_t newMappedAddr;
330 if ( ::vm_allocate(mach_task_self(), &newMappedAddr, (size_t)vmSpaceRequired, VM_FLAGS_ANYWHERE) != 0 ) {
331 diag.error("vm_allocate failure");
332 return nullptr;
333 }
334 // mmap() each segment read-only with standard layout
335 __block uint64_t textSegVmAddr;
336 forEachSegment(^(const SegmentInfo& segmentInfo, bool& stop) {
337 if ( strcmp(segmentInfo.segName, "__TEXT") == 0 )
338 textSegVmAddr = segmentInfo.vmAddr;
339 if ( segmentInfo.fileSize != 0 ) {
340 kern_return_t r = vm_copy(mach_task_self(), (vm_address_t)((long)info.fileContent+segmentInfo.fileOffset), (vm_size_t)segmentInfo.fileSize, (vm_address_t)(newMappedAddr+segmentInfo.vmAddr-textSegVmAddr));
341 if ( r != KERN_SUCCESS ) {
342 diag.error("vm_copy() failure");
343 stop = true;
344 }
345 }
346 });
347 if ( diag.noError() ) {
348 // remove original mapping and return new mapping
349 fileSystem.unloadFile(info);
350
351 // Set vm_deallocate as the unload method.
352 info.unload = [](const closure::LoadedFileInfo& info) {
353 ::vm_deallocate(mach_task_self(), (vm_address_t)info.fileContent, (size_t)info.fileContentLen);
354 };
355
356 // And update the file content to the new location
357 info.fileContent = (const void*)newMappedAddr;
358 info.fileContentLen = vmSpaceRequired;
359 return (const MachOAnalyzer*)info.fileContent;
360 }
361 else {
362 // new mapping failed, return old mapping with an error in diag
363 ::vm_deallocate(mach_task_self(), newMappedAddr, (size_t)vmSpaceRequired);
364 return nullptr;
365 }
366 }
367
368 return this;
369 }
370
371 bool MachOAnalyzer::enforceFormat(Malformed kind) const
372 {
373 #if TARGET_OS_OSX
374 __block bool result = false;
375 forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
376 if ( platform == Platform::macOS ) {
377 switch (kind) {
378 case Malformed::linkeditOrder:
379 case Malformed::linkeditAlignment:
380 case Malformed::dyldInfoAndlocalRelocs:
381 // enforce these checks on new binaries only
382 result = (sdk >= 0x000A0E00); // macOS 10.14
383 }
384 }
385 });
386 // if binary is so old, there is no platform info, don't enforce malformed errors
387 return result;
388 #else
389 return true;
390 #endif
391 }
392
393 bool MachOAnalyzer::validEmbeddedPaths(Diagnostics& diag, const char* path) const
394 {
395 __block int index = 1;
396 __block bool allGood = true;
397 __block bool foundInstallName = false;
398 __block int dependentsCount = 0;
399 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
400 const dylib_command* dylibCmd;
401 const rpath_command* rpathCmd;
402 switch ( cmd->cmd ) {
403 case LC_ID_DYLIB:
404 foundInstallName = true;
405 // fall through
406 case LC_LOAD_DYLIB:
407 case LC_LOAD_WEAK_DYLIB:
408 case LC_REEXPORT_DYLIB:
409 case LC_LOAD_UPWARD_DYLIB:
410 dylibCmd = (dylib_command*)cmd;
411 if ( dylibCmd->dylib.name.offset > cmd->cmdsize ) {
412 diag.error("in '%s' load command #%d name offset (%u) outside its size (%u)", path, index, dylibCmd->dylib.name.offset, cmd->cmdsize);
413 stop = true;
414 allGood = false;
415 }
416 else {
417 bool foundEnd = false;
418 const char* start = (char*)dylibCmd + dylibCmd->dylib.name.offset;
419 const char* end = (char*)dylibCmd + cmd->cmdsize;
420 for (const char* s=start; s < end; ++s) {
421 if ( *s == '\0' ) {
422 foundEnd = true;
423 break;
424 }
425 }
426 if ( !foundEnd ) {
427 diag.error("in '%s' load command #%d string extends beyond end of load command", path, index);
428 stop = true;
429 allGood = false;
430 }
431 }
432 if ( cmd->cmd != LC_ID_DYLIB )
433 ++dependentsCount;
434 break;
435 case LC_RPATH:
436 rpathCmd = (rpath_command*)cmd;
437 if ( rpathCmd->path.offset > cmd->cmdsize ) {
438 diag.error("in '%s' load command #%d path offset (%u) outside its size (%u)", path, index, rpathCmd->path.offset, cmd->cmdsize);
439 stop = true;
440 allGood = false;
441 }
442 else {
443 bool foundEnd = false;
444 const char* start = (char*)rpathCmd + rpathCmd->path.offset;
445 const char* end = (char*)rpathCmd + cmd->cmdsize;
446 for (const char* s=start; s < end; ++s) {
447 if ( *s == '\0' ) {
448 foundEnd = true;
449 break;
450 }
451 }
452 if ( !foundEnd ) {
453 diag.error("in '%s' load command #%d string extends beyond end of load command", path, index);
454 stop = true;
455 allGood = false;
456 }
457 }
458 break;
459 }
460 ++index;
461 });
462 if ( !allGood )
463 return false;
464
465 if ( this->filetype == MH_DYLIB ) {
466 if ( !foundInstallName ) {
467 diag.error("in '%s' MH_DYLIB is missing LC_ID_DYLIB", path);
468 return false;
469 }
470 }
471 else {
472 if ( foundInstallName ) {
473 diag.error("in '%s' LC_ID_DYLIB found in non-MH_DYLIB", path);
474 return false;
475 }
476 }
477
478 if ( (dependentsCount == 0) && (this->filetype == MH_EXECUTE) ) {
479 diag.error("in '%s' missing LC_LOAD_DYLIB (must link with at least libSystem.dylib)", path);
480 return false;
481 }
482
483 return true;
484 }
485
486 bool MachOAnalyzer::validSegments(Diagnostics& diag, const char* path, size_t fileLen) const
487 {
488 // check segment load command size
489 __block bool badSegmentLoadCommand = false;
490 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
491 if ( cmd->cmd == LC_SEGMENT_64 ) {
492 const segment_command_64* seg = (segment_command_64*)cmd;
493 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command_64);
494 if ( sectionsSpace < 0 ) {
495 diag.error("in '%s' load command size too small for LC_SEGMENT_64", path);
496 badSegmentLoadCommand = true;
497 stop = true;
498 }
499 else if ( (sectionsSpace % sizeof(section_64)) != 0 ) {
500 diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize);
501 badSegmentLoadCommand = true;
502 stop = true;
503 }
504 else if ( sectionsSpace != (seg->nsects * sizeof(section_64)) ) {
505 diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects);
506 badSegmentLoadCommand = true;
507 stop = true;
508 }
509 else if ( greaterThanAddOrOverflow(seg->fileoff, seg->filesize, fileLen) ) {
510 diag.error("in '%s' segment load command content extends beyond end of file", path);
511 badSegmentLoadCommand = true;
512 stop = true;
513 }
514 else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
515 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
516 diag.error("in '%s' segment filesize exceeds vmsize", path);
517 badSegmentLoadCommand = true;
518 stop = true;
519 }
520 }
521 else if ( cmd->cmd == LC_SEGMENT ) {
522 const segment_command* seg = (segment_command*)cmd;
523 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command);
524 if ( sectionsSpace < 0 ) {
525 diag.error("in '%s' load command size too small for LC_SEGMENT", path);
526 badSegmentLoadCommand = true;
527 stop = true;
528 }
529 else if ( (sectionsSpace % sizeof(section)) != 0 ) {
530 diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize);
531 badSegmentLoadCommand = true;
532 stop = true;
533 }
534 else if ( sectionsSpace != (seg->nsects * sizeof(section)) ) {
535 diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects);
536 badSegmentLoadCommand = true;
537 stop = true;
538 }
539 else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
540 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
541 diag.error("in '%s' segment filesize exceeds vmsize", path);
542 badSegmentLoadCommand = true;
543 stop = true;
544 }
545 }
546 });
547 if ( badSegmentLoadCommand )
548 return false;
549
550 // check mapping permissions of segments
551 __block bool badPermissions = false;
552 __block bool badSize = false;
553 __block bool hasTEXT = false;
554 __block bool hasLINKEDIT = false;
555 forEachSegment(^(const SegmentInfo& info, bool& stop) {
556 if ( strcmp(info.segName, "__TEXT") == 0 ) {
557 if ( info.protections != (VM_PROT_READ|VM_PROT_EXECUTE) ) {
558 diag.error("in '%s' __TEXT segment permissions is not 'r-x'", path);
559 badPermissions = true;
560 stop = true;
561 }
562 hasTEXT = true;
563 }
564 else if ( strcmp(info.segName, "__LINKEDIT") == 0 ) {
565 if ( info.protections != VM_PROT_READ ) {
566 diag.error("in '%s' __LINKEDIT segment permissions is not 'r--'", path);
567 badPermissions = true;
568 stop = true;
569 }
570 hasLINKEDIT = true;
571 }
572 else if ( (info.protections & 0xFFFFFFF8) != 0 ) {
573 diag.error("in '%s' %s segment permissions has invalid bits set", path, info.segName);
574 badPermissions = true;
575 stop = true;
576 }
577 if ( greaterThanAddOrOverflow(info.fileOffset, info.fileSize, fileLen) ) {
578 diag.error("in '%s' %s segment content extends beyond end of file", path, info.segName);
579 badSize = true;
580 stop = true;
581 }
582 if ( is64() ) {
583 if ( info.vmAddr+info.vmSize < info.vmAddr ) {
584 diag.error("in '%s' %s segment vm range wraps", path, info.segName);
585 badSize = true;
586 stop = true;
587 }
588 }
589 else {
590 if ( (uint32_t)(info.vmAddr+info.vmSize) < (uint32_t)(info.vmAddr) ) {
591 diag.error("in '%s' %s segment vm range wraps", path, info.segName);
592 badSize = true;
593 stop = true;
594 }
595 }
596 });
597 if ( badPermissions || badSize )
598 return false;
599 if ( !hasTEXT ) {
600 diag.error("in '%s' missing __TEXT segment", path);
601 return false;
602 }
603 if ( !hasLINKEDIT ) {
604 diag.error("in '%s' missing __LINKEDIT segment", path);
605 return false;
606 }
607
608 // check for overlapping segments
609 __block bool badSegments = false;
610 forEachSegment(^(const SegmentInfo& info1, bool& stop1) {
611 uint64_t seg1vmEnd = info1.vmAddr + info1.vmSize;
612 uint64_t seg1FileEnd = info1.fileOffset + info1.fileSize;
613 forEachSegment(^(const SegmentInfo& info2, bool& stop2) {
614 if ( info1.segIndex == info2.segIndex )
615 return;
616 uint64_t seg2vmEnd = info2.vmAddr + info2.vmSize;
617 uint64_t seg2FileEnd = info2.fileOffset + info2.fileSize;
618 if ( ((info2.vmAddr <= info1.vmAddr) && (seg2vmEnd > info1.vmAddr) && (seg1vmEnd > info1.vmAddr )) || ((info2.vmAddr >= info1.vmAddr ) && (info2.vmAddr < seg1vmEnd) && (seg2vmEnd > info2.vmAddr)) ) {
619 diag.error("in '%s' segment %s vm range overlaps segment %s", path, info1.segName, info2.segName);
620 badSegments = true;
621 stop1 = true;
622 stop2 = true;
623 }
624 if ( ((info2.fileOffset <= info1.fileOffset) && (seg2FileEnd > info1.fileOffset) && (seg1FileEnd > info1.fileOffset)) || ((info2.fileOffset >= info1.fileOffset) && (info2.fileOffset < seg1FileEnd) && (seg2FileEnd > info2.fileOffset )) ) {
625 diag.error("in '%s' segment %s file content overlaps segment %s", path, info1.segName, info2.segName);
626 badSegments = true;
627 stop1 = true;
628 stop2 = true;
629 }
630 if ( (info1.segIndex < info2.segIndex) && !stop1 ) {
631 if ( (info1.vmAddr > info2.vmAddr) || ((info1.fileOffset > info2.fileOffset ) && (info1.fileOffset != 0) && (info2.fileOffset != 0)) ){
632 if ( !inDyldCache() ) {
633 // dyld cache __DATA_* segments are moved around
634 diag.error("in '%s' segment load commands out of order with respect to layout for %s and %s", path, info1.segName, info2.segName);
635 badSegments = true;
636 stop1 = true;
637 stop2 = true;
638 }
639 }
640 }
641 });
642 });
643 if ( badSegments )
644 return false;
645
646 // check sections are within segment
647 __block bool badSections = false;
648 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
649 if ( cmd->cmd == LC_SEGMENT_64 ) {
650 const segment_command_64* seg = (segment_command_64*)cmd;
651 const section_64* const sectionsStart = (section_64*)((char*)seg + sizeof(struct segment_command_64));
652 const section_64* const sectionsEnd = &sectionsStart[seg->nsects];
653 for (const section_64* sect=sectionsStart; (sect < sectionsEnd); ++sect) {
654 if ( (int64_t)(sect->size) < 0 ) {
655 diag.error("in '%s' section %s size too large 0x%llX", path, sect->sectname, sect->size);
656 badSections = true;
657 }
658 else if ( sect->addr < seg->vmaddr ) {
659 diag.error("in '%s' section %s start address 0x%llX is before containing segment's address 0x%0llX", path, sect->sectname, sect->addr, seg->vmaddr);
660 badSections = true;
661 }
662 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
663 diag.error("in '%s' section %s end address 0x%llX is beyond containing segment's end address 0x%0llX", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
664 badSections = true;
665 }
666 }
667 }
668 else if ( cmd->cmd == LC_SEGMENT ) {
669 const segment_command* seg = (segment_command*)cmd;
670 const section* const sectionsStart = (section*)((char*)seg + sizeof(struct segment_command));
671 const section* const sectionsEnd = &sectionsStart[seg->nsects];
672 for (const section* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
673 if ( (int64_t)(sect->size) < 0 ) {
674 diag.error("in '%s' section %s size too large 0x%X", path, sect->sectname, sect->size);
675 badSections = true;
676 }
677 else if ( sect->addr < seg->vmaddr ) {
678 diag.error("in '%s' section %s start address 0x%X is before containing segment's address 0x%0X", path, sect->sectname, sect->addr, seg->vmaddr);
679 badSections = true;
680 }
681 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
682 diag.error("in '%s' section %s end address 0x%X is beyond containing segment's end address 0x%0X", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
683 badSections = true;
684 }
685 }
686 }
687 });
688
689 return !badSections;
690 }
691
692
693 bool MachOAnalyzer::validMain(Diagnostics& diag, const char* path) const
694 {
695 __block uint64_t textSegStartAddr = 0;
696 __block uint64_t textSegStartSize = 0;
697 forEachSegment(^(const SegmentInfo& info, bool& stop) {
698 if ( strcmp(info.segName, "__TEXT") == 0 ) {
699 textSegStartAddr = info.vmAddr;
700 textSegStartSize = info.vmSize;
701 stop = true;
702 }
703 });
704
705 __block int mainCount = 0;
706 __block int threadCount = 0;
707 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
708 entry_point_command* mainCmd;
709 uint64_t startAddress;
710 switch (cmd->cmd) {
711 case LC_MAIN:
712 ++mainCount;
713 mainCmd = (entry_point_command*)cmd;
714 if ( mainCmd->entryoff > textSegStartSize ) {
715 diag.error("LC_MAIN points outside of __TEXT segment");
716 stop = true;
717 }
718 break;
719 case LC_UNIXTHREAD:
720 ++threadCount;
721 startAddress = entryAddrFromThreadCmd((thread_command*)cmd);
722 if ( startAddress == 0 ) {
723 diag.error("LC_UNIXTHREAD not valid for arch %s", archName());
724 stop = true;
725 }
726 else if ( (startAddress < textSegStartAddr) || (startAddress > textSegStartAddr+textSegStartSize) ) {
727 diag.error("LC_UNIXTHREAD entry not in __TEXT segment");
728 stop = true;
729 }
730 break;
731 }
732 });
733 if ( diag.hasError() )
734 return false;
735 if ( diag.noError() && (mainCount+threadCount == 1) )
736 return true;
737
738 if ( mainCount + threadCount == 0 )
739 diag.error("missing LC_MAIN or LC_UNIXTHREAD");
740 else
741 diag.error("only one LC_MAIN or LC_UNIXTHREAD is allowed");
742 return false;
743 }
744
745
namespace {
    // Describes one contiguous piece of content within LINKEDIT: a display name
    // for diagnostics, its rank in the standard layout order, and its file range.
    struct LinkEditContentChunk
    {
        const char* name;
        uint32_t    stdOrder;
        uint32_t    fileOffsetStart;
        uint32_t    size;

        // qsort() comparator ordering chunks by ascending file offset.
        // Returns <0, 0 or >0.  Equal keys must compare as 0: always returning
        // non-zero makes compare(a,b) and compare(b,a) disagree, which qsort()
        // does not permit.
        static int compareByFileOffset(const void* l, const void* r) {
            const uint32_t lhs = static_cast<const LinkEditContentChunk*>(l)->fileOffsetStart;
            const uint32_t rhs = static_cast<const LinkEditContentChunk*>(r)->fileOffsetStart;
            return (lhs > rhs) - (lhs < rhs);
        }

        // qsort() comparator ordering chunks by ascending standard-layout rank.
        // Returns <0, 0 or >0 (several chunk kinds can share a rank).
        static int compareByStandardOrder(const void* l, const void* r) {
            const uint32_t lhs = static_cast<const LinkEditContentChunk*>(l)->stdOrder;
            const uint32_t rhs = static_cast<const LinkEditContentChunk*>(r)->stdOrder;
            return (lhs > rhs) - (lhs < rhs);
        }
    };
} // anonymous namespace
768
769
770
771 bool MachOAnalyzer::validLinkeditLayout(Diagnostics& diag, const char* path) const
772 {
773 LinkEditInfo leInfo;
774 getLinkEditPointers(diag, leInfo);
775 if ( diag.hasError() )
776 return false;
777 const uint32_t ptrSize = pointerSize();
778
779 // build vector of all blobs in LINKEDIT
780 LinkEditContentChunk blobs[32];
781 LinkEditContentChunk* bp = blobs;
782 if ( leInfo.dyldInfo != nullptr ) {
783 if ( leInfo.dyldInfo->rebase_size != 0 )
784 *bp++ = {"rebase opcodes", 1, leInfo.dyldInfo->rebase_off, leInfo.dyldInfo->rebase_size};
785 if ( leInfo.dyldInfo->bind_size != 0 )
786 *bp++ = {"bind opcodes", 2, leInfo.dyldInfo->bind_off, leInfo.dyldInfo->bind_size};
787 if ( leInfo.dyldInfo->weak_bind_size != 0 )
788 *bp++ = {"weak bind opcodes", 3, leInfo.dyldInfo->weak_bind_off, leInfo.dyldInfo->weak_bind_size};
789 if ( leInfo.dyldInfo->lazy_bind_size != 0 )
790 *bp++ = {"lazy bind opcodes", 4, leInfo.dyldInfo->lazy_bind_off, leInfo.dyldInfo->lazy_bind_size};
791 if ( leInfo.dyldInfo->export_size!= 0 )
792 *bp++ = {"exports trie", 5, leInfo.dyldInfo->export_off, leInfo.dyldInfo->export_size};
793 }
794 if ( leInfo.dynSymTab != nullptr ) {
795 if ( leInfo.dynSymTab->nlocrel != 0 )
796 *bp++ = {"local relocations", 6, leInfo.dynSymTab->locreloff, static_cast<uint32_t>(leInfo.dynSymTab->nlocrel*sizeof(relocation_info))};
797 if ( leInfo.dynSymTab->nextrel != 0 )
798 *bp++ = {"external relocations", 11, leInfo.dynSymTab->extreloff, static_cast<uint32_t>(leInfo.dynSymTab->nextrel*sizeof(relocation_info))};
799 if ( leInfo.dynSymTab->nindirectsyms != 0 )
800 *bp++ = {"indirect symbol table", 12, leInfo.dynSymTab->indirectsymoff, leInfo.dynSymTab->nindirectsyms*4};
801 }
802 if ( leInfo.splitSegInfo != nullptr ) {
803 if ( leInfo.splitSegInfo->datasize != 0 )
804 *bp++ = {"shared cache info", 6, leInfo.splitSegInfo->dataoff, leInfo.splitSegInfo->datasize};
805 }
806 if ( leInfo.functionStarts != nullptr ) {
807 if ( leInfo.functionStarts->datasize != 0 )
808 *bp++ = {"function starts", 7, leInfo.functionStarts->dataoff, leInfo.functionStarts->datasize};
809 }
810 if ( leInfo.dataInCode != nullptr ) {
811 if ( leInfo.dataInCode->datasize != 0 )
812 *bp++ = {"data in code", 8, leInfo.dataInCode->dataoff, leInfo.dataInCode->datasize};
813 }
814 if ( leInfo.symTab != nullptr ) {
815 if ( leInfo.symTab->nsyms != 0 )
816 *bp++ = {"symbol table", 10, leInfo.symTab->symoff, static_cast<uint32_t>(leInfo.symTab->nsyms*(ptrSize == 8 ? sizeof(nlist_64) : sizeof(struct nlist)))};
817 if ( leInfo.symTab->strsize != 0 )
818 *bp++ = {"symbol table strings", 20, leInfo.symTab->stroff, leInfo.symTab->strsize};
819 }
820 if ( leInfo.codeSig != nullptr ) {
821 if ( leInfo.codeSig->datasize != 0 )
822 *bp++ = {"code signature", 21, leInfo.codeSig->dataoff, leInfo.codeSig->datasize};
823 }
824
825 // check for bad combinations
826 if ( (leInfo.dyldInfo != nullptr) && (leInfo.dyldInfo->cmd == LC_DYLD_INFO_ONLY) && (leInfo.dynSymTab != nullptr) ) {
827 if ( (leInfo.dynSymTab->nlocrel != 0) && enforceFormat(Malformed::dyldInfoAndlocalRelocs) ) {
828 diag.error("in '%s' malformed mach-o contains LC_DYLD_INFO_ONLY and local relocations", path);
829 return false;
830 }
831 if ( leInfo.dynSymTab->nextrel != 0 ) {
832 diag.error("in '%s' malformed mach-o contains LC_DYLD_INFO_ONLY and external relocations", path);
833 return false;
834 }
835 }
836 if ( (leInfo.dyldInfo == nullptr) && (leInfo.dynSymTab == nullptr) ) {
837 diag.error("in '%s' malformed mach-o misssing LC_DYLD_INFO and LC_DYSYMTAB", path);
838 return false;
839 }
840 const unsigned long blobCount = bp - blobs;
841 if ( blobCount == 0 ) {
842 diag.error("in '%s' malformed mach-o misssing LINKEDIT", path);
843 return false;
844 }
845
846 uint32_t linkeditFileEnd = leInfo.layout.linkeditFileOffset + leInfo.layout.linkeditFileSize;
847
848
849 // sort blobs by file-offset and error on overlaps
850 ::qsort(blobs, blobCount, sizeof(LinkEditContentChunk), &LinkEditContentChunk::compareByFileOffset);
851 uint32_t prevEnd = leInfo.layout.linkeditFileOffset;
852 const char* prevName = "start of LINKEDIT";
853 for (unsigned long i=0; i < blobCount; ++i) {
854 const LinkEditContentChunk& blob = blobs[i];
855 if ( blob.fileOffsetStart < prevEnd ) {
856 diag.error("in '%s' LINKEDIT overlap of %s and %s", path, prevName, blob.name);
857 return false;
858 }
859 if (greaterThanAddOrOverflow(blob.fileOffsetStart, blob.size, linkeditFileEnd)) {
860 diag.error("in '%s' LINKEDIT content '%s' extends beyond end of segment", path, blob.name);
861 return false;
862 }
863 prevEnd = blob.fileOffsetStart + blob.size;
864 prevName = blob.name;
865 }
866
867 // sort vector by order and warn on non standard order or mis-alignment
868 ::qsort(blobs, blobCount, sizeof(LinkEditContentChunk), &LinkEditContentChunk::compareByStandardOrder);
869 prevEnd = leInfo.layout.linkeditFileOffset;
870 for (unsigned long i=0; i < blobCount; ++i) {
871 const LinkEditContentChunk& blob = blobs[i];
872 if ( ((blob.fileOffsetStart & (ptrSize-1)) != 0) && (blob.stdOrder != 20) && enforceFormat(Malformed::linkeditAlignment) ) // ok for "symbol table strings" to be mis-aligned
873 diag.error("in '%s' mis-aligned LINKEDIT content '%s'", path, blob.name);
874 if ( (blob.fileOffsetStart < prevEnd) && enforceFormat(Malformed::linkeditOrder) ) {
875 diag.error("in '%s' LINKEDIT out of order %s", path, blob.name);
876 }
877 prevEnd = blob.fileOffsetStart;
878 }
879
880 // Check for invalid symbol table sizes
881 if ( leInfo.symTab != nullptr ) {
882 if ( leInfo.symTab->nsyms > 0x10000000 ) {
883 diag.error("in '%s' malformed mach-o image: symbol table too large", path);
884 return false;
885 }
886 if ( leInfo.dynSymTab != nullptr ) {
887 // validate indirect symbol table
888 if ( leInfo.dynSymTab->nindirectsyms != 0 ) {
889 if ( leInfo.dynSymTab->nindirectsyms > 0x10000000 ) {
890 diag.error("in '%s' malformed mach-o image: indirect symbol table too large", path);
891 return false;
892 }
893 }
894 if ( (leInfo.dynSymTab->nlocalsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->ilocalsym > leInfo.symTab->nsyms) ) {
895 diag.error("in '%s' malformed mach-o image: indirect symbol table local symbol count exceeds total symbols", path);
896 return false;
897 }
898 if ( leInfo.dynSymTab->ilocalsym + leInfo.dynSymTab->nlocalsym < leInfo.dynSymTab->ilocalsym ) {
899 diag.error("in '%s' malformed mach-o image: indirect symbol table local symbol count wraps", path);
900 return false;
901 }
902 if ( (leInfo.dynSymTab->nextdefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iextdefsym > leInfo.symTab->nsyms) ) {
903 diag.error("in '%s' malformed mach-o image: indirect symbol table extern symbol count exceeds total symbols", path);
904 return false;
905 }
906 if ( leInfo.dynSymTab->iextdefsym + leInfo.dynSymTab->nextdefsym < leInfo.dynSymTab->iextdefsym ) {
907 diag.error("in '%s' malformed mach-o image: indirect symbol table extern symbol count wraps", path);
908 return false;
909 }
910 if ( (leInfo.dynSymTab->nundefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iundefsym > leInfo.symTab->nsyms) ) {
911 diag.error("in '%s' malformed mach-o image: indirect symbol table undefined symbol count exceeds total symbols", path);
912 return false;
913 }
914 if ( leInfo.dynSymTab->iundefsym + leInfo.dynSymTab->nundefsym < leInfo.dynSymTab->iundefsym ) {
915 diag.error("in '%s' malformed mach-o image: indirect symbol table undefined symbol count wraps", path);
916 return false;
917 }
918 }
919 }
920
921 return true;
922 }
923
924
925
926 bool MachOAnalyzer::invalidRebaseState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
927 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type) const
928 {
929 if ( !segIndexSet ) {
930 diag.error("in '%s' %s missing preceding REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path, opcodeName);
931 return true;
932 }
933 if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
934 diag.error("in '%s' %s segment index %d too large", path, opcodeName, segmentIndex);
935 return true;
936 }
937 if ( segmentOffset > (segments[segmentIndex].vmSize-ptrSize) ) {
938 diag.error("in '%s' %s current segment offset 0x%08llX beyond segment size (0x%08llX)", path, opcodeName, segmentOffset, segments[segmentIndex].vmSize);
939 return true;
940 }
941 switch ( type ) {
942 case REBASE_TYPE_POINTER:
943 if ( !segments[segmentIndex].writable() ) {
944 diag.error("in '%s' %s pointer rebase is in non-writable segment", path, opcodeName);
945 return true;
946 }
947 if ( segments[segmentIndex].executable() ) {
948 diag.error("in '%s' %s pointer rebase is in executable segment", path, opcodeName);
949 return true;
950 }
951 break;
952 case REBASE_TYPE_TEXT_ABSOLUTE32:
953 case REBASE_TYPE_TEXT_PCREL32:
954 if ( !segments[segmentIndex].textRelocs ) {
955 diag.error("in '%s' %s text rebase is in segment that does not support text relocations", path, opcodeName);
956 return true;
957 }
958 if ( segments[segmentIndex].writable() ) {
959 diag.error("in '%s' %s text rebase is in writable segment", path, opcodeName);
960 return true;
961 }
962 if ( !segments[segmentIndex].executable() ) {
963 diag.error("in '%s' %s pointer rebase is in non-executable segment", path, opcodeName);
964 return true;
965 }
966 break;
967 default:
968 diag.error("in '%s' %s unknown rebase type %d", path, opcodeName, type);
969 return true;
970 }
971 return false;
972 }
973
974
975 void MachOAnalyzer::getAllSegmentsInfos(Diagnostics& diag, SegmentInfo segments[]) const
976 {
977 forEachSegment(^(const SegmentInfo& info, bool& stop) {
978 segments[info.segIndex] = info;
979 });
980 }
981
982
983 bool MachOAnalyzer::validRebaseInfo(Diagnostics& diag, const char* path) const
984 {
985 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
986 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
987 if ( invalidRebaseState(diag, opcodeName, path, leInfo, segments, segIndexSet, ptrSize, segmentIndex, segmentOffset, type) )
988 stop = true;
989 });
990 return diag.noError();
991 }
992
993
994 void MachOAnalyzer::forEachTextRebase(Diagnostics& diag, void (^handler)(uint64_t runtimeOffset, bool& stop)) const
995 {
996 __block bool startVmAddrSet = false;
997 __block uint64_t startVmAddr = 0;
998 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
999 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1000 if ( type != REBASE_TYPE_TEXT_ABSOLUTE32 )
1001 return;
1002 if ( !startVmAddrSet ) {
1003 for (int i=0; i <= segmentIndex; ++i) {
1004 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1005 startVmAddr = segments[i].vmAddr;
1006 startVmAddrSet = true;
1007 break;
1008 }
1009 }
1010 }
1011 uint64_t rebaseVmAddr = segments[segmentIndex].vmAddr + segmentOffset;
1012 uint64_t runtimeOffset = rebaseVmAddr - startVmAddr;
1013 handler(runtimeOffset, stop);
1014 });
1015 }
1016
1017
1018 void MachOAnalyzer::forEachRebase(Diagnostics& diag, bool ignoreLazyPointers, void (^handler)(uint64_t runtimeOffset, bool& stop)) const
1019 {
1020 __block bool startVmAddrSet = false;
1021 __block uint64_t startVmAddr = 0;
1022 __block uint64_t lpVmAddr = 0;
1023 __block uint64_t lpEndVmAddr = 0;
1024 __block uint64_t shVmAddr = 0;
1025 __block uint64_t shEndVmAddr = 0;
1026 if ( ignoreLazyPointers ) {
1027 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
1028 if ( (info.sectFlags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
1029 lpVmAddr = info.sectAddr;
1030 lpEndVmAddr = info.sectAddr + info.sectSize;
1031 }
1032 else if ( (info.sectFlags & S_ATTR_PURE_INSTRUCTIONS) && (strcmp(info.sectName, "__stub_helper") == 0) ) {
1033 shVmAddr = info.sectAddr;
1034 shEndVmAddr = info.sectAddr + info.sectSize;
1035 }
1036 });
1037 }
1038 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1039 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1040 if ( type != REBASE_TYPE_POINTER )
1041 return;
1042 if ( !startVmAddrSet ) {
1043 for (int i=0; i < segmentIndex; ++i) {
1044 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1045 startVmAddr = segments[i].vmAddr;
1046 startVmAddrSet = true;
1047 break;
1048 }
1049 }
1050 }
1051 uint64_t rebaseVmAddr = segments[segmentIndex].vmAddr + segmentOffset;
1052 bool skipRebase = false;
1053 if ( (rebaseVmAddr >= lpVmAddr) && (rebaseVmAddr < lpEndVmAddr) ) {
1054 // rebase is in lazy pointer section
1055 uint64_t lpValue = 0;
1056 if ( ptrSize == 8 )
1057 lpValue = *((uint64_t*)(rebaseVmAddr-startVmAddr+(uint8_t*)this));
1058 else
1059 lpValue = *((uint32_t*)(rebaseVmAddr-startVmAddr+(uint8_t*)this));
1060 if ( (lpValue >= shVmAddr) && (lpValue < shEndVmAddr) ) {
1061 // content is into stub_helper section
1062 uint64_t lpTargetImageOffset = lpValue - startVmAddr;
1063 const uint8_t* helperContent = (uint8_t*)this + lpTargetImageOffset;
1064 bool isLazyStub = contentIsRegularStub(helperContent);
1065 // ignore rebases for normal lazy pointers, but leave rebase for resolver helper stub
1066 if ( isLazyStub )
1067 skipRebase = true;
1068 }
1069 else {
1070 // if lazy pointer does not point into stub_helper, then it points to weak-def symbol and we need rebase
1071 }
1072 }
1073 if ( !skipRebase ) {
1074 uint64_t runtimeOffset = rebaseVmAddr - startVmAddr;
1075 handler(runtimeOffset, stop);
1076 }
1077 });
1078 }
1079
1080
1081 bool MachOAnalyzer::contentIsRegularStub(const uint8_t* helperContent) const
1082 {
1083 switch (this->cputype) {
1084 case CPU_TYPE_X86_64:
1085 return ( (helperContent[0] == 0x68) && (helperContent[5] == 0xE9) ); // push $xxx / JMP pcRel
1086 case CPU_TYPE_I386:
1087 return ( (helperContent[0] == 0x68) && (helperContent[5] == 0xFF) && (helperContent[2] == 0x26) ); // push $xxx / JMP *pcRel
1088 case CPU_TYPE_ARM:
1089 return ( (helperContent[0] == 0x00) && (helperContent[1] == 0xC0) && (helperContent[2] == 0x9F) && (helperContent[3] == 0xE5) ); // ldr ip, [pc, #0]
1090 case CPU_TYPE_ARM64:
1091 return ( (helperContent[0] == 0x50) && (helperContent[1] == 0x00) && (helperContent[2] == 0x00) && (helperContent[3] == 0x18) ); // ldr w16, L0
1092
1093 }
1094 return false;
1095 }
1096
// qsort() comparator that orders uint32_t values ascending.
// Returns a negative value, zero, or a positive value when *l is less than, equal to, or greater
// than *r.  Equal keys must compare as 0: qsort() requires a consistent ordering, and reporting
// "greater" in both directions for equal elements violates that contract.
static int uint32Sorter(const void* l, const void* r) {
    const uint32_t lhs = *static_cast<const uint32_t*>(l);
    const uint32_t rhs = *static_cast<const uint32_t*>(r);
    if ( lhs < rhs )
        return -1;
    if ( lhs > rhs )
        return 1;
    return 0;
}
1103
1104
1105 void MachOAnalyzer::forEachRebase(Diagnostics& diag,
1106 void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1107 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1108 uint8_t type, bool& stop)) const
1109 {
1110 LinkEditInfo leInfo;
1111 getLinkEditPointers(diag, leInfo);
1112 if ( diag.hasError() )
1113 return;
1114
1115 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1116 getAllSegmentsInfos(diag, segmentsInfo);
1117 if ( diag.hasError() )
1118 return;
1119
1120 if ( leInfo.dyldInfo != nullptr ) {
1121 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
1122 const uint8_t* end = p + leInfo.dyldInfo->rebase_size;
1123 const uint32_t ptrSize = pointerSize();
1124 uint8_t type = 0;
1125 int segIndex = 0;
1126 uint64_t segOffset = 0;
1127 uint64_t count;
1128 uint64_t skip;
1129 bool segIndexSet = false;
1130 bool stop = false;
1131 while ( !stop && diag.noError() && (p < end) ) {
1132 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
1133 uint8_t opcode = *p & REBASE_OPCODE_MASK;
1134 ++p;
1135 switch (opcode) {
1136 case REBASE_OPCODE_DONE:
1137 stop = true;
1138 break;
1139 case REBASE_OPCODE_SET_TYPE_IMM:
1140 type = immediate;
1141 break;
1142 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1143 segIndex = immediate;
1144 segOffset = read_uleb128(diag, p, end);
1145 segIndexSet = true;
1146 break;
1147 case REBASE_OPCODE_ADD_ADDR_ULEB:
1148 segOffset += read_uleb128(diag, p, end);
1149 break;
1150 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
1151 segOffset += immediate*ptrSize;
1152 break;
1153 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
1154 for (int i=0; i < immediate; ++i) {
1155 handler("REBASE_OPCODE_DO_REBASE_IMM_TIMES", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1156 segOffset += ptrSize;
1157 if ( stop )
1158 break;
1159 }
1160 break;
1161 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
1162 count = read_uleb128(diag, p, end);
1163 for (uint32_t i=0; i < count; ++i) {
1164 handler("REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1165 segOffset += ptrSize;
1166 if ( stop )
1167 break;
1168 }
1169 break;
1170 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
1171 handler("REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1172 segOffset += read_uleb128(diag, p, end) + ptrSize;
1173 break;
1174 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
1175 count = read_uleb128(diag, p, end);
1176 if ( diag.hasError() )
1177 break;
1178 skip = read_uleb128(diag, p, end);
1179 for (uint32_t i=0; i < count; ++i) {
1180 handler("REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1181 segOffset += skip + ptrSize;
1182 if ( stop )
1183 break;
1184 }
1185 break;
1186 default:
1187 diag.error("unknown rebase opcode 0x%02X", opcode);
1188 }
1189 }
1190 }
1191 else {
1192 // old binary, walk relocations
1193 const uint64_t relocsStartAddress = relocBaseAddress(segmentsInfo, leInfo.layout.linkeditSegIndex);
1194 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->locreloff);
1195 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nlocrel];
1196 bool stop = false;
1197 const uint8_t relocSize = (is64() ? 3 : 2);
1198 const uint8_t ptrSize = pointerSize();
1199 STACK_ALLOC_OVERFLOW_SAFE_ARRAY(uint32_t, relocAddrs, 2048);
1200 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
1201 if ( reloc->r_length != relocSize ) {
1202 diag.error("local relocation has wrong r_length");
1203 break;
1204 }
1205 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
1206 diag.error("local relocation has wrong r_type");
1207 break;
1208 }
1209 relocAddrs.push_back(reloc->r_address);
1210 }
1211 if ( !relocAddrs.empty() ) {
1212 ::qsort(&relocAddrs[0], relocAddrs.count(), sizeof(uint32_t), &uint32Sorter);
1213 for (uint32_t addrOff : relocAddrs) {
1214 uint32_t segIndex = 0;
1215 uint64_t segOffset = 0;
1216 if ( segIndexAndOffsetForAddress(relocsStartAddress+addrOff, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1217 uint8_t type = REBASE_TYPE_POINTER;
1218 if ( this->cputype == CPU_TYPE_I386 ) {
1219 if ( segmentsInfo[segIndex].executable() )
1220 type = REBASE_TYPE_TEXT_ABSOLUTE32;
1221 }
1222 handler("local relocation", leInfo, segmentsInfo, true, ptrSize, segIndex, segOffset, type , stop);
1223 }
1224 else {
1225 diag.error("local relocation has out of range r_address");
1226 break;
1227 }
1228 }
1229 }
1230 // then process indirect symbols
1231 forEachIndirectPointer(diag, ^(uint64_t address, bool bind, int bindLibOrdinal,
1232 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
1233 if ( bind )
1234 return;
1235 uint32_t segIndex = 0;
1236 uint64_t segOffset = 0;
1237 if ( segIndexAndOffsetForAddress(address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1238 handler("local relocation", leInfo, segmentsInfo, true, ptrSize, segIndex, segOffset, REBASE_TYPE_POINTER, indStop);
1239 }
1240 else {
1241 diag.error("local relocation has out of range r_address");
1242 indStop = true;
1243 }
1244 });
1245 }
1246 }
1247
1248 bool MachOAnalyzer::segIndexAndOffsetForAddress(uint64_t addr, const SegmentInfo segmentsInfos[], uint32_t segCount, uint32_t& segIndex, uint64_t& segOffset) const
1249 {
1250 for (uint32_t i=0; i < segCount; ++i) {
1251 if ( (segmentsInfos[i].vmAddr <= addr) && (addr < segmentsInfos[i].vmAddr+segmentsInfos[i].vmSize) ) {
1252 segIndex = i;
1253 segOffset = addr - segmentsInfos[i].vmAddr;
1254 return true;
1255 }
1256 }
1257 return false;
1258 }
1259
1260 uint64_t MachOAnalyzer::relocBaseAddress(const SegmentInfo segmentsInfos[], uint32_t segCount) const
1261 {
1262 if ( is64() ) {
1263 // x86_64 reloc base address is first writable segment
1264 for (uint32_t i=0; i < segCount; ++i) {
1265 if ( segmentsInfos[i].writable() )
1266 return segmentsInfos[i].vmAddr;
1267 }
1268 }
1269 return segmentsInfos[0].vmAddr;
1270 }
1271
1272
1273
1274 void MachOAnalyzer::forEachIndirectPointer(Diagnostics& diag, void (^handler)(uint64_t pointerAddress, bool bind, int bindLibOrdinal, const char* bindSymbolName,
1275 bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& stop)) const
1276 {
1277 LinkEditInfo leInfo;
1278 getLinkEditPointers(diag, leInfo);
1279 if ( diag.hasError() )
1280 return;
1281
1282 // find lazy and non-lazy pointer sections
1283 const bool is64Bit = is64();
1284 const uint32_t* const indirectSymbolTable = (uint32_t*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->indirectsymoff);
1285 const uint32_t indirectSymbolTableCount = leInfo.dynSymTab->nindirectsyms;
1286 const uint32_t ptrSize = pointerSize();
1287 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
1288 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
1289 const struct nlist* symbols32 = (struct nlist*)symbolTable;
1290 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1291 uint32_t symCount = leInfo.symTab->nsyms;
1292 uint32_t poolSize = leInfo.symTab->strsize;
1293 __block bool stop = false;
1294 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectInfo, bool malformedSectionRange, bool& sectionStop) {
1295 uint8_t sectionType = (sectInfo.sectFlags & SECTION_TYPE);
1296 bool selfModifyingStub = (sectionType == S_SYMBOL_STUBS) && (sectInfo.sectFlags & S_ATTR_SELF_MODIFYING_CODE) && (sectInfo.reserved2 == 5) && (this->cputype == CPU_TYPE_I386);
1297 if ( (sectionType != S_LAZY_SYMBOL_POINTERS) && (sectionType != S_NON_LAZY_SYMBOL_POINTERS) && !selfModifyingStub )
1298 return;
1299 if ( (flags & S_ATTR_SELF_MODIFYING_CODE) && !selfModifyingStub ) {
1300 diag.error("S_ATTR_SELF_MODIFYING_CODE section type only valid in old i386 binaries");
1301 sectionStop = true;
1302 return;
1303 }
1304 uint32_t elementSize = selfModifyingStub ? sectInfo.reserved2 : ptrSize;
1305 uint32_t elementCount = (uint32_t)(sectInfo.sectSize/elementSize);
1306 if ( greaterThanAddOrOverflow(sectInfo.reserved1, elementCount, indirectSymbolTableCount) ) {
1307 diag.error("section %s overflows indirect symbol table", sectInfo.sectName);
1308 sectionStop = true;
1309 return;
1310 }
1311
1312 for (uint32_t i=0; (i < elementCount) && !stop; ++i) {
1313 uint32_t symNum = indirectSymbolTable[sectInfo.reserved1 + i];
1314 if ( symNum == INDIRECT_SYMBOL_ABS )
1315 continue;
1316 if ( symNum == INDIRECT_SYMBOL_LOCAL ) {
1317 handler(sectInfo.sectAddr+i*elementSize, false, 0, "", false, false, false, stop);
1318 continue;
1319 }
1320 if ( symNum > symCount ) {
1321 diag.error("indirect symbol[%d] = %d which is invalid symbol index", sectInfo.reserved1 + i, symNum);
1322 sectionStop = true;
1323 return;
1324 }
1325 uint16_t n_desc = is64Bit ? symbols64[symNum].n_desc : symbols32[symNum].n_desc;
1326 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
1327 uint32_t strOffset = is64Bit ? symbols64[symNum].n_un.n_strx : symbols32[symNum].n_un.n_strx;
1328 if ( strOffset > poolSize ) {
1329 diag.error("symbol[%d] string offset out of range", sectInfo.reserved1 + i);
1330 sectionStop = true;
1331 return;
1332 }
1333 const char* symbolName = stringPool + strOffset;
1334 bool weakImport = (n_desc & N_WEAK_REF);
1335 bool lazy = (sectionType == S_LAZY_SYMBOL_POINTERS);
1336 handler(sectInfo.sectAddr+i*elementSize, true, libOrdinal, symbolName, weakImport, lazy, selfModifyingStub, stop);
1337 }
1338 sectionStop = stop;
1339 });
1340 }
1341
1342 int MachOAnalyzer::libOrdinalFromDesc(uint16_t n_desc) const
1343 {
1344 // -flat_namespace is always flat lookup
1345 if ( (this->flags & MH_TWOLEVEL) == 0 )
1346 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1347
1348 // extract byte from undefined symbol entry
1349 int libIndex = GET_LIBRARY_ORDINAL(n_desc);
1350 switch ( libIndex ) {
1351 case SELF_LIBRARY_ORDINAL:
1352 return BIND_SPECIAL_DYLIB_SELF;
1353
1354 case DYNAMIC_LOOKUP_ORDINAL:
1355 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1356
1357 case EXECUTABLE_ORDINAL:
1358 return BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
1359 }
1360
1361 return libIndex;
1362 }
1363
1364 bool MachOAnalyzer::validBindInfo(Diagnostics& diag, const char* path) const
1365 {
1366 forEachBind(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1367 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1368 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1369 uint8_t type, const char* symbolName, bool weakImport, uint64_t addend, bool& stop) {
1370 if ( invalidBindState(diag, opcodeName, path, leInfo, segments, segIndexSet, libraryOrdinalSet, dylibCount,
1371 libOrdinal, ptrSize, segmentIndex, segmentOffset, type, symbolName) ) {
1372 stop = true;
1373 }
1374 }, ^(const char* symbolName) {
1375 });
1376 return diag.noError();
1377 }
1378
1379 bool MachOAnalyzer::invalidBindState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1380 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint32_t ptrSize,
1381 uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, const char* symbolName) const
1382 {
1383 if ( !segIndexSet ) {
1384 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path, opcodeName);
1385 return true;
1386 }
1387 if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
1388 diag.error("in '%s' %s segment index %d too large", path, opcodeName, segmentIndex);
1389 return true;
1390 }
1391 if ( segmentOffset > (segments[segmentIndex].vmSize-ptrSize) ) {
1392 diag.error("in '%s' %s current segment offset 0x%08llX beyond segment size (0x%08llX)", path, opcodeName, segmentOffset, segments[segmentIndex].vmSize);
1393 return true;
1394 }
1395 if ( symbolName == NULL ) {
1396 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", path, opcodeName);
1397 return true;
1398 }
1399 if ( !libraryOrdinalSet ) {
1400 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL", path, opcodeName);
1401 return true;
1402 }
1403 if ( libOrdinal > (int)dylibCount ) {
1404 diag.error("in '%s' %s has library ordinal too large (%d) max (%d)", path, opcodeName, libOrdinal, dylibCount);
1405 return true;
1406 }
1407 if ( libOrdinal < BIND_SPECIAL_DYLIB_WEAK_DEF_COALESCE ) {
1408 diag.error("in '%s' %s has unknown library special ordinal (%d)", path, opcodeName, libOrdinal);
1409 return true;
1410 }
1411 switch ( type ) {
1412 case BIND_TYPE_POINTER:
1413 if ( !segments[segmentIndex].writable() ) {
1414 diag.error("in '%s' %s pointer bind is in non-writable segment", path, opcodeName);
1415 return true;
1416 }
1417 if ( segments[segmentIndex].executable() ) {
1418 diag.error("in '%s' %s pointer bind is in executable segment", path, opcodeName);
1419 return true;
1420 }
1421 break;
1422 case BIND_TYPE_TEXT_ABSOLUTE32:
1423 case BIND_TYPE_TEXT_PCREL32:
1424 if ( !segments[segmentIndex].textRelocs ) {
1425 diag.error("in '%s' %s text bind is in segment that does not support text relocations", path, opcodeName);
1426 return true;
1427 }
1428 if ( segments[segmentIndex].writable() ) {
1429 diag.error("in '%s' %s text bind is in writable segment", path, opcodeName);
1430 return true;
1431 }
1432 if ( !segments[segmentIndex].executable() ) {
1433 diag.error("in '%s' %s pointer bind is in non-executable segment", path, opcodeName);
1434 return true;
1435 }
1436 break;
1437 default:
1438 diag.error("in '%s' %s unknown bind type %d", path, opcodeName, type);
1439 return true;
1440 }
1441 return false;
1442 }
1443
1444 void MachOAnalyzer::forEachBind(Diagnostics& diag, void (^handler)(uint64_t runtimeOffset, int libOrdinal, const char* symbolName,
1445 bool weakImport, uint64_t addend, bool& stop),
1446 void (^strongHandler)(const char* symbolName)) const
1447 {
1448 __block bool startVmAddrSet = false;
1449 __block uint64_t startVmAddr = 0;
1450 forEachBind(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1451 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1452 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1453 uint8_t type, const char* symbolName, bool weakImport, uint64_t addend, bool& stop) {
1454 if ( !startVmAddrSet ) {
1455 for (int i=0; i <= segmentIndex; ++i) {
1456 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1457 startVmAddr = segments[i].vmAddr;
1458 startVmAddrSet = true;
1459 break;
1460 }
1461 }
1462 }
1463 uint64_t bindVmOffset = segments[segmentIndex].vmAddr + segmentOffset;
1464 uint64_t runtimeOffset = bindVmOffset - startVmAddr;
1465 handler(runtimeOffset, libOrdinal, symbolName, weakImport, addend, stop);
1466 }, ^(const char* symbolName) {
1467 strongHandler(symbolName);
1468 });
1469 }
1470
1471 void MachOAnalyzer::forEachBind(Diagnostics& diag,
1472 void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1473 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1474 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1475 uint8_t type, const char* symbolName, bool weakImport, uint64_t addend, bool& stop),
1476 void (^strongHandler)(const char* symbolName)) const
1477 {
1478 const uint32_t ptrSize = this->pointerSize();
1479 bool stop = false;
1480
1481 LinkEditInfo leInfo;
1482 getLinkEditPointers(diag, leInfo);
1483 if ( diag.hasError() )
1484 return;
1485
1486 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1487 getAllSegmentsInfos(diag, segmentsInfo);
1488 if ( diag.hasError() )
1489 return;
1490
1491
1492
1493 const uint32_t dylibCount = dependentDylibCount();
1494
1495 if ( leInfo.dyldInfo != nullptr ) {
1496 // process bind opcodes
1497 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
1498 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
1499 uint8_t type = 0;
1500 uint64_t segmentOffset = 0;
1501 uint8_t segmentIndex = 0;
1502 const char* symbolName = NULL;
1503 int libraryOrdinal = 0;
1504 bool segIndexSet = false;
1505 bool libraryOrdinalSet = false;
1506
1507 int64_t addend = 0;
1508 uint64_t count;
1509 uint64_t skip;
1510 bool weakImport = false;
1511 while ( !stop && diag.noError() && (p < end) ) {
1512 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1513 uint8_t opcode = *p & BIND_OPCODE_MASK;
1514 ++p;
1515 switch (opcode) {
1516 case BIND_OPCODE_DONE:
1517 stop = true;
1518 break;
1519 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1520 libraryOrdinal = immediate;
1521 libraryOrdinalSet = true;
1522 break;
1523 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1524 libraryOrdinal = (int)read_uleb128(diag, p, end);
1525 libraryOrdinalSet = true;
1526 break;
1527 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1528 // the special ordinals are negative numbers
1529 if ( immediate == 0 )
1530 libraryOrdinal = 0;
1531 else {
1532 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1533 libraryOrdinal = signExtended;
1534 }
1535 libraryOrdinalSet = true;
1536 break;
1537 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1538 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1539 symbolName = (char*)p;
1540 while (*p != '\0')
1541 ++p;
1542 ++p;
1543 break;
1544 case BIND_OPCODE_SET_TYPE_IMM:
1545 type = immediate;
1546 break;
1547 case BIND_OPCODE_SET_ADDEND_SLEB:
1548 addend = read_sleb128(diag, p, end);
1549 break;
1550 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1551 segmentIndex = immediate;
1552 segmentOffset = read_uleb128(diag, p, end);
1553 segIndexSet = true;
1554 break;
1555 case BIND_OPCODE_ADD_ADDR_ULEB:
1556 segmentOffset += read_uleb128(diag, p, end);
1557 break;
1558 case BIND_OPCODE_DO_BIND:
1559 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1560 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1561 segmentOffset += ptrSize;
1562 break;
1563 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1564 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1565 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1566 segmentOffset += read_uleb128(diag, p, end) + ptrSize;
1567 break;
1568 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1569 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1570 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1571 segmentOffset += immediate*ptrSize + ptrSize;
1572 break;
1573 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1574 count = read_uleb128(diag, p, end);
1575 skip = read_uleb128(diag, p, end);
1576 for (uint32_t i=0; i < count; ++i) {
1577 handler("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1578 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1579 segmentOffset += skip + ptrSize;
1580 if ( stop )
1581 break;
1582 }
1583 break;
1584 default:
1585 diag.error("bad bind opcode 0x%02X", *p);
1586 }
1587 }
1588 if ( diag.hasError() )
1589 return;
1590
1591 // process lazy bind opcodes
1592 if ( leInfo.dyldInfo->lazy_bind_size != 0 ) {
1593 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
1594 end = p + leInfo.dyldInfo->lazy_bind_size;
1595 type = BIND_TYPE_POINTER;
1596 segmentOffset = 0;
1597 segmentIndex = 0;
1598 symbolName = NULL;
1599 libraryOrdinal = 0;
1600 segIndexSet = false;
1601 libraryOrdinalSet= false;
1602 addend = 0;
1603 weakImport = false;
1604 stop = false;
1605 while ( !stop && diag.noError() && (p < end) ) {
1606 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1607 uint8_t opcode = *p & BIND_OPCODE_MASK;
1608 ++p;
1609 switch (opcode) {
1610 case BIND_OPCODE_DONE:
1611 // this opcode marks the end of each lazy pointer binding
1612 break;
1613 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1614 libraryOrdinal = immediate;
1615 libraryOrdinalSet = true;
1616 break;
1617 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1618 libraryOrdinal = (int)read_uleb128(diag, p, end);
1619 libraryOrdinalSet = true;
1620 break;
1621 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1622 // the special ordinals are negative numbers
1623 if ( immediate == 0 )
1624 libraryOrdinal = 0;
1625 else {
1626 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1627 libraryOrdinal = signExtended;
1628 }
1629 libraryOrdinalSet = true;
1630 break;
1631 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1632 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1633 symbolName = (char*)p;
1634 while (*p != '\0')
1635 ++p;
1636 ++p;
1637 break;
1638 case BIND_OPCODE_SET_ADDEND_SLEB:
1639 addend = read_sleb128(diag, p, end);
1640 break;
1641 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1642 segmentIndex = immediate;
1643 segmentOffset = read_uleb128(diag, p, end);
1644 segIndexSet = true;
1645 break;
1646 case BIND_OPCODE_DO_BIND:
1647 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1648 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1649 segmentOffset += ptrSize;
1650 break;
1651 case BIND_OPCODE_SET_TYPE_IMM:
1652 case BIND_OPCODE_ADD_ADDR_ULEB:
1653 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1654 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1655 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1656 default:
1657 diag.error("bad lazy bind opcode 0x%02X", opcode);
1658 break;
1659 }
1660 }
1661 }
1662 if ( diag.hasError() )
1663 return;
1664
1665 // process weak bind info
1666 if ( leInfo.dyldInfo->weak_bind_size != 0 ) {
1667 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->weak_bind_off);
1668 end = p + leInfo.dyldInfo->weak_bind_size;
1669 type = BIND_TYPE_POINTER;
1670 segmentOffset = 0;
1671 segmentIndex = 0;
1672 symbolName = NULL;
1673 libraryOrdinal = BIND_SPECIAL_DYLIB_WEAK_DEF_COALESCE;
1674 segIndexSet = false;
1675 libraryOrdinalSet= true;
1676 addend = 0;
1677 weakImport = false;
1678 stop = false;
1679 while ( !stop && diag.noError() && (p < end) ) {
1680 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1681 uint8_t opcode = *p & BIND_OPCODE_MASK;
1682 ++p;
1683 switch (opcode) {
1684 case BIND_OPCODE_DONE:
1685 stop = true;
1686 break;
1687 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1688 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1689 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1690 diag.error("unexpected dylib ordinal in weak_bind");
1691 break;
1692 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1693 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1694 symbolName = (char*)p;
1695 while (*p != '\0')
1696 ++p;
1697 ++p;
1698 if ( immediate & BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION ) {
1699 strongHandler(symbolName);
1700 }
1701 break;
1702 case BIND_OPCODE_SET_TYPE_IMM:
1703 type = immediate;
1704 break;
1705 case BIND_OPCODE_SET_ADDEND_SLEB:
1706 addend = read_sleb128(diag, p, end);
1707 break;
1708 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1709 segmentIndex = immediate;
1710 segmentOffset = read_uleb128(diag, p, end);
1711 segIndexSet = true;
1712 break;
1713 case BIND_OPCODE_ADD_ADDR_ULEB:
1714 segmentOffset += read_uleb128(diag, p, end);
1715 break;
1716 case BIND_OPCODE_DO_BIND:
1717 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1718 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1719 segmentOffset += ptrSize;
1720 break;
1721 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1722 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1723 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1724 segmentOffset += read_uleb128(diag, p, end) + ptrSize;
1725 break;
1726 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1727 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1728 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1729 segmentOffset += immediate*ptrSize + ptrSize;
1730 break;
1731 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1732 count = read_uleb128(diag, p, end);
1733 skip = read_uleb128(diag, p, end);
1734 for (uint32_t i=0; i < count; ++i) {
1735 handler("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1736 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1737 segmentOffset += skip + ptrSize;
1738 if ( stop )
1739 break;
1740 }
1741 break;
1742 default:
1743 diag.error("bad bind opcode 0x%02X", *p);
1744 }
1745 }
1746 }
1747 }
1748 else {
1749 // old binary, process external relocations
1750 const uint64_t relocsStartAddress = relocBaseAddress(segmentsInfo, leInfo.layout.linkeditSegIndex);
1751 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->extreloff);
1752 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nextrel];
1753 bool is64Bit = is64() ;
1754 const uint8_t relocSize = (is64Bit ? 3 : 2);
1755 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
1756 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
1757 const struct nlist* symbols32 = (struct nlist*)symbolTable;
1758 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1759 uint32_t symCount = leInfo.symTab->nsyms;
1760 uint32_t poolSize = leInfo.symTab->strsize;
1761 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
1762 if ( reloc->r_length != relocSize ) {
1763 diag.error("external relocation has wrong r_length");
1764 break;
1765 }
1766 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
1767 diag.error("external relocation has wrong r_type");
1768 break;
1769 }
1770 uint32_t segIndex = 0;
1771 uint64_t segOffset = 0;
1772 if ( segIndexAndOffsetForAddress(relocsStartAddress+reloc->r_address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1773 uint32_t symbolIndex = reloc->r_symbolnum;
1774 if ( symbolIndex > symCount ) {
1775 diag.error("external relocation has out of range r_symbolnum");
1776 break;
1777 }
1778 else {
1779 uint32_t strOffset = is64Bit ? symbols64[symbolIndex].n_un.n_strx : symbols32[symbolIndex].n_un.n_strx;
1780 uint16_t n_desc = is64Bit ? symbols64[symbolIndex].n_desc : symbols32[symbolIndex].n_desc;
1781 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
1782 if ( strOffset >= poolSize ) {
1783 diag.error("external relocation has r_symbolnum=%d which has out of range n_strx", symbolIndex);
1784 break;
1785 }
1786 else {
1787 const char* symbolName = stringPool + strOffset;
1788 bool weakImport = (n_desc & N_WEAK_REF);
1789 const uint8_t* content = (uint8_t*)this + segmentsInfo[segIndex].vmAddr - leInfo.layout.textUnslidVMAddr + segOffset;
1790 uint64_t addend = is64Bit ? *((uint64_t*)content) : *((uint32_t*)content);
1791 handler("external relocation", leInfo, segmentsInfo, true, true, dylibCount, libOrdinal,
1792 ptrSize, segIndex, segOffset, BIND_TYPE_POINTER, symbolName, weakImport, addend, stop);
1793 }
1794 }
1795 }
1796 else {
1797 diag.error("local relocation has out of range r_address");
1798 break;
1799 }
1800 }
1801 // then process indirect symbols
1802 forEachIndirectPointer(diag, ^(uint64_t address, bool bind, int bindLibOrdinal,
1803 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
1804 if ( !bind )
1805 return;
1806 uint32_t segIndex = 0;
1807 uint64_t segOffset = 0;
1808 if ( segIndexAndOffsetForAddress(address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1809 handler("indirect symbol", leInfo, segmentsInfo, true, true, dylibCount, bindLibOrdinal,
1810 ptrSize, segIndex, segOffset, BIND_TYPE_POINTER, bindSymbolName, bindWeakImport, 0, indStop);
1811 }
1812 else {
1813 diag.error("indirect symbol has out of range address");
1814 indStop = true;
1815 }
1816 });
1817 }
1818
1819 }
1820
1821
1822 bool MachOAnalyzer::validChainedFixupsInfo(Diagnostics& diag, const char* path) const
1823 {
1824 __block uint32_t maxTargetCount = 0;
1825 __block uint32_t currentTargetCount = 0;
1826 forEachChainedFixup(diag,
1827 ^(uint32_t totalTargets, bool& stop) {
1828 maxTargetCount = totalTargets;
1829 },
1830 ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop) {
1831 if ( symbolName == NULL ) {
1832 diag.error("in '%s' missing BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", path);
1833 }
1834 else if ( !libraryOrdinalSet ) {
1835 diag.error("in '%s' missing BIND_OPCODE_SET_DYLIB_ORDINAL", path);
1836 }
1837 else if ( libOrdinal > (int)dylibCount ) {
1838 diag.error("in '%s' has library ordinal too large (%d) max (%d)", path, libOrdinal, dylibCount);
1839 }
1840 else if ( libOrdinal < BIND_SPECIAL_DYLIB_WEAK_DEF_COALESCE ) {
1841 diag.error("in '%s' has unknown library special ordinal (%d)", path, libOrdinal);
1842 }
1843 else if ( type != BIND_TYPE_POINTER ) {
1844 diag.error("in '%s' unknown bind type %d", path, type);
1845 }
1846 else if ( currentTargetCount > maxTargetCount ) {
1847 diag.error("in '%s' chained target counts exceeds BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB", path);
1848 }
1849 ++currentTargetCount;
1850 if ( diag.hasError() )
1851 stop = true;
1852 },
1853 ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, bool& stop) {
1854 if ( !segIndexSet ) {
1855 diag.error("in '%s' missing BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path);
1856 }
1857 else if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
1858 diag.error("in '%s' segment index %d too large", path, segmentIndex);
1859 }
1860 else if ( segmentOffset > (segments[segmentIndex].vmSize-8) ) {
1861 diag.error("in '%s' current segment offset 0x%08llX beyond segment size (0x%08llX)", path, segmentOffset, segments[segmentIndex].vmSize);
1862 }
1863 else if ( !segments[segmentIndex].writable() ) {
1864 diag.error("in '%s' pointer bind is in non-writable segment", path);
1865 }
1866 else if ( segments[segmentIndex].executable() ) {
1867 diag.error("in '%s' pointer bind is in executable segment", path);
1868 }
1869 if ( diag.hasError() )
1870 stop = true;
1871 }
1872 );
1873
1874 return diag.noError();
1875 }
1876
1877
1878 void MachOAnalyzer::forEachChainedFixup(Diagnostics& diag, void (^targetCount)(uint32_t totalTargets, bool& stop),
1879 void (^addTarget)(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop),
1880 void (^addChainStart)(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, bool& stop)) const
1881 {
1882 bool stop = false;
1883
1884 LinkEditInfo leInfo;
1885 getLinkEditPointers(diag, leInfo);
1886 if ( diag.hasError() )
1887 return;
1888
1889 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1890 getAllSegmentsInfos(diag, segmentsInfo);
1891 if ( diag.hasError() )
1892 return;
1893
1894 const uint32_t dylibCount = dependentDylibCount();
1895
1896 if ( leInfo.dyldInfo != nullptr ) {
1897 // process bind opcodes
1898 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
1899 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
1900 uint8_t type = 0;
1901 uint64_t segmentOffset = 0;
1902 uint8_t segmentIndex = 0;
1903 const char* symbolName = NULL;
1904 int libraryOrdinal = 0;
1905 bool segIndexSet = false;
1906 bool libraryOrdinalSet = false;
1907 uint64_t targetTableCount;
1908 uint64_t addend = 0;
1909 bool weakImport = false;
1910 while ( !stop && diag.noError() && (p < end) ) {
1911 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1912 uint8_t opcode = *p & BIND_OPCODE_MASK;
1913 ++p;
1914 switch (opcode) {
1915 case BIND_OPCODE_DONE:
1916 stop = true;
1917 break;
1918 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1919 libraryOrdinal = immediate;
1920 libraryOrdinalSet = true;
1921 break;
1922 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1923 libraryOrdinal = (int)read_uleb128(diag, p, end);
1924 libraryOrdinalSet = true;
1925 break;
1926 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1927 // the special ordinals are negative numbers
1928 if ( immediate == 0 )
1929 libraryOrdinal = 0;
1930 else {
1931 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1932 libraryOrdinal = signExtended;
1933 }
1934 libraryOrdinalSet = true;
1935 break;
1936 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1937 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1938 symbolName = (char*)p;
1939 while (*p != '\0')
1940 ++p;
1941 ++p;
1942 break;
1943 case BIND_OPCODE_SET_TYPE_IMM:
1944 type = immediate;
1945 break;
1946 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1947 segmentIndex = immediate;
1948 segmentOffset = read_uleb128(diag, p, end);
1949 segIndexSet = true;
1950 break;
1951 case BIND_OPCODE_SET_ADDEND_SLEB:
1952 addend = read_sleb128(diag, p, end);
1953 break;
1954 case BIND_OPCODE_DO_BIND:
1955 if ( addTarget )
1956 addTarget(leInfo, segmentsInfo, libraryOrdinalSet, dylibCount, libraryOrdinal, type, symbolName, addend, weakImport, stop);
1957 break;
1958 case BIND_OPCODE_THREADED:
1959 switch (immediate) {
1960 case BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB:
1961 targetTableCount = read_uleb128(diag, p, end);
1962 if ( targetTableCount > 65535 ) {
1963 diag.error("BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB size too large");
1964 stop = true;
1965 }
1966 else {
1967 if ( targetCount )
1968 targetCount((uint32_t)targetTableCount, stop);
1969 }
1970 break;
1971 case BIND_SUBOPCODE_THREADED_APPLY:
1972 if ( addChainStart )
1973 addChainStart(leInfo, segmentsInfo, segmentIndex, segIndexSet, segmentOffset, stop);
1974 break;
1975 default:
1976 diag.error("bad BIND_OPCODE_THREADED sub-opcode 0x%02X", immediate);
1977 }
1978 break;
1979 default:
1980 diag.error("bad bind opcode 0x%02X", immediate);
1981 }
1982 }
1983 if ( diag.hasError() )
1984 return;
1985 }
1986 }
1987
1988 void MachOAnalyzer::forEachChainedFixupStart(Diagnostics& diag, void (^callback)(uint64_t runtimeOffset, bool& stop)) const
1989 {
1990 __block bool startVmAddrSet = false;
1991 __block uint64_t startVmAddr = 0;
1992 forEachChainedFixup(diag, nullptr, nullptr, ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, bool& stop) {
1993 if ( !startVmAddrSet ) {
1994 for (int i=0; i <= segmentIndex; ++i) {
1995 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1996 startVmAddr = segments[i].vmAddr;
1997 startVmAddrSet = true;
1998 break;
1999 }
2000 }
2001 }
2002 uint64_t startVmOffset = segments[segmentIndex].vmAddr + segmentOffset;
2003 uint64_t runtimeOffset = startVmOffset - startVmAddr;
2004 callback((uint32_t)runtimeOffset, stop);
2005 });
2006 }
2007
2008 void MachOAnalyzer::forEachChainedFixupTarget(Diagnostics& diag, void (^callback)(int libOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop)) const
2009 {
2010 forEachChainedFixup(diag, nullptr, ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount,
2011 int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop){
2012 callback(libOrdinal, symbolName, addend, weakImport, stop);
2013 }, nullptr);
2014 }
2015
2016 uint32_t MachOAnalyzer::segmentCount() const
2017 {
2018 __block uint32_t count = 0;
2019 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2020 ++count;
2021 });
2022 return count;
2023 }
2024
2025 bool MachOAnalyzer::hasCodeSignature(uint32_t& fileOffset, uint32_t& size) const
2026 {
2027 fileOffset = 0;
2028 size = 0;
2029
2030 Diagnostics diag;
2031 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2032 if ( cmd->cmd == LC_CODE_SIGNATURE ) {
2033 const linkedit_data_command* sigCmd = (linkedit_data_command*)cmd;
2034 fileOffset = sigCmd->dataoff;
2035 size = sigCmd->datasize;
2036 stop = true;
2037 }
2038 });
2039 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2040
2041 // early exist if no LC_CODE_SIGNATURE
2042 if ( fileOffset == 0 )
2043 return false;
2044
2045 // <rdar://problem/13622786> ignore code signatures in macOS binaries built with pre-10.9 tools
2046 __block bool goodSignature = true;
2047 if ( (this->cputype == CPU_TYPE_X86_64) || (this->cputype == CPU_TYPE_I386) ) {
2048 forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
2049 if ( (platform == Platform::macOS) && (sdk < 0x000A0900) )
2050 goodSignature = false;
2051 });
2052 }
2053
2054 return goodSignature;
2055 }
2056
2057 bool MachOAnalyzer::hasInitializer(Diagnostics& diag, bool contentRebased, const void* dyldCache) const
2058 {
2059 __block bool result = false;
2060 forEachInitializer(diag, contentRebased, ^(uint32_t offset) {
2061 result = true;
2062 }, dyldCache);
2063 return result;
2064 }
2065
2066 void MachOAnalyzer::forEachInitializer(Diagnostics& diag, bool contentRebased, void (^callback)(uint32_t offset), const void* dyldCache) const
2067 {
2068 __block uint64_t prefTextSegAddrStart = 0;
2069 __block uint64_t prefTextSegAddrEnd = 0;
2070
2071 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2072 if ( strcmp(info.segName, "__TEXT") == 0 ) {
2073 prefTextSegAddrStart = info.vmAddr;
2074 prefTextSegAddrEnd = info.vmAddr + info.vmSize;
2075 stop = true;
2076 }
2077 });
2078 if ( prefTextSegAddrStart == prefTextSegAddrEnd ) {
2079 diag.error("no __TEXT segment");
2080 return;
2081 }
2082 uint64_t slide = (long)this - prefTextSegAddrStart;
2083
2084 // if dylib linked with -init linker option, that initializer is first
2085 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2086 if ( cmd->cmd == LC_ROUTINES ) {
2087 const routines_command* routines = (routines_command*)cmd;
2088 uint64_t dashInit = routines->init_address;
2089 if ( (prefTextSegAddrStart < dashInit) && (dashInit < prefTextSegAddrEnd) )
2090 callback((uint32_t)(dashInit - prefTextSegAddrStart));
2091 else
2092 diag.error("-init does not point within __TEXT segment");
2093 }
2094 else if ( cmd->cmd == LC_ROUTINES_64 ) {
2095 const routines_command_64* routines = (routines_command_64*)cmd;
2096 uint64_t dashInit = routines->init_address;
2097 if ( (prefTextSegAddrStart < dashInit) && (dashInit < prefTextSegAddrEnd) )
2098 callback((uint32_t)(dashInit - prefTextSegAddrStart));
2099 else
2100 diag.error("-init does not point within __TEXT segment");
2101 }
2102 });
2103
2104 // next any function pointers in mod-init section
2105 unsigned ptrSize = pointerSize();
2106 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2107 if ( (info.sectFlags & SECTION_TYPE) == S_MOD_INIT_FUNC_POINTERS ) {
2108 const uint8_t* content;
2109 content = (uint8_t*)(info.sectAddr + slide);
2110 if ( (info.sectSize % ptrSize) != 0 ) {
2111 diag.error("initializer section %s/%s has bad size", info.segInfo.segName, info.sectName);
2112 stop = true;
2113 return;
2114 }
2115 if ( malformedSectionRange ) {
2116 diag.error("initializer section %s/%s extends beyond its segment", info.segInfo.segName, info.sectName);
2117 stop = true;
2118 return;
2119 }
2120 if ( ((long)content % ptrSize) != 0 ) {
2121 diag.error("initializer section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2122 stop = true;
2123 return;
2124 }
2125 if ( ptrSize == 8 ) {
2126 const uint64_t* initsStart = (uint64_t*)content;
2127 const uint64_t* initsEnd = (uint64_t*)((uint8_t*)content + info.sectSize);
2128 for (const uint64_t* p=initsStart; p < initsEnd; ++p) {
2129 uint64_t anInit = *p;
2130 if ( contentRebased )
2131 anInit -= slide;
2132 if ( hasChainedFixups() ) {
2133 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2134 if ( aChainedInit->authBind.bind )
2135 diag.error("initializer uses bind");
2136 if ( aChainedInit->authRebase.auth ) {
2137 anInit = aChainedInit->authRebase.target;
2138 }
2139 else {
2140 anInit = aChainedInit->plainRebase.signExtendedTarget();
2141 }
2142 }
2143 if ( (anInit <= prefTextSegAddrStart) || (anInit > prefTextSegAddrEnd) ) {
2144 diag.error("initializer 0x%0llX does not point within __TEXT segment", anInit);
2145 stop = true;
2146 break;
2147 }
2148 callback((uint32_t)(anInit - prefTextSegAddrStart));
2149 }
2150 }
2151 else {
2152 const uint32_t* initsStart = (uint32_t*)content;
2153 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + info.sectSize);
2154 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2155 uint32_t anInit = *p;
2156 if ( contentRebased )
2157 anInit -= slide;
2158 if ( (anInit <= prefTextSegAddrStart) || (anInit > prefTextSegAddrEnd) ) {
2159 diag.error("initializer 0x%0X does not point within __TEXT segment", anInit);
2160 stop = true;
2161 break;
2162 }
2163 callback(anInit - (uint32_t)prefTextSegAddrStart);
2164 }
2165 }
2166 }
2167 });
2168 }
2169
2170
2171 void MachOAnalyzer::forEachRPath(void (^callback)(const char* rPath, bool& stop)) const
2172 {
2173 Diagnostics diag;
2174 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2175 if ( cmd->cmd == LC_RPATH ) {
2176 const char* rpath = (char*)cmd + ((struct rpath_command*)cmd)->path.offset;
2177 callback(rpath, stop);
2178 }
2179 });
2180 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2181 }
2182
2183
2184 bool MachOAnalyzer::hasObjC() const
2185 {
2186 __block bool result = false;
2187 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2188 if ( (strcmp(info.sectName, "__objc_imageinfo") == 0) && (strncmp(info.segInfo.segName, "__DATA", 6) == 0) ) {
2189 result = true;
2190 stop = true;
2191 }
2192 if ( (this->cputype == CPU_TYPE_I386) && (strcmp(info.sectName, "__image_info") == 0) && (strcmp(info.segInfo.segName, "__OBJC") == 0) ) {
2193 result = true;
2194 stop = true;
2195 }
2196 });
2197 return result;
2198 }
2199
2200 bool MachOAnalyzer::hasPlusLoadMethod(Diagnostics& diag) const
2201 {
2202 __block bool result = false;
2203 if ( (this->cputype == CPU_TYPE_I386) && supportsPlatform(Platform::macOS) ) {
2204 // old objc runtime has no special section for +load methods, scan for string
2205 int64_t slide = getSlide();
2206 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2207 if ( ( (info.sectFlags & SECTION_TYPE) == S_CSTRING_LITERALS ) ) {
2208 if ( malformedSectionRange ) {
2209 diag.error("cstring section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2210 stop = true;
2211 return;
2212 }
2213 const uint8_t* content = (uint8_t*)(info.sectAddr + slide);
2214 const char* s = (char*)content;
2215 const char* end = s + info.sectSize;
2216 while ( s < end ) {
2217 if ( strcmp(s, "load") == 0 ) {
2218 result = true;
2219 stop = true;
2220 return;
2221 }
2222 while (*s != '\0' )
2223 ++s;
2224 ++s;
2225 }
2226 }
2227 });
2228 }
2229 else {
2230 // in new objc runtime compiler puts classes/categories with +load method in specical section
2231 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2232 if ( strncmp(info.segInfo.segName, "__DATA", 6) != 0 )
2233 return;
2234 if ( (strcmp(info.sectName, "__objc_nlclslist") == 0) || (strcmp(info.sectName, "__objc_nlcatlist") == 0)) {
2235 result = true;
2236 stop = true;
2237 }
2238 });
2239 }
2240 return result;
2241 }
2242
2243 const void* MachOAnalyzer::getRebaseOpcodes(uint32_t& size) const
2244 {
2245 Diagnostics diag;
2246 LinkEditInfo leInfo;
2247 getLinkEditPointers(diag, leInfo);
2248 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2249 return nullptr;
2250
2251 size = leInfo.dyldInfo->rebase_size;
2252 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
2253 }
2254
2255 const void* MachOAnalyzer::getBindOpcodes(uint32_t& size) const
2256 {
2257 Diagnostics diag;
2258 LinkEditInfo leInfo;
2259 getLinkEditPointers(diag, leInfo);
2260 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2261 return nullptr;
2262
2263 size = leInfo.dyldInfo->bind_size;
2264 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
2265 }
2266
2267 const void* MachOAnalyzer::getLazyBindOpcodes(uint32_t& size) const
2268 {
2269 Diagnostics diag;
2270 LinkEditInfo leInfo;
2271 getLinkEditPointers(diag, leInfo);
2272 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2273 return nullptr;
2274
2275 size = leInfo.dyldInfo->lazy_bind_size;
2276 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
2277 }
2278
2279
2280 uint64_t MachOAnalyzer::segAndOffsetToRuntimeOffset(uint8_t targetSegIndex, uint64_t targetSegOffset) const
2281 {
2282 __block uint64_t textVmAddr = 0;
2283 __block uint64_t result = 0;
2284 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2285 if ( strcmp(info.segName, "__TEXT") == 0 )
2286 textVmAddr = info.vmAddr;
2287 if ( info.segIndex == targetSegIndex ) {
2288 result = (info.vmAddr - textVmAddr) + targetSegOffset;
2289 }
2290 });
2291 return result;
2292 }
2293
2294 bool MachOAnalyzer::hasLazyPointers(uint32_t& runtimeOffset, uint32_t& size) const
2295 {
2296 size = 0;
2297 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2298 if ( (info.sectFlags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
2299 runtimeOffset = (uint32_t)(info.sectAddr - preferredLoadAddress());
2300 size = (uint32_t)info.sectSize;
2301 stop = true;
2302 }
2303 });
2304 return (size != 0);
2305 }
2306
2307 uint64_t MachOAnalyzer::preferredLoadAddress() const
2308 {
2309 __block uint64_t textVmAddr = 0;
2310 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2311 if ( strcmp(info.segName, "__TEXT") == 0 ) {
2312 textVmAddr = info.vmAddr;
2313 stop = true;
2314 }
2315 });
2316 return textVmAddr;
2317 }
2318
2319
2320 bool MachOAnalyzer::getEntry(uint32_t& offset, bool& usesCRT) const
2321 {
2322 Diagnostics diag;
2323 offset = 0;
2324 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2325 if ( cmd->cmd == LC_MAIN ) {
2326 entry_point_command* mainCmd = (entry_point_command*)cmd;
2327 usesCRT = false;
2328 offset = (uint32_t)mainCmd->entryoff;
2329 stop = true;
2330 }
2331 else if ( cmd->cmd == LC_UNIXTHREAD ) {
2332 stop = true;
2333 usesCRT = true;
2334 uint64_t startAddress = entryAddrFromThreadCmd((thread_command*)cmd);
2335 offset = (uint32_t)(startAddress - preferredLoadAddress());
2336 }
2337 });
2338 return (offset != 0);
2339 }
2340
2341 uint64_t MachOAnalyzer::entryAddrFromThreadCmd(const thread_command* cmd) const
2342 {
2343 assert(cmd->cmd == LC_UNIXTHREAD);
2344 const uint32_t* regs32 = (uint32_t*)(((char*)cmd) + 16);
2345 const uint64_t* regs64 = (uint64_t*)(((char*)cmd) + 16);
2346 uint64_t startAddress = 0;
2347 switch ( this->cputype ) {
2348 case CPU_TYPE_I386:
2349 startAddress = regs32[10]; // i386_thread_state_t.eip
2350 break;
2351 case CPU_TYPE_X86_64:
2352 startAddress = regs64[16]; // x86_thread_state64_t.rip
2353 break;
2354 }
2355 return startAddress;
2356 }
2357
2358
2359 void MachOAnalyzer::forEachInterposingSection(Diagnostics& diag, void (^handler)(uint64_t vmOffset, uint64_t vmSize, bool& stop)) const
2360 {
2361 const unsigned ptrSize = pointerSize();
2362 const unsigned entrySize = 2 * ptrSize;
2363 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2364 if ( ((info.sectFlags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(info.sectName, "__interpose") == 0) && (strcmp(info.segInfo.segName, "__DATA") == 0)) ) {
2365 if ( info.sectSize % entrySize != 0 ) {
2366 diag.error("interposing section %s/%s has bad size", info.segInfo.segName, info.sectName);
2367 stop = true;
2368 return;
2369 }
2370 if ( malformedSectionRange ) {
2371 diag.error("interposing section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2372 stop = true;
2373 return;
2374 }
2375 if ( (info.sectAddr % ptrSize) != 0 ) {
2376 diag.error("interposing section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2377 stop = true;
2378 return;
2379 }
2380 handler(info.sectAddr - preferredLoadAddress(), info.sectSize, stop);
2381 }
2382 });
2383 }
2384
2385 void MachOAnalyzer::forEachDOFSection(Diagnostics& diag, void (^callback)(uint32_t offset)) const
2386 {
2387 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2388 if ( ( (info.sectFlags & SECTION_TYPE) == S_DTRACE_DOF ) && !malformedSectionRange ) {
2389 callback((uint32_t)(info.sectAddr - info.segInfo.vmAddr));
2390 }
2391 });
2392 }
2393
2394 bool MachOAnalyzer::getCDHash(uint8_t cdHash[20]) const
2395 {
2396 Diagnostics diag;
2397 LinkEditInfo leInfo;
2398 getLinkEditPointers(diag, leInfo);
2399 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2400 return false;
2401
2402 return cdHashOfCodeSignature(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff), leInfo.codeSig->datasize, cdHash);
2403 }
2404
2405 bool MachOAnalyzer::isRestricted() const
2406 {
2407 __block bool result = false;
2408 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2409 if ( (strcmp(info.segInfo.segName, "__RESTRICT") == 0) && (strcmp(info.sectName, "__restrict") == 0) ) {
2410 result = true;
2411 stop = true;
2412 }
2413 });
2414 return result;
2415 }
2416
2417 bool MachOAnalyzer::usesLibraryValidation() const
2418 {
2419 Diagnostics diag;
2420 LinkEditInfo leInfo;
2421 getLinkEditPointers(diag, leInfo);
2422 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2423 return false;
2424
2425 const CS_CodeDirectory* cd = (const CS_CodeDirectory*)findCodeDirectoryBlob(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff), leInfo.codeSig->datasize);
2426 if ( cd == nullptr )
2427 return false;
2428
2429 // check for CS_REQUIRE_LV in CS_CodeDirectory.flags
2430 return (htonl(cd->flags) & CS_REQUIRE_LV);
2431 }
2432
2433 bool MachOAnalyzer::canHavePrecomputedDlopenClosure(const char* path, void (^failureReason)(const char*)) const
2434 {
2435 __block bool retval = true;
2436
2437 // only dylibs can go in cache
2438 if ( (this->filetype != MH_DYLIB) && (this->filetype != MH_BUNDLE) ) {
2439 retval = false;
2440 failureReason("not MH_DYLIB or MH_BUNDLE");
2441 }
2442
2443 // flat namespace files cannot go in cache
2444 if ( (this->flags & MH_TWOLEVEL) == 0 ) {
2445 retval = false;
2446 failureReason("not built with two level namespaces");
2447 }
2448
2449 // can only depend on other dylibs with absolute paths
2450 __block bool allDepPathsAreGood = true;
2451 forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
2452 if ( loadPath[0] != '/' ) {
2453 allDepPathsAreGood = false;
2454 stop = true;
2455 }
2456 });
2457 if ( !allDepPathsAreGood ) {
2458 retval = false;
2459 failureReason("depends on dylibs that are not absolute paths");
2460 }
2461
2462 // dylibs with interposing info cannot have dlopen closure pre-computed
2463 __block bool hasInterposing = false;
2464 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool &stop) {
2465 if ( ((info.sectFlags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(info.sectName, "__interpose") == 0) && (strcmp(info.segInfo.segName, "__DATA") == 0)) )
2466 hasInterposing = true;
2467 });
2468 if ( hasInterposing ) {
2469 retval = false;
2470 failureReason("has interposing tuples");
2471 }
2472
2473 // images that use dynamic_lookup, bundle_loader, or have weak-defs cannot have dlopen closure pre-computed
2474 Diagnostics diag;
2475 auto checkBind = ^(int libOrdinal, bool& stop) {
2476 switch (libOrdinal) {
2477 case BIND_SPECIAL_DYLIB_WEAK_DEF_COALESCE:
2478 failureReason("has weak externals");
2479 retval = false;
2480 stop = true;
2481 break;
2482 case BIND_SPECIAL_DYLIB_FLAT_LOOKUP:
2483 failureReason("has dynamic_lookup binds");
2484 retval = false;
2485 stop = true;
2486 break;
2487 case BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE:
2488 failureReason("has reference to main executable (bundle loader)");
2489 retval = false;
2490 stop = true;
2491 break;
2492 }
2493 };
2494
2495 if (hasChainedFixups()) {
2496 forEachChainedFixupTarget(diag, ^(int libOrdinal, const char *symbolName, uint64_t addend, bool weakImport, bool &stop) {
2497 checkBind(libOrdinal, stop);
2498 });
2499 } else {
2500 forEachBind(diag, ^(uint64_t runtimeOffset, int libOrdinal, const char* symbolName, bool weakImport, uint64_t addend, bool& stop) {
2501 checkBind(libOrdinal, stop);
2502 },
2503 ^(const char* symbolName) {
2504 });
2505 }
2506
2507 // special system dylib overrides cannot have closure pre-computed
2508 if ( strncmp(path, "/usr/lib/system/introspection/", 30) == 0 ) {
2509 retval = false;
2510 failureReason("override of OS dylib");
2511 }
2512
2513 return retval;
2514 }
2515
2516 bool MachOAnalyzer::canBePlacedInDyldCache(const char* path, void (^failureReason)(const char*)) const
2517 {
2518 if (!MachOFile::canBePlacedInDyldCache(path, failureReason))
2519 return false;
2520 if ( !(isArch("x86_64") || isArch("x86_64h")) )
2521 return true;
2522
2523 // Kick dylibs out of the x86_64 cache if they are using TBI.
2524 __block bool rebasesOk = true;
2525 Diagnostics diag;
2526 uint64_t startVMAddr = preferredLoadAddress();
2527 uint64_t endVMAddr = startVMAddr + mappedSize();
2528 forEachRebase(diag, false, ^(uint64_t runtimeOffset, bool &stop) {
2529 uint64_t value = *(uint64_t*)((uint8_t*)this + runtimeOffset);
2530 if ( (value < startVMAddr) || (value >= endVMAddr) ) {
2531 failureReason("rebase value out of range of dylib");
2532 rebasesOk = false;
2533 stop = true;
2534 }
2535 });
2536 return rebasesOk;
2537 }
2538
2539 } // dyld3
2540
2541