dyld-640.2.tar.gz
[apple/dyld.git] / dyld3 / MachOAnalyzer.cpp
1 /*
2 * Copyright (c) 2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 #include <sys/types.h>
25 #include <mach/mach.h>
26 #include <assert.h>
27 #include <limits.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <mach-o/reloc.h>
32 #include <mach-o/nlist.h>
33 #include <TargetConditionals.h>
34
35 #include "MachOAnalyzer.h"
36 #include "CodeSigningTypes.h"
37 #include "Array.h"
38
39 #include <stdio.h>
40
41
// Fallback values for the threaded-bind opcode and its sub-opcodes.
// Each one is only defined here when no previously included header has
// already provided it.
#if !defined(BIND_OPCODE_THREADED)
    #define BIND_OPCODE_THREADED    0xD0
#endif

#if !defined(BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB)
    #define BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB    0x00
#endif

#if !defined(BIND_SUBOPCODE_THREADED_APPLY)
    #define BIND_SUBOPCODE_THREADED_APPLY    0x01
#endif
53
54
55 namespace dyld3 {
56
57
58 const MachOAnalyzer* MachOAnalyzer::validMainExecutable(Diagnostics& diag, const mach_header* mh, const char* path, uint64_t sliceLength, const char* reqArchName, Platform reqPlatform)
59 {
60 const MachOAnalyzer* result = (const MachOAnalyzer*)mh;
61 if ( !result->validMachOForArchAndPlatform(diag, (size_t)sliceLength, path, reqArchName, reqPlatform) )
62 return nullptr;
63 if ( !result->isDynamicExecutable() )
64 return nullptr;
65
66 return result;
67 }
68
69
70 closure::LoadedFileInfo MachOAnalyzer::load(Diagnostics& diag, const closure::FileSystem& fileSystem, const char* path, const char* reqArchName, Platform reqPlatform)
71 {
72 // FIXME: This should probably be an assert, but if we happen to have a diagnostic here then something is wrong
73 // above us and we should quickly return instead of doing unnecessary work.
74 if (diag.hasError())
75 return closure::LoadedFileInfo();
76
77 closure::LoadedFileInfo info;
78 char realerPath[MAXPATHLEN];
79 if (!fileSystem.loadFile(path, info, realerPath, ^(const char *format, ...) {
80 va_list list;
81 va_start(list, format);
82 diag.error(format, list);
83 va_end(list);
84 })) {
85 return closure::LoadedFileInfo();
86 }
87
88 // If we now have an error, but succeeded, then we must have tried multiple paths, one of which errored, but
89 // then succeeded on a later path. So clear the error.
90 if (diag.hasError())
91 diag.clearError();
92
93 // if fat, remap just slice needed
94 bool fatButMissingSlice;
95 const FatFile* fh = (FatFile*)info.fileContent;
96 uint64_t sliceOffset = info.sliceOffset;
97 uint64_t sliceLen = info.sliceLen;
98 if ( fh->isFatFileWithSlice(diag, info.fileContentLen, reqArchName, sliceOffset, sliceLen, fatButMissingSlice) ) {
99 if ( (sliceOffset & 0xFFF) != 0 ) {
100 // slice not page aligned
101 if ( strncmp((char*)info.fileContent + sliceOffset, "!<arch>", 7) == 0 )
102 diag.error("file is static library");
103 else
104 diag.error("slice is not page aligned");
105 fileSystem.unloadFile(info);
106 return closure::LoadedFileInfo();
107 }
108 else {
109 // unmap anything before slice
110 fileSystem.unloadPartialFile(info, sliceOffset, sliceLen);
111 // Update the info to keep track of the new slice offset.
112 info.sliceOffset = sliceOffset;
113 info.sliceLen = sliceLen;
114 }
115 }
116 else if ( fatButMissingSlice ) {
117 diag.error("missing required arch %s in %s", reqArchName, path);
118 fileSystem.unloadFile(info);
119 return closure::LoadedFileInfo();
120 }
121
122 const MachOAnalyzer* mh = (MachOAnalyzer*)info.fileContent;
123
124 // validate is mach-o of requested arch and platform
125 if ( !mh->validMachOForArchAndPlatform(diag, (size_t)info.sliceLen, path, reqArchName, reqPlatform) ) {
126 fileSystem.unloadFile(info);
127 return closure::LoadedFileInfo();
128 }
129
130 // if has zero-fill expansion, re-map
131 mh = mh->remapIfZeroFill(diag, fileSystem, info);
132
133 // on error, remove mappings and return nullptr
134 if ( diag.hasError() ) {
135 fileSystem.unloadFile(info);
136 return closure::LoadedFileInfo();
137 }
138
139 // now that LINKEDIT is at expected offset, finish validation
140 mh->validLinkedit(diag, path);
141
142 // on error, remove mappings and return nullptr
143 if ( diag.hasError() ) {
144 fileSystem.unloadFile(info);
145 return closure::LoadedFileInfo();
146 }
147
148 return info;
149 }
150
#if DEBUG
// Debug-only helper for the cache builder: re-runs the LINKEDIT and segment
// validators so that segment moves can be verified.  Problems are reported
// through `diag`; the validators' boolean results are not used.
void MachOAnalyzer::validateDyldCacheDylib(Diagnostics& diag, const char* path) const
{
    // file length limit handed to the segment validator
    const size_t fileLenLimit = 0xffffffff;

    (void)validLinkedit(diag, path);
    (void)validSegments(diag, path, fileLenLimit);
}
#endif
159
160 uint64_t MachOAnalyzer::mappedSize() const
161 {
162 const uint32_t pageSize = uses16KPages() ? 0x4000 : 0x1000;
163 __block uint64_t textSegVmAddr = 0;
164 __block uint64_t vmSpaceRequired = 0;
165 forEachSegment(^(const SegmentInfo& info, bool& stop) {
166 if ( strcmp(info.segName, "__TEXT") == 0 ) {
167 textSegVmAddr = info.vmAddr;
168 }
169 else if ( strcmp(info.segName, "__LINKEDIT") == 0 ) {
170 vmSpaceRequired = info.vmAddr + ((info.vmSize + (pageSize-1)) & (-pageSize)) - textSegVmAddr;
171 stop = true;
172 }
173 });
174
175 return vmSpaceRequired;
176 }
177
178 bool MachOAnalyzer::validMachOForArchAndPlatform(Diagnostics& diag, size_t sliceLength, const char* path, const char* reqArchName, Platform reqPlatform) const
179 {
180 // must start with mach-o magic value
181 if ( (this->magic != MH_MAGIC) && (this->magic != MH_MAGIC_64) ) {
182 diag.error("could not use '%s' because it is not a mach-o file: 0x%08X 0x%08X", path, this->magic, this->cputype);
183 return false;
184 }
185
186 // must match requested architecture, if specified
187 if ( reqArchName != nullptr ) {
188 if ( !this->isArch(reqArchName)) {
189 // except when looking for x86_64h, fallback to x86_64
190 if ( (strcmp(reqArchName, "x86_64h") != 0) || !this->isArch("x86_64") ) {
191 #if SUPPORT_ARCH_arm64e
192 // except when looking for arm64e, fallback to arm64
193 if ( (strcmp(reqArchName, "arm64e") != 0) || !this->isArch("arm64") ) {
194 #endif
195 diag.error("could not use '%s' because it does not contain required architecture %s", path, reqArchName);
196 return false;
197 #if SUPPORT_ARCH_arm64e
198 }
199 #endif
200 }
201 }
202 }
203
204 // must be a filetype dyld can load
205 switch ( this->filetype ) {
206 case MH_EXECUTE:
207 case MH_DYLIB:
208 case MH_BUNDLE:
209 break;
210 default:
211 diag.error("could not use '%s' because it is not a dylib, bundle, or executable, filetype=0x%08X", path, this->filetype);
212 return false;
213 }
214
215 // validate load commands structure
216 if ( !this->validLoadCommands(diag, path, sliceLength) ) {
217 return false;
218 }
219
220 // filter out static executables
221 if ( (this->filetype == MH_EXECUTE) && !isDynamicExecutable() ) {
222 diag.error("could not use '%s' because it is a static executable", path);
223 return false;
224 }
225
226 // must match requested platform (do this after load commands are validated)
227 if ( !this->supportsPlatform(reqPlatform) ) {
228 diag.error("could not use '%s' because it was built for a different platform", path);
229 return false;
230 }
231
232 // validate dylib loads
233 if ( !validEmbeddedPaths(diag, path) )
234 return false;
235
236 // validate segments
237 if ( !validSegments(diag, path, sliceLength) )
238 return false;
239
240 // validate entry
241 if ( this->filetype == MH_EXECUTE ) {
242 if ( !validMain(diag, path) )
243 return false;
244 }
245
246 // <rdar://problem/45525884> to avoid heap smasher, don't load this dylib
247 if ( strcmp(path, "/usr/lib/libnetsnmp.5.2.1.dylib") == 0 )
248 return false;
249
250 // further validations done in validLinkedit()
251
252 return true;
253 }
254
255 bool MachOAnalyzer::validLinkedit(Diagnostics& diag, const char* path) const
256 {
257 // validate LINKEDIT layout
258 if ( !validLinkeditLayout(diag, path) )
259 return false;
260
261 if ( hasChainedFixups() ) {
262 if ( !validChainedFixupsInfo(diag, path) )
263 return false;
264 }
265 else {
266 // validate rebasing info
267 if ( !validRebaseInfo(diag, path) )
268 return false;
269
270 // validate binding info
271 if ( !validBindInfo(diag, path) )
272 return false;
273 }
274
275 return true;
276 }
277
278 bool MachOAnalyzer::validLoadCommands(Diagnostics& diag, const char* path, size_t fileLen) const
279 {
280 // check load command don't exceed file length
281 if ( this->sizeofcmds + sizeof(mach_header_64) > fileLen ) {
282 diag.error("in '%s' load commands exceed length of file", path);
283 return false;
284 }
285
286 // walk all load commands and sanity check them
287 Diagnostics walkDiag;
288 forEachLoadCommand(walkDiag, ^(const load_command* cmd, bool& stop) {});
289 if ( walkDiag.hasError() ) {
290 #if BUILDING_CACHE_BUILDER
291 diag.error("in '%s' %s", path, walkDiag.errorMessage().c_str());
292 #else
293 diag.error("in '%s' %s", path, walkDiag.errorMessage());
294 #endif
295 return false;
296 }
297
298 // check load commands fit in TEXT segment
299 __block bool foundTEXT = false;
300 forEachSegment(^(const SegmentInfo& info, bool& stop) {
301 if ( strcmp(info.segName, "__TEXT") == 0 ) {
302 foundTEXT = true;
303 if ( this->sizeofcmds + sizeof(mach_header_64) > info.fileSize ) {
304 diag.error("in '%s' load commands exceed length of __TEXT segment", path);
305 }
306 if ( info.fileOffset != 0 ) {
307 diag.error("in '%s' __TEXT segment not start of mach-o", path);
308 }
309 stop = true;
310 }
311 });
312 if ( !diag.noError() && !foundTEXT ) {
313 diag.error("in '%s' __TEXT segment not found", path);
314 return false;
315 }
316
317 return true;
318 }
319
320 const MachOAnalyzer* MachOAnalyzer::remapIfZeroFill(Diagnostics& diag, const closure::FileSystem& fileSystem, closure::LoadedFileInfo& info) const
321 {
322 uint64_t vmSpaceRequired;
323 auto hasZeroFill = [this, &vmSpaceRequired]() {
324 __block bool hasZeroFill = false;
325 __block uint64_t textSegVmAddr = 0;
326 forEachSegment(^(const SegmentInfo& segmentInfo, bool& stop) {
327 if ( strcmp(segmentInfo.segName, "__TEXT") == 0 ) {
328 textSegVmAddr = segmentInfo.vmAddr;
329 }
330 else if ( strcmp(segmentInfo.segName, "__LINKEDIT") == 0 ) {
331 uint64_t vmOffset = segmentInfo.vmAddr - textSegVmAddr;
332 // A zero fill page in the __DATA segment means the file offset of __LINKEDIT is less than its vm offset
333 if ( segmentInfo.fileOffset != vmOffset )
334 hasZeroFill = true;
335 vmSpaceRequired = segmentInfo.vmAddr + segmentInfo.vmSize - textSegVmAddr;
336 stop = true;
337 }
338 });
339 return hasZeroFill;
340 };
341
342 if (hasZeroFill()) {
343 vm_address_t newMappedAddr;
344 if ( ::vm_allocate(mach_task_self(), &newMappedAddr, (size_t)vmSpaceRequired, VM_FLAGS_ANYWHERE) != 0 ) {
345 diag.error("vm_allocate failure");
346 return nullptr;
347 }
348 // mmap() each segment read-only with standard layout
349 __block uint64_t textSegVmAddr;
350 forEachSegment(^(const SegmentInfo& segmentInfo, bool& stop) {
351 if ( strcmp(segmentInfo.segName, "__TEXT") == 0 )
352 textSegVmAddr = segmentInfo.vmAddr;
353 if ( segmentInfo.fileSize != 0 ) {
354 kern_return_t r = vm_copy(mach_task_self(), (vm_address_t)((long)info.fileContent+segmentInfo.fileOffset), (vm_size_t)segmentInfo.fileSize, (vm_address_t)(newMappedAddr+segmentInfo.vmAddr-textSegVmAddr));
355 if ( r != KERN_SUCCESS ) {
356 diag.error("vm_copy() failure");
357 stop = true;
358 }
359 }
360 });
361 if ( diag.noError() ) {
362 // remove original mapping and return new mapping
363 fileSystem.unloadFile(info);
364
365 // Set vm_deallocate as the unload method.
366 info.unload = [](const closure::LoadedFileInfo& info) {
367 ::vm_deallocate(mach_task_self(), (vm_address_t)info.fileContent, (size_t)info.fileContentLen);
368 };
369
370 // And update the file content to the new location
371 info.fileContent = (const void*)newMappedAddr;
372 info.fileContentLen = vmSpaceRequired;
373 return (const MachOAnalyzer*)info.fileContent;
374 }
375 else {
376 // new mapping failed, return old mapping with an error in diag
377 ::vm_deallocate(mach_task_self(), newMappedAddr, (size_t)vmSpaceRequired);
378 return nullptr;
379 }
380 }
381
382 return this;
383 }
384
385 bool MachOAnalyzer::enforceFormat(Malformed kind) const
386 {
387 #if TARGET_OS_OSX
388 __block bool result = false;
389 forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
390 if ( platform == Platform::macOS ) {
391 switch (kind) {
392 case Malformed::linkeditOrder:
393 case Malformed::linkeditAlignment:
394 case Malformed::dyldInfoAndlocalRelocs:
395 // enforce these checks on new binaries only
396 result = (sdk >= 0x000A0E00); // macOS 10.14
397 }
398 }
399 });
400 // if binary is so old, there is no platform info, don't enforce malformed errors
401 return result;
402 #else
403 return true;
404 #endif
405 }
406
407 bool MachOAnalyzer::validEmbeddedPaths(Diagnostics& diag, const char* path) const
408 {
409 __block int index = 1;
410 __block bool allGood = true;
411 __block bool foundInstallName = false;
412 __block int dependentsCount = 0;
413 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
414 const dylib_command* dylibCmd;
415 const rpath_command* rpathCmd;
416 switch ( cmd->cmd ) {
417 case LC_ID_DYLIB:
418 foundInstallName = true;
419 // fall through
420 case LC_LOAD_DYLIB:
421 case LC_LOAD_WEAK_DYLIB:
422 case LC_REEXPORT_DYLIB:
423 case LC_LOAD_UPWARD_DYLIB:
424 dylibCmd = (dylib_command*)cmd;
425 if ( dylibCmd->dylib.name.offset > cmd->cmdsize ) {
426 diag.error("in '%s' load command #%d name offset (%u) outside its size (%u)", path, index, dylibCmd->dylib.name.offset, cmd->cmdsize);
427 stop = true;
428 allGood = false;
429 }
430 else {
431 bool foundEnd = false;
432 const char* start = (char*)dylibCmd + dylibCmd->dylib.name.offset;
433 const char* end = (char*)dylibCmd + cmd->cmdsize;
434 for (const char* s=start; s < end; ++s) {
435 if ( *s == '\0' ) {
436 foundEnd = true;
437 break;
438 }
439 }
440 if ( !foundEnd ) {
441 diag.error("in '%s' load command #%d string extends beyond end of load command", path, index);
442 stop = true;
443 allGood = false;
444 }
445 }
446 if ( cmd->cmd != LC_ID_DYLIB )
447 ++dependentsCount;
448 break;
449 case LC_RPATH:
450 rpathCmd = (rpath_command*)cmd;
451 if ( rpathCmd->path.offset > cmd->cmdsize ) {
452 diag.error("in '%s' load command #%d path offset (%u) outside its size (%u)", path, index, rpathCmd->path.offset, cmd->cmdsize);
453 stop = true;
454 allGood = false;
455 }
456 else {
457 bool foundEnd = false;
458 const char* start = (char*)rpathCmd + rpathCmd->path.offset;
459 const char* end = (char*)rpathCmd + cmd->cmdsize;
460 for (const char* s=start; s < end; ++s) {
461 if ( *s == '\0' ) {
462 foundEnd = true;
463 break;
464 }
465 }
466 if ( !foundEnd ) {
467 diag.error("in '%s' load command #%d string extends beyond end of load command", path, index);
468 stop = true;
469 allGood = false;
470 }
471 }
472 break;
473 }
474 ++index;
475 });
476 if ( !allGood )
477 return false;
478
479 if ( this->filetype == MH_DYLIB ) {
480 if ( !foundInstallName ) {
481 diag.error("in '%s' MH_DYLIB is missing LC_ID_DYLIB", path);
482 return false;
483 }
484 }
485 else {
486 if ( foundInstallName ) {
487 diag.error("in '%s' LC_ID_DYLIB found in non-MH_DYLIB", path);
488 return false;
489 }
490 }
491
492 if ( (dependentsCount == 0) && (this->filetype == MH_EXECUTE) ) {
493 diag.error("in '%s' missing LC_LOAD_DYLIB (must link with at least libSystem.dylib)", path);
494 return false;
495 }
496
497 return true;
498 }
499
500 bool MachOAnalyzer::validSegments(Diagnostics& diag, const char* path, size_t fileLen) const
501 {
502 // check segment load command size
503 __block bool badSegmentLoadCommand = false;
504 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
505 if ( cmd->cmd == LC_SEGMENT_64 ) {
506 const segment_command_64* seg = (segment_command_64*)cmd;
507 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command_64);
508 if ( sectionsSpace < 0 ) {
509 diag.error("in '%s' load command size too small for LC_SEGMENT_64", path);
510 badSegmentLoadCommand = true;
511 stop = true;
512 }
513 else if ( (sectionsSpace % sizeof(section_64)) != 0 ) {
514 diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize);
515 badSegmentLoadCommand = true;
516 stop = true;
517 }
518 else if ( sectionsSpace != (seg->nsects * sizeof(section_64)) ) {
519 diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects);
520 badSegmentLoadCommand = true;
521 stop = true;
522 }
523 else if ( greaterThanAddOrOverflow(seg->fileoff, seg->filesize, fileLen) ) {
524 diag.error("in '%s' segment load command content extends beyond end of file", path);
525 badSegmentLoadCommand = true;
526 stop = true;
527 }
528 else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
529 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
530 diag.error("in '%s' segment filesize exceeds vmsize", path);
531 badSegmentLoadCommand = true;
532 stop = true;
533 }
534 }
535 else if ( cmd->cmd == LC_SEGMENT ) {
536 const segment_command* seg = (segment_command*)cmd;
537 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command);
538 if ( sectionsSpace < 0 ) {
539 diag.error("in '%s' load command size too small for LC_SEGMENT", path);
540 badSegmentLoadCommand = true;
541 stop = true;
542 }
543 else if ( (sectionsSpace % sizeof(section)) != 0 ) {
544 diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize);
545 badSegmentLoadCommand = true;
546 stop = true;
547 }
548 else if ( sectionsSpace != (seg->nsects * sizeof(section)) ) {
549 diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects);
550 badSegmentLoadCommand = true;
551 stop = true;
552 }
553 else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
554 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
555 diag.error("in '%s' segment filesize exceeds vmsize", path);
556 badSegmentLoadCommand = true;
557 stop = true;
558 }
559 }
560 });
561 if ( badSegmentLoadCommand )
562 return false;
563
564 // check mapping permissions of segments
565 __block bool badPermissions = false;
566 __block bool badSize = false;
567 __block bool hasTEXT = false;
568 __block bool hasLINKEDIT = false;
569 forEachSegment(^(const SegmentInfo& info, bool& stop) {
570 if ( strcmp(info.segName, "__TEXT") == 0 ) {
571 if ( info.protections != (VM_PROT_READ|VM_PROT_EXECUTE) ) {
572 diag.error("in '%s' __TEXT segment permissions is not 'r-x'", path);
573 badPermissions = true;
574 stop = true;
575 }
576 hasTEXT = true;
577 }
578 else if ( strcmp(info.segName, "__LINKEDIT") == 0 ) {
579 if ( info.protections != VM_PROT_READ ) {
580 diag.error("in '%s' __LINKEDIT segment permissions is not 'r--'", path);
581 badPermissions = true;
582 stop = true;
583 }
584 hasLINKEDIT = true;
585 }
586 else if ( (info.protections & 0xFFFFFFF8) != 0 ) {
587 diag.error("in '%s' %s segment permissions has invalid bits set", path, info.segName);
588 badPermissions = true;
589 stop = true;
590 }
591 if ( greaterThanAddOrOverflow(info.fileOffset, info.fileSize, fileLen) ) {
592 diag.error("in '%s' %s segment content extends beyond end of file", path, info.segName);
593 badSize = true;
594 stop = true;
595 }
596 if ( is64() ) {
597 if ( info.vmAddr+info.vmSize < info.vmAddr ) {
598 diag.error("in '%s' %s segment vm range wraps", path, info.segName);
599 badSize = true;
600 stop = true;
601 }
602 }
603 else {
604 if ( (uint32_t)(info.vmAddr+info.vmSize) < (uint32_t)(info.vmAddr) ) {
605 diag.error("in '%s' %s segment vm range wraps", path, info.segName);
606 badSize = true;
607 stop = true;
608 }
609 }
610 });
611 if ( badPermissions || badSize )
612 return false;
613 if ( !hasTEXT ) {
614 diag.error("in '%s' missing __TEXT segment", path);
615 return false;
616 }
617 if ( !hasLINKEDIT ) {
618 diag.error("in '%s' missing __LINKEDIT segment", path);
619 return false;
620 }
621
622 // check for overlapping segments
623 __block bool badSegments = false;
624 forEachSegment(^(const SegmentInfo& info1, bool& stop1) {
625 uint64_t seg1vmEnd = info1.vmAddr + info1.vmSize;
626 uint64_t seg1FileEnd = info1.fileOffset + info1.fileSize;
627 forEachSegment(^(const SegmentInfo& info2, bool& stop2) {
628 if ( info1.segIndex == info2.segIndex )
629 return;
630 uint64_t seg2vmEnd = info2.vmAddr + info2.vmSize;
631 uint64_t seg2FileEnd = info2.fileOffset + info2.fileSize;
632 if ( ((info2.vmAddr <= info1.vmAddr) && (seg2vmEnd > info1.vmAddr) && (seg1vmEnd > info1.vmAddr )) || ((info2.vmAddr >= info1.vmAddr ) && (info2.vmAddr < seg1vmEnd) && (seg2vmEnd > info2.vmAddr)) ) {
633 diag.error("in '%s' segment %s vm range overlaps segment %s", path, info1.segName, info2.segName);
634 badSegments = true;
635 stop1 = true;
636 stop2 = true;
637 }
638 if ( ((info2.fileOffset <= info1.fileOffset) && (seg2FileEnd > info1.fileOffset) && (seg1FileEnd > info1.fileOffset)) || ((info2.fileOffset >= info1.fileOffset) && (info2.fileOffset < seg1FileEnd) && (seg2FileEnd > info2.fileOffset )) ) {
639 diag.error("in '%s' segment %s file content overlaps segment %s", path, info1.segName, info2.segName);
640 badSegments = true;
641 stop1 = true;
642 stop2 = true;
643 }
644 if ( (info1.segIndex < info2.segIndex) && !stop1 ) {
645 if ( (info1.vmAddr > info2.vmAddr) || ((info1.fileOffset > info2.fileOffset ) && (info1.fileOffset != 0) && (info2.fileOffset != 0)) ){
646 if ( !inDyldCache() ) {
647 // dyld cache __DATA_* segments are moved around
648 diag.error("in '%s' segment load commands out of order with respect to layout for %s and %s", path, info1.segName, info2.segName);
649 badSegments = true;
650 stop1 = true;
651 stop2 = true;
652 }
653 }
654 }
655 });
656 });
657 if ( badSegments )
658 return false;
659
660 // check sections are within segment
661 __block bool badSections = false;
662 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
663 if ( cmd->cmd == LC_SEGMENT_64 ) {
664 const segment_command_64* seg = (segment_command_64*)cmd;
665 const section_64* const sectionsStart = (section_64*)((char*)seg + sizeof(struct segment_command_64));
666 const section_64* const sectionsEnd = &sectionsStart[seg->nsects];
667 for (const section_64* sect=sectionsStart; (sect < sectionsEnd); ++sect) {
668 if ( (int64_t)(sect->size) < 0 ) {
669 diag.error("in '%s' section %s size too large 0x%llX", path, sect->sectname, sect->size);
670 badSections = true;
671 }
672 else if ( sect->addr < seg->vmaddr ) {
673 diag.error("in '%s' section %s start address 0x%llX is before containing segment's address 0x%0llX", path, sect->sectname, sect->addr, seg->vmaddr);
674 badSections = true;
675 }
676 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
677 diag.error("in '%s' section %s end address 0x%llX is beyond containing segment's end address 0x%0llX", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
678 badSections = true;
679 }
680 }
681 }
682 else if ( cmd->cmd == LC_SEGMENT ) {
683 const segment_command* seg = (segment_command*)cmd;
684 const section* const sectionsStart = (section*)((char*)seg + sizeof(struct segment_command));
685 const section* const sectionsEnd = &sectionsStart[seg->nsects];
686 for (const section* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
687 if ( (int64_t)(sect->size) < 0 ) {
688 diag.error("in '%s' section %s size too large 0x%X", path, sect->sectname, sect->size);
689 badSections = true;
690 }
691 else if ( sect->addr < seg->vmaddr ) {
692 diag.error("in '%s' section %s start address 0x%X is before containing segment's address 0x%0X", path, sect->sectname, sect->addr, seg->vmaddr);
693 badSections = true;
694 }
695 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
696 diag.error("in '%s' section %s end address 0x%X is beyond containing segment's end address 0x%0X", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
697 badSections = true;
698 }
699 }
700 }
701 });
702
703 return !badSections;
704 }
705
706
707 bool MachOAnalyzer::validMain(Diagnostics& diag, const char* path) const
708 {
709 __block uint64_t textSegStartAddr = 0;
710 __block uint64_t textSegStartSize = 0;
711 forEachSegment(^(const SegmentInfo& info, bool& stop) {
712 if ( strcmp(info.segName, "__TEXT") == 0 ) {
713 textSegStartAddr = info.vmAddr;
714 textSegStartSize = info.vmSize;
715 stop = true;
716 }
717 });
718
719 __block int mainCount = 0;
720 __block int threadCount = 0;
721 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
722 entry_point_command* mainCmd;
723 uint64_t startAddress;
724 switch (cmd->cmd) {
725 case LC_MAIN:
726 ++mainCount;
727 mainCmd = (entry_point_command*)cmd;
728 if ( mainCmd->entryoff > textSegStartSize ) {
729 diag.error("LC_MAIN points outside of __TEXT segment");
730 stop = true;
731 }
732 break;
733 case LC_UNIXTHREAD:
734 ++threadCount;
735 startAddress = entryAddrFromThreadCmd((thread_command*)cmd);
736 if ( startAddress == 0 ) {
737 diag.error("LC_UNIXTHREAD not valid for arch %s", archName());
738 stop = true;
739 }
740 else if ( (startAddress < textSegStartAddr) || (startAddress > textSegStartAddr+textSegStartSize) ) {
741 diag.error("LC_UNIXTHREAD entry not in __TEXT segment");
742 stop = true;
743 }
744 break;
745 }
746 });
747 if ( diag.hasError() )
748 return false;
749 if ( diag.noError() && (mainCount+threadCount == 1) )
750 return true;
751
752 if ( mainCount + threadCount == 0 )
753 diag.error("missing LC_MAIN or LC_UNIXTHREAD");
754 else
755 diag.error("only one LC_MAIN or LC_UNIXTHREAD is allowed");
756 return false;
757 }
758
759
namespace {
    // Describes one blob of content inside the LINKEDIT segment.
    struct LinkEditContentChunk
    {
        const char* name;             // human readable name used in error messages
        uint32_t    stdOrder;         // rank of this blob in the standard LINKEDIT ordering
        uint32_t    fileOffsetStart;  // file offset at which the blob begins
        uint32_t    size;             // size of the blob in bytes

        // qsort() comparator ordering chunks by ascending file offset.
        // Returns 0 for equal offsets so the ordering is consistent, as qsort() requires.
        static int compareByFileOffset(const void* l, const void* r) {
            const uint32_t left  = static_cast<const LinkEditContentChunk*>(l)->fileOffsetStart;
            const uint32_t right = static_cast<const LinkEditContentChunk*>(r)->fileOffsetStart;
            if ( left < right )
                return -1;
            return (left > right) ? 1 : 0;
        }

        // qsort() comparator ordering chunks by ascending standard order.
        // Returns 0 for equal ranks (several chunk kinds may share a rank).
        static int compareByStandardOrder(const void* l, const void* r) {
            const uint32_t left  = static_cast<const LinkEditContentChunk*>(l)->stdOrder;
            const uint32_t right = static_cast<const LinkEditContentChunk*>(r)->stdOrder;
            if ( left < right )
                return -1;
            return (left > right) ? 1 : 0;
        }
    };
} // anonymous namespace
782
783
784
785 bool MachOAnalyzer::validLinkeditLayout(Diagnostics& diag, const char* path) const
786 {
787 LinkEditInfo leInfo;
788 getLinkEditPointers(diag, leInfo);
789 if ( diag.hasError() )
790 return false;
791 const uint32_t ptrSize = pointerSize();
792
793 // build vector of all blobs in LINKEDIT
794 LinkEditContentChunk blobs[32];
795 LinkEditContentChunk* bp = blobs;
796 if ( leInfo.dyldInfo != nullptr ) {
797 if ( leInfo.dyldInfo->rebase_size != 0 )
798 *bp++ = {"rebase opcodes", 1, leInfo.dyldInfo->rebase_off, leInfo.dyldInfo->rebase_size};
799 if ( leInfo.dyldInfo->bind_size != 0 )
800 *bp++ = {"bind opcodes", 2, leInfo.dyldInfo->bind_off, leInfo.dyldInfo->bind_size};
801 if ( leInfo.dyldInfo->weak_bind_size != 0 )
802 *bp++ = {"weak bind opcodes", 3, leInfo.dyldInfo->weak_bind_off, leInfo.dyldInfo->weak_bind_size};
803 if ( leInfo.dyldInfo->lazy_bind_size != 0 )
804 *bp++ = {"lazy bind opcodes", 4, leInfo.dyldInfo->lazy_bind_off, leInfo.dyldInfo->lazy_bind_size};
805 if ( leInfo.dyldInfo->export_size!= 0 )
806 *bp++ = {"exports trie", 5, leInfo.dyldInfo->export_off, leInfo.dyldInfo->export_size};
807 }
808 if ( leInfo.dynSymTab != nullptr ) {
809 if ( leInfo.dynSymTab->nlocrel != 0 )
810 *bp++ = {"local relocations", 6, leInfo.dynSymTab->locreloff, static_cast<uint32_t>(leInfo.dynSymTab->nlocrel*sizeof(relocation_info))};
811 if ( leInfo.dynSymTab->nextrel != 0 )
812 *bp++ = {"external relocations", 11, leInfo.dynSymTab->extreloff, static_cast<uint32_t>(leInfo.dynSymTab->nextrel*sizeof(relocation_info))};
813 if ( leInfo.dynSymTab->nindirectsyms != 0 )
814 *bp++ = {"indirect symbol table", 12, leInfo.dynSymTab->indirectsymoff, leInfo.dynSymTab->nindirectsyms*4};
815 }
816 if ( leInfo.splitSegInfo != nullptr ) {
817 if ( leInfo.splitSegInfo->datasize != 0 )
818 *bp++ = {"shared cache info", 6, leInfo.splitSegInfo->dataoff, leInfo.splitSegInfo->datasize};
819 }
820 if ( leInfo.functionStarts != nullptr ) {
821 if ( leInfo.functionStarts->datasize != 0 )
822 *bp++ = {"function starts", 7, leInfo.functionStarts->dataoff, leInfo.functionStarts->datasize};
823 }
824 if ( leInfo.dataInCode != nullptr ) {
825 if ( leInfo.dataInCode->datasize != 0 )
826 *bp++ = {"data in code", 8, leInfo.dataInCode->dataoff, leInfo.dataInCode->datasize};
827 }
828 if ( leInfo.symTab != nullptr ) {
829 if ( leInfo.symTab->nsyms != 0 )
830 *bp++ = {"symbol table", 10, leInfo.symTab->symoff, static_cast<uint32_t>(leInfo.symTab->nsyms*(ptrSize == 8 ? sizeof(nlist_64) : sizeof(struct nlist)))};
831 if ( leInfo.symTab->strsize != 0 )
832 *bp++ = {"symbol table strings", 20, leInfo.symTab->stroff, leInfo.symTab->strsize};
833 }
834 if ( leInfo.codeSig != nullptr ) {
835 if ( leInfo.codeSig->datasize != 0 )
836 *bp++ = {"code signature", 21, leInfo.codeSig->dataoff, leInfo.codeSig->datasize};
837 }
838
839 // check for bad combinations
840 if ( (leInfo.dyldInfo != nullptr) && (leInfo.dyldInfo->cmd == LC_DYLD_INFO_ONLY) && (leInfo.dynSymTab != nullptr) ) {
841 if ( (leInfo.dynSymTab->nlocrel != 0) && enforceFormat(Malformed::dyldInfoAndlocalRelocs) ) {
842 diag.error("in '%s' malformed mach-o contains LC_DYLD_INFO_ONLY and local relocations", path);
843 return false;
844 }
845 if ( leInfo.dynSymTab->nextrel != 0 ) {
846 diag.error("in '%s' malformed mach-o contains LC_DYLD_INFO_ONLY and external relocations", path);
847 return false;
848 }
849 }
850 if ( (leInfo.dyldInfo == nullptr) && (leInfo.dynSymTab == nullptr) ) {
851 diag.error("in '%s' malformed mach-o misssing LC_DYLD_INFO and LC_DYSYMTAB", path);
852 return false;
853 }
854 const unsigned long blobCount = bp - blobs;
855 if ( blobCount == 0 ) {
856 diag.error("in '%s' malformed mach-o misssing LINKEDIT", path);
857 return false;
858 }
859
860 uint32_t linkeditFileEnd = leInfo.layout.linkeditFileOffset + leInfo.layout.linkeditFileSize;
861
862
863 // sort blobs by file-offset and error on overlaps
864 ::qsort(blobs, blobCount, sizeof(LinkEditContentChunk), &LinkEditContentChunk::compareByFileOffset);
865 uint32_t prevEnd = leInfo.layout.linkeditFileOffset;
866 const char* prevName = "start of LINKEDIT";
867 for (unsigned long i=0; i < blobCount; ++i) {
868 const LinkEditContentChunk& blob = blobs[i];
869 if ( blob.fileOffsetStart < prevEnd ) {
870 diag.error("in '%s' LINKEDIT overlap of %s and %s", path, prevName, blob.name);
871 return false;
872 }
873 if (greaterThanAddOrOverflow(blob.fileOffsetStart, blob.size, linkeditFileEnd)) {
874 diag.error("in '%s' LINKEDIT content '%s' extends beyond end of segment", path, blob.name);
875 return false;
876 }
877 prevEnd = blob.fileOffsetStart + blob.size;
878 prevName = blob.name;
879 }
880
881 // sort vector by order and warn on non standard order or mis-alignment
882 ::qsort(blobs, blobCount, sizeof(LinkEditContentChunk), &LinkEditContentChunk::compareByStandardOrder);
883 prevEnd = leInfo.layout.linkeditFileOffset;
884 for (unsigned long i=0; i < blobCount; ++i) {
885 const LinkEditContentChunk& blob = blobs[i];
886 if ( ((blob.fileOffsetStart & (ptrSize-1)) != 0) && (blob.stdOrder != 20) && enforceFormat(Malformed::linkeditAlignment) ) // ok for "symbol table strings" to be mis-aligned
887 diag.error("in '%s' mis-aligned LINKEDIT content '%s'", path, blob.name);
888 if ( (blob.fileOffsetStart < prevEnd) && enforceFormat(Malformed::linkeditOrder) ) {
889 diag.error("in '%s' LINKEDIT out of order %s", path, blob.name);
890 }
891 prevEnd = blob.fileOffsetStart;
892 }
893
894 // Check for invalid symbol table sizes
895 if ( leInfo.symTab != nullptr ) {
896 if ( leInfo.symTab->nsyms > 0x10000000 ) {
897 diag.error("in '%s' malformed mach-o image: symbol table too large", path);
898 return false;
899 }
900 if ( leInfo.dynSymTab != nullptr ) {
901 // validate indirect symbol table
902 if ( leInfo.dynSymTab->nindirectsyms != 0 ) {
903 if ( leInfo.dynSymTab->nindirectsyms > 0x10000000 ) {
904 diag.error("in '%s' malformed mach-o image: indirect symbol table too large", path);
905 return false;
906 }
907 }
908 if ( (leInfo.dynSymTab->nlocalsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->ilocalsym > leInfo.symTab->nsyms) ) {
909 diag.error("in '%s' malformed mach-o image: indirect symbol table local symbol count exceeds total symbols", path);
910 return false;
911 }
912 if ( leInfo.dynSymTab->ilocalsym + leInfo.dynSymTab->nlocalsym < leInfo.dynSymTab->ilocalsym ) {
913 diag.error("in '%s' malformed mach-o image: indirect symbol table local symbol count wraps", path);
914 return false;
915 }
916 if ( (leInfo.dynSymTab->nextdefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iextdefsym > leInfo.symTab->nsyms) ) {
917 diag.error("in '%s' malformed mach-o image: indirect symbol table extern symbol count exceeds total symbols", path);
918 return false;
919 }
920 if ( leInfo.dynSymTab->iextdefsym + leInfo.dynSymTab->nextdefsym < leInfo.dynSymTab->iextdefsym ) {
921 diag.error("in '%s' malformed mach-o image: indirect symbol table extern symbol count wraps", path);
922 return false;
923 }
924 if ( (leInfo.dynSymTab->nundefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iundefsym > leInfo.symTab->nsyms) ) {
925 diag.error("in '%s' malformed mach-o image: indirect symbol table undefined symbol count exceeds total symbols", path);
926 return false;
927 }
928 if ( leInfo.dynSymTab->iundefsym + leInfo.dynSymTab->nundefsym < leInfo.dynSymTab->iundefsym ) {
929 diag.error("in '%s' malformed mach-o image: indirect symbol table undefined symbol count wraps", path);
930 return false;
931 }
932 }
933 }
934
935 return true;
936 }
937
938
939
940 bool MachOAnalyzer::invalidRebaseState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
941 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type) const
942 {
943 if ( !segIndexSet ) {
944 diag.error("in '%s' %s missing preceding REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path, opcodeName);
945 return true;
946 }
947 if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
948 diag.error("in '%s' %s segment index %d too large", path, opcodeName, segmentIndex);
949 return true;
950 }
951 if ( segmentOffset > (segments[segmentIndex].vmSize-ptrSize) ) {
952 diag.error("in '%s' %s current segment offset 0x%08llX beyond segment size (0x%08llX)", path, opcodeName, segmentOffset, segments[segmentIndex].vmSize);
953 return true;
954 }
955 switch ( type ) {
956 case REBASE_TYPE_POINTER:
957 if ( !segments[segmentIndex].writable() ) {
958 diag.error("in '%s' %s pointer rebase is in non-writable segment", path, opcodeName);
959 return true;
960 }
961 if ( segments[segmentIndex].executable() ) {
962 diag.error("in '%s' %s pointer rebase is in executable segment", path, opcodeName);
963 return true;
964 }
965 break;
966 case REBASE_TYPE_TEXT_ABSOLUTE32:
967 case REBASE_TYPE_TEXT_PCREL32:
968 if ( !segments[segmentIndex].textRelocs ) {
969 diag.error("in '%s' %s text rebase is in segment that does not support text relocations", path, opcodeName);
970 return true;
971 }
972 if ( segments[segmentIndex].writable() ) {
973 diag.error("in '%s' %s text rebase is in writable segment", path, opcodeName);
974 return true;
975 }
976 if ( !segments[segmentIndex].executable() ) {
977 diag.error("in '%s' %s pointer rebase is in non-executable segment", path, opcodeName);
978 return true;
979 }
980 break;
981 default:
982 diag.error("in '%s' %s unknown rebase type %d", path, opcodeName, type);
983 return true;
984 }
985 return false;
986 }
987
988
989 void MachOAnalyzer::getAllSegmentsInfos(Diagnostics& diag, SegmentInfo segments[]) const
990 {
991 forEachSegment(^(const SegmentInfo& info, bool& stop) {
992 segments[info.segIndex] = info;
993 });
994 }
995
996
997 bool MachOAnalyzer::validRebaseInfo(Diagnostics& diag, const char* path) const
998 {
999 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1000 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1001 if ( invalidRebaseState(diag, opcodeName, path, leInfo, segments, segIndexSet, ptrSize, segmentIndex, segmentOffset, type) )
1002 stop = true;
1003 });
1004 return diag.noError();
1005 }
1006
1007
1008 void MachOAnalyzer::forEachTextRebase(Diagnostics& diag, void (^handler)(uint64_t runtimeOffset, bool& stop)) const
1009 {
1010 __block bool startVmAddrSet = false;
1011 __block uint64_t startVmAddr = 0;
1012 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1013 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1014 if ( type != REBASE_TYPE_TEXT_ABSOLUTE32 )
1015 return;
1016 if ( !startVmAddrSet ) {
1017 for (int i=0; i <= segmentIndex; ++i) {
1018 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1019 startVmAddr = segments[i].vmAddr;
1020 startVmAddrSet = true;
1021 break;
1022 }
1023 }
1024 }
1025 uint64_t rebaseVmAddr = segments[segmentIndex].vmAddr + segmentOffset;
1026 uint64_t runtimeOffset = rebaseVmAddr - startVmAddr;
1027 handler(runtimeOffset, stop);
1028 });
1029 }
1030
1031
1032 void MachOAnalyzer::forEachRebase(Diagnostics& diag, bool ignoreLazyPointers, void (^handler)(uint64_t runtimeOffset, bool& stop)) const
1033 {
1034 __block bool startVmAddrSet = false;
1035 __block uint64_t startVmAddr = 0;
1036 __block uint64_t lpVmAddr = 0;
1037 __block uint64_t lpEndVmAddr = 0;
1038 __block uint64_t shVmAddr = 0;
1039 __block uint64_t shEndVmAddr = 0;
1040 if ( ignoreLazyPointers ) {
1041 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
1042 if ( (info.sectFlags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
1043 lpVmAddr = info.sectAddr;
1044 lpEndVmAddr = info.sectAddr + info.sectSize;
1045 }
1046 else if ( (info.sectFlags & S_ATTR_PURE_INSTRUCTIONS) && (strcmp(info.sectName, "__stub_helper") == 0) ) {
1047 shVmAddr = info.sectAddr;
1048 shEndVmAddr = info.sectAddr + info.sectSize;
1049 }
1050 });
1051 }
1052 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1053 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1054 if ( type != REBASE_TYPE_POINTER )
1055 return;
1056 if ( !startVmAddrSet ) {
1057 for (int i=0; i < segmentIndex; ++i) {
1058 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1059 startVmAddr = segments[i].vmAddr;
1060 startVmAddrSet = true;
1061 break;
1062 }
1063 }
1064 }
1065 uint64_t rebaseVmAddr = segments[segmentIndex].vmAddr + segmentOffset;
1066 bool skipRebase = false;
1067 if ( (rebaseVmAddr >= lpVmAddr) && (rebaseVmAddr < lpEndVmAddr) ) {
1068 // rebase is in lazy pointer section
1069 uint64_t lpValue = 0;
1070 if ( ptrSize == 8 )
1071 lpValue = *((uint64_t*)(rebaseVmAddr-startVmAddr+(uint8_t*)this));
1072 else
1073 lpValue = *((uint32_t*)(rebaseVmAddr-startVmAddr+(uint8_t*)this));
1074 if ( (lpValue >= shVmAddr) && (lpValue < shEndVmAddr) ) {
1075 // content is into stub_helper section
1076 uint64_t lpTargetImageOffset = lpValue - startVmAddr;
1077 const uint8_t* helperContent = (uint8_t*)this + lpTargetImageOffset;
1078 bool isLazyStub = contentIsRegularStub(helperContent);
1079 // ignore rebases for normal lazy pointers, but leave rebase for resolver helper stub
1080 if ( isLazyStub )
1081 skipRebase = true;
1082 }
1083 else {
1084 // if lazy pointer does not point into stub_helper, then it points to weak-def symbol and we need rebase
1085 }
1086 }
1087 if ( !skipRebase ) {
1088 uint64_t runtimeOffset = rebaseVmAddr - startVmAddr;
1089 handler(runtimeOffset, stop);
1090 }
1091 });
1092 }
1093
1094
1095 bool MachOAnalyzer::contentIsRegularStub(const uint8_t* helperContent) const
1096 {
1097 switch (this->cputype) {
1098 case CPU_TYPE_X86_64:
1099 return ( (helperContent[0] == 0x68) && (helperContent[5] == 0xE9) ); // push $xxx / JMP pcRel
1100 case CPU_TYPE_I386:
1101 return ( (helperContent[0] == 0x68) && (helperContent[5] == 0xFF) && (helperContent[2] == 0x26) ); // push $xxx / JMP *pcRel
1102 case CPU_TYPE_ARM:
1103 return ( (helperContent[0] == 0x00) && (helperContent[1] == 0xC0) && (helperContent[2] == 0x9F) && (helperContent[3] == 0xE5) ); // ldr ip, [pc, #0]
1104 case CPU_TYPE_ARM64:
1105 return ( (helperContent[0] == 0x50) && (helperContent[1] == 0x00) && (helperContent[2] == 0x00) && (helperContent[3] == 0x18) ); // ldr w16, L0
1106
1107 }
1108 return false;
1109 }
1110
// qsort() comparator that orders uint32_t values ascending.
// Returns a negative value, zero, or a positive value when *l is less than, equal to, or
// greater than *r.  Equal keys must compare as 0: a comparator that claims both a>b and
// b>a for equal values gives qsort() an inconsistent ordering.
static int uint32Sorter(const void* l, const void* r) {
    const uint32_t lhs = *((const uint32_t*)l);
    const uint32_t rhs = *((const uint32_t*)r);
    if ( lhs < rhs )
        return -1;
    if ( lhs > rhs )
        return 1;
    return 0;
}
1117
1118
1119 void MachOAnalyzer::forEachRebase(Diagnostics& diag,
1120 void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1121 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1122 uint8_t type, bool& stop)) const
1123 {
1124 LinkEditInfo leInfo;
1125 getLinkEditPointers(diag, leInfo);
1126 if ( diag.hasError() )
1127 return;
1128
1129 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1130 getAllSegmentsInfos(diag, segmentsInfo);
1131 if ( diag.hasError() )
1132 return;
1133
1134 if ( leInfo.dyldInfo != nullptr ) {
1135 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
1136 const uint8_t* end = p + leInfo.dyldInfo->rebase_size;
1137 const uint32_t ptrSize = pointerSize();
1138 uint8_t type = 0;
1139 int segIndex = 0;
1140 uint64_t segOffset = 0;
1141 uint64_t count;
1142 uint64_t skip;
1143 bool segIndexSet = false;
1144 bool stop = false;
1145 while ( !stop && diag.noError() && (p < end) ) {
1146 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
1147 uint8_t opcode = *p & REBASE_OPCODE_MASK;
1148 ++p;
1149 switch (opcode) {
1150 case REBASE_OPCODE_DONE:
1151 stop = true;
1152 break;
1153 case REBASE_OPCODE_SET_TYPE_IMM:
1154 type = immediate;
1155 break;
1156 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1157 segIndex = immediate;
1158 segOffset = read_uleb128(diag, p, end);
1159 segIndexSet = true;
1160 break;
1161 case REBASE_OPCODE_ADD_ADDR_ULEB:
1162 segOffset += read_uleb128(diag, p, end);
1163 break;
1164 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
1165 segOffset += immediate*ptrSize;
1166 break;
1167 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
1168 for (int i=0; i < immediate; ++i) {
1169 handler("REBASE_OPCODE_DO_REBASE_IMM_TIMES", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1170 segOffset += ptrSize;
1171 if ( stop )
1172 break;
1173 }
1174 break;
1175 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
1176 count = read_uleb128(diag, p, end);
1177 for (uint32_t i=0; i < count; ++i) {
1178 handler("REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1179 segOffset += ptrSize;
1180 if ( stop )
1181 break;
1182 }
1183 break;
1184 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
1185 handler("REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1186 segOffset += read_uleb128(diag, p, end) + ptrSize;
1187 break;
1188 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
1189 count = read_uleb128(diag, p, end);
1190 if ( diag.hasError() )
1191 break;
1192 skip = read_uleb128(diag, p, end);
1193 for (uint32_t i=0; i < count; ++i) {
1194 handler("REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1195 segOffset += skip + ptrSize;
1196 if ( stop )
1197 break;
1198 }
1199 break;
1200 default:
1201 diag.error("unknown rebase opcode 0x%02X", opcode);
1202 }
1203 }
1204 }
1205 else {
1206 // old binary, walk relocations
1207 const uint64_t relocsStartAddress = relocBaseAddress(segmentsInfo, leInfo.layout.linkeditSegIndex);
1208 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->locreloff);
1209 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nlocrel];
1210 bool stop = false;
1211 const uint8_t relocSize = (is64() ? 3 : 2);
1212 const uint8_t ptrSize = pointerSize();
1213 STACK_ALLOC_OVERFLOW_SAFE_ARRAY(uint32_t, relocAddrs, 2048);
1214 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
1215 if ( reloc->r_length != relocSize ) {
1216 diag.error("local relocation has wrong r_length");
1217 break;
1218 }
1219 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
1220 diag.error("local relocation has wrong r_type");
1221 break;
1222 }
1223 relocAddrs.push_back(reloc->r_address);
1224 }
1225 if ( !relocAddrs.empty() ) {
1226 ::qsort(&relocAddrs[0], relocAddrs.count(), sizeof(uint32_t), &uint32Sorter);
1227 for (uint32_t addrOff : relocAddrs) {
1228 uint32_t segIndex = 0;
1229 uint64_t segOffset = 0;
1230 if ( segIndexAndOffsetForAddress(relocsStartAddress+addrOff, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1231 uint8_t type = REBASE_TYPE_POINTER;
1232 if ( this->cputype == CPU_TYPE_I386 ) {
1233 if ( segmentsInfo[segIndex].executable() )
1234 type = REBASE_TYPE_TEXT_ABSOLUTE32;
1235 }
1236 handler("local relocation", leInfo, segmentsInfo, true, ptrSize, segIndex, segOffset, type , stop);
1237 }
1238 else {
1239 diag.error("local relocation has out of range r_address");
1240 break;
1241 }
1242 }
1243 }
1244 // then process indirect symbols
1245 forEachIndirectPointer(diag, ^(uint64_t address, bool bind, int bindLibOrdinal,
1246 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
1247 if ( bind )
1248 return;
1249 uint32_t segIndex = 0;
1250 uint64_t segOffset = 0;
1251 if ( segIndexAndOffsetForAddress(address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1252 handler("local relocation", leInfo, segmentsInfo, true, ptrSize, segIndex, segOffset, REBASE_TYPE_POINTER, indStop);
1253 }
1254 else {
1255 diag.error("local relocation has out of range r_address");
1256 indStop = true;
1257 }
1258 });
1259 }
1260 }
1261
1262 bool MachOAnalyzer::segIndexAndOffsetForAddress(uint64_t addr, const SegmentInfo segmentsInfos[], uint32_t segCount, uint32_t& segIndex, uint64_t& segOffset) const
1263 {
1264 for (uint32_t i=0; i < segCount; ++i) {
1265 if ( (segmentsInfos[i].vmAddr <= addr) && (addr < segmentsInfos[i].vmAddr+segmentsInfos[i].vmSize) ) {
1266 segIndex = i;
1267 segOffset = addr - segmentsInfos[i].vmAddr;
1268 return true;
1269 }
1270 }
1271 return false;
1272 }
1273
1274 uint64_t MachOAnalyzer::relocBaseAddress(const SegmentInfo segmentsInfos[], uint32_t segCount) const
1275 {
1276 if ( is64() ) {
1277 // x86_64 reloc base address is first writable segment
1278 for (uint32_t i=0; i < segCount; ++i) {
1279 if ( segmentsInfos[i].writable() )
1280 return segmentsInfos[i].vmAddr;
1281 }
1282 }
1283 return segmentsInfos[0].vmAddr;
1284 }
1285
1286
1287
1288 void MachOAnalyzer::forEachIndirectPointer(Diagnostics& diag, void (^handler)(uint64_t pointerAddress, bool bind, int bindLibOrdinal, const char* bindSymbolName,
1289 bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& stop)) const
1290 {
1291 LinkEditInfo leInfo;
1292 getLinkEditPointers(diag, leInfo);
1293 if ( diag.hasError() )
1294 return;
1295
1296 // find lazy and non-lazy pointer sections
1297 const bool is64Bit = is64();
1298 const uint32_t* const indirectSymbolTable = (uint32_t*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->indirectsymoff);
1299 const uint32_t indirectSymbolTableCount = leInfo.dynSymTab->nindirectsyms;
1300 const uint32_t ptrSize = pointerSize();
1301 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
1302 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
1303 const struct nlist* symbols32 = (struct nlist*)symbolTable;
1304 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1305 uint32_t symCount = leInfo.symTab->nsyms;
1306 uint32_t poolSize = leInfo.symTab->strsize;
1307 __block bool stop = false;
1308 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectInfo, bool malformedSectionRange, bool& sectionStop) {
1309 uint8_t sectionType = (sectInfo.sectFlags & SECTION_TYPE);
1310 bool selfModifyingStub = (sectionType == S_SYMBOL_STUBS) && (sectInfo.sectFlags & S_ATTR_SELF_MODIFYING_CODE) && (sectInfo.reserved2 == 5) && (this->cputype == CPU_TYPE_I386);
1311 if ( (sectionType != S_LAZY_SYMBOL_POINTERS) && (sectionType != S_NON_LAZY_SYMBOL_POINTERS) && !selfModifyingStub )
1312 return;
1313 if ( (flags & S_ATTR_SELF_MODIFYING_CODE) && !selfModifyingStub ) {
1314 diag.error("S_ATTR_SELF_MODIFYING_CODE section type only valid in old i386 binaries");
1315 sectionStop = true;
1316 return;
1317 }
1318 uint32_t elementSize = selfModifyingStub ? sectInfo.reserved2 : ptrSize;
1319 uint32_t elementCount = (uint32_t)(sectInfo.sectSize/elementSize);
1320 if ( greaterThanAddOrOverflow(sectInfo.reserved1, elementCount, indirectSymbolTableCount) ) {
1321 diag.error("section %s overflows indirect symbol table", sectInfo.sectName);
1322 sectionStop = true;
1323 return;
1324 }
1325
1326 for (uint32_t i=0; (i < elementCount) && !stop; ++i) {
1327 uint32_t symNum = indirectSymbolTable[sectInfo.reserved1 + i];
1328 if ( symNum == INDIRECT_SYMBOL_ABS )
1329 continue;
1330 if ( symNum == INDIRECT_SYMBOL_LOCAL ) {
1331 handler(sectInfo.sectAddr+i*elementSize, false, 0, "", false, false, false, stop);
1332 continue;
1333 }
1334 if ( symNum > symCount ) {
1335 diag.error("indirect symbol[%d] = %d which is invalid symbol index", sectInfo.reserved1 + i, symNum);
1336 sectionStop = true;
1337 return;
1338 }
1339 uint16_t n_desc = is64Bit ? symbols64[symNum].n_desc : symbols32[symNum].n_desc;
1340 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
1341 uint32_t strOffset = is64Bit ? symbols64[symNum].n_un.n_strx : symbols32[symNum].n_un.n_strx;
1342 if ( strOffset > poolSize ) {
1343 diag.error("symbol[%d] string offset out of range", sectInfo.reserved1 + i);
1344 sectionStop = true;
1345 return;
1346 }
1347 const char* symbolName = stringPool + strOffset;
1348 bool weakImport = (n_desc & N_WEAK_REF);
1349 bool lazy = (sectionType == S_LAZY_SYMBOL_POINTERS);
1350 handler(sectInfo.sectAddr+i*elementSize, true, libOrdinal, symbolName, weakImport, lazy, selfModifyingStub, stop);
1351 }
1352 sectionStop = stop;
1353 });
1354 }
1355
1356 int MachOAnalyzer::libOrdinalFromDesc(uint16_t n_desc) const
1357 {
1358 // -flat_namespace is always flat lookup
1359 if ( (this->flags & MH_TWOLEVEL) == 0 )
1360 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1361
1362 // extract byte from undefined symbol entry
1363 int libIndex = GET_LIBRARY_ORDINAL(n_desc);
1364 switch ( libIndex ) {
1365 case SELF_LIBRARY_ORDINAL:
1366 return BIND_SPECIAL_DYLIB_SELF;
1367
1368 case DYNAMIC_LOOKUP_ORDINAL:
1369 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1370
1371 case EXECUTABLE_ORDINAL:
1372 return BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
1373 }
1374
1375 return libIndex;
1376 }
1377
1378 bool MachOAnalyzer::validBindInfo(Diagnostics& diag, const char* path) const
1379 {
1380 forEachBind(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1381 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1382 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1383 uint8_t type, const char* symbolName, bool weakImport, uint64_t addend, bool& stop) {
1384 if ( invalidBindState(diag, opcodeName, path, leInfo, segments, segIndexSet, libraryOrdinalSet, dylibCount,
1385 libOrdinal, ptrSize, segmentIndex, segmentOffset, type, symbolName) ) {
1386 stop = true;
1387 }
1388 }, ^(const char* symbolName) {
1389 });
1390 return diag.noError();
1391 }
1392
1393 bool MachOAnalyzer::invalidBindState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1394 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint32_t ptrSize,
1395 uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, const char* symbolName) const
1396 {
1397 if ( !segIndexSet ) {
1398 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path, opcodeName);
1399 return true;
1400 }
1401 if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
1402 diag.error("in '%s' %s segment index %d too large", path, opcodeName, segmentIndex);
1403 return true;
1404 }
1405 if ( segmentOffset > (segments[segmentIndex].vmSize-ptrSize) ) {
1406 diag.error("in '%s' %s current segment offset 0x%08llX beyond segment size (0x%08llX)", path, opcodeName, segmentOffset, segments[segmentIndex].vmSize);
1407 return true;
1408 }
1409 if ( symbolName == NULL ) {
1410 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", path, opcodeName);
1411 return true;
1412 }
1413 if ( !libraryOrdinalSet ) {
1414 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL", path, opcodeName);
1415 return true;
1416 }
1417 if ( libOrdinal > (int)dylibCount ) {
1418 diag.error("in '%s' %s has library ordinal too large (%d) max (%d)", path, opcodeName, libOrdinal, dylibCount);
1419 return true;
1420 }
1421 if ( libOrdinal < BIND_SPECIAL_DYLIB_WEAK_DEF_COALESCE ) {
1422 diag.error("in '%s' %s has unknown library special ordinal (%d)", path, opcodeName, libOrdinal);
1423 return true;
1424 }
1425 switch ( type ) {
1426 case BIND_TYPE_POINTER:
1427 if ( !segments[segmentIndex].writable() ) {
1428 diag.error("in '%s' %s pointer bind is in non-writable segment", path, opcodeName);
1429 return true;
1430 }
1431 if ( segments[segmentIndex].executable() ) {
1432 diag.error("in '%s' %s pointer bind is in executable segment", path, opcodeName);
1433 return true;
1434 }
1435 break;
1436 case BIND_TYPE_TEXT_ABSOLUTE32:
1437 case BIND_TYPE_TEXT_PCREL32:
1438 if ( !segments[segmentIndex].textRelocs ) {
1439 diag.error("in '%s' %s text bind is in segment that does not support text relocations", path, opcodeName);
1440 return true;
1441 }
1442 if ( segments[segmentIndex].writable() ) {
1443 diag.error("in '%s' %s text bind is in writable segment", path, opcodeName);
1444 return true;
1445 }
1446 if ( !segments[segmentIndex].executable() ) {
1447 diag.error("in '%s' %s pointer bind is in non-executable segment", path, opcodeName);
1448 return true;
1449 }
1450 break;
1451 default:
1452 diag.error("in '%s' %s unknown bind type %d", path, opcodeName, type);
1453 return true;
1454 }
1455 return false;
1456 }
1457
1458 void MachOAnalyzer::forEachBind(Diagnostics& diag, void (^handler)(uint64_t runtimeOffset, int libOrdinal, const char* symbolName,
1459 bool weakImport, uint64_t addend, bool& stop),
1460 void (^strongHandler)(const char* symbolName)) const
1461 {
1462 __block bool startVmAddrSet = false;
1463 __block uint64_t startVmAddr = 0;
1464 forEachBind(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1465 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1466 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1467 uint8_t type, const char* symbolName, bool weakImport, uint64_t addend, bool& stop) {
1468 if ( !startVmAddrSet ) {
1469 for (int i=0; i <= segmentIndex; ++i) {
1470 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1471 startVmAddr = segments[i].vmAddr;
1472 startVmAddrSet = true;
1473 break;
1474 }
1475 }
1476 }
1477 uint64_t bindVmOffset = segments[segmentIndex].vmAddr + segmentOffset;
1478 uint64_t runtimeOffset = bindVmOffset - startVmAddr;
1479 handler(runtimeOffset, libOrdinal, symbolName, weakImport, addend, stop);
1480 }, ^(const char* symbolName) {
1481 strongHandler(symbolName);
1482 });
1483 }
1484
1485 void MachOAnalyzer::forEachBind(Diagnostics& diag,
1486 void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1487 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1488 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1489 uint8_t type, const char* symbolName, bool weakImport, uint64_t addend, bool& stop),
1490 void (^strongHandler)(const char* symbolName)) const
1491 {
1492 const uint32_t ptrSize = this->pointerSize();
1493 bool stop = false;
1494
1495 LinkEditInfo leInfo;
1496 getLinkEditPointers(diag, leInfo);
1497 if ( diag.hasError() )
1498 return;
1499
1500 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1501 getAllSegmentsInfos(diag, segmentsInfo);
1502 if ( diag.hasError() )
1503 return;
1504
1505
1506
1507 const uint32_t dylibCount = dependentDylibCount();
1508
1509 if ( leInfo.dyldInfo != nullptr ) {
1510 // process bind opcodes
1511 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
1512 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
1513 uint8_t type = 0;
1514 uint64_t segmentOffset = 0;
1515 uint8_t segmentIndex = 0;
1516 const char* symbolName = NULL;
1517 int libraryOrdinal = 0;
1518 bool segIndexSet = false;
1519 bool libraryOrdinalSet = false;
1520
1521 int64_t addend = 0;
1522 uint64_t count;
1523 uint64_t skip;
1524 bool weakImport = false;
1525 while ( !stop && diag.noError() && (p < end) ) {
1526 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1527 uint8_t opcode = *p & BIND_OPCODE_MASK;
1528 ++p;
1529 switch (opcode) {
1530 case BIND_OPCODE_DONE:
1531 stop = true;
1532 break;
1533 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1534 libraryOrdinal = immediate;
1535 libraryOrdinalSet = true;
1536 break;
1537 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1538 libraryOrdinal = (int)read_uleb128(diag, p, end);
1539 libraryOrdinalSet = true;
1540 break;
1541 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1542 // the special ordinals are negative numbers
1543 if ( immediate == 0 )
1544 libraryOrdinal = 0;
1545 else {
1546 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1547 libraryOrdinal = signExtended;
1548 }
1549 libraryOrdinalSet = true;
1550 break;
1551 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1552 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1553 symbolName = (char*)p;
1554 while (*p != '\0')
1555 ++p;
1556 ++p;
1557 break;
1558 case BIND_OPCODE_SET_TYPE_IMM:
1559 type = immediate;
1560 break;
1561 case BIND_OPCODE_SET_ADDEND_SLEB:
1562 addend = read_sleb128(diag, p, end);
1563 break;
1564 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1565 segmentIndex = immediate;
1566 segmentOffset = read_uleb128(diag, p, end);
1567 segIndexSet = true;
1568 break;
1569 case BIND_OPCODE_ADD_ADDR_ULEB:
1570 segmentOffset += read_uleb128(diag, p, end);
1571 break;
1572 case BIND_OPCODE_DO_BIND:
1573 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1574 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1575 segmentOffset += ptrSize;
1576 break;
1577 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1578 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1579 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1580 segmentOffset += read_uleb128(diag, p, end) + ptrSize;
1581 break;
1582 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1583 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1584 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1585 segmentOffset += immediate*ptrSize + ptrSize;
1586 break;
1587 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1588 count = read_uleb128(diag, p, end);
1589 skip = read_uleb128(diag, p, end);
1590 for (uint32_t i=0; i < count; ++i) {
1591 handler("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1592 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1593 segmentOffset += skip + ptrSize;
1594 if ( stop )
1595 break;
1596 }
1597 break;
1598 default:
1599 diag.error("bad bind opcode 0x%02X", *p);
1600 }
1601 }
1602 if ( diag.hasError() )
1603 return;
1604
1605 // process lazy bind opcodes
1606 if ( leInfo.dyldInfo->lazy_bind_size != 0 ) {
1607 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
1608 end = p + leInfo.dyldInfo->lazy_bind_size;
1609 type = BIND_TYPE_POINTER;
1610 segmentOffset = 0;
1611 segmentIndex = 0;
1612 symbolName = NULL;
1613 libraryOrdinal = 0;
1614 segIndexSet = false;
1615 libraryOrdinalSet= false;
1616 addend = 0;
1617 weakImport = false;
1618 stop = false;
1619 while ( !stop && diag.noError() && (p < end) ) {
1620 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1621 uint8_t opcode = *p & BIND_OPCODE_MASK;
1622 ++p;
1623 switch (opcode) {
1624 case BIND_OPCODE_DONE:
1625 // this opcode marks the end of each lazy pointer binding
1626 break;
1627 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1628 libraryOrdinal = immediate;
1629 libraryOrdinalSet = true;
1630 break;
1631 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1632 libraryOrdinal = (int)read_uleb128(diag, p, end);
1633 libraryOrdinalSet = true;
1634 break;
1635 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1636 // the special ordinals are negative numbers
1637 if ( immediate == 0 )
1638 libraryOrdinal = 0;
1639 else {
1640 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1641 libraryOrdinal = signExtended;
1642 }
1643 libraryOrdinalSet = true;
1644 break;
1645 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1646 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1647 symbolName = (char*)p;
1648 while (*p != '\0')
1649 ++p;
1650 ++p;
1651 break;
1652 case BIND_OPCODE_SET_ADDEND_SLEB:
1653 addend = read_sleb128(diag, p, end);
1654 break;
1655 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1656 segmentIndex = immediate;
1657 segmentOffset = read_uleb128(diag, p, end);
1658 segIndexSet = true;
1659 break;
1660 case BIND_OPCODE_DO_BIND:
1661 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1662 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1663 segmentOffset += ptrSize;
1664 break;
1665 case BIND_OPCODE_SET_TYPE_IMM:
1666 case BIND_OPCODE_ADD_ADDR_ULEB:
1667 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1668 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1669 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1670 default:
1671 diag.error("bad lazy bind opcode 0x%02X", opcode);
1672 break;
1673 }
1674 }
1675 }
1676 if ( diag.hasError() )
1677 return;
1678
1679 // process weak bind info
1680 if ( leInfo.dyldInfo->weak_bind_size != 0 ) {
1681 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->weak_bind_off);
1682 end = p + leInfo.dyldInfo->weak_bind_size;
1683 type = BIND_TYPE_POINTER;
1684 segmentOffset = 0;
1685 segmentIndex = 0;
1686 symbolName = NULL;
1687 libraryOrdinal = BIND_SPECIAL_DYLIB_WEAK_DEF_COALESCE;
1688 segIndexSet = false;
1689 libraryOrdinalSet= true;
1690 addend = 0;
1691 weakImport = false;
1692 stop = false;
1693 while ( !stop && diag.noError() && (p < end) ) {
1694 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1695 uint8_t opcode = *p & BIND_OPCODE_MASK;
1696 ++p;
1697 switch (opcode) {
1698 case BIND_OPCODE_DONE:
1699 stop = true;
1700 break;
1701 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1702 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1703 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1704 diag.error("unexpected dylib ordinal in weak_bind");
1705 break;
1706 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1707 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1708 symbolName = (char*)p;
1709 while (*p != '\0')
1710 ++p;
1711 ++p;
1712 if ( immediate & BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION ) {
1713 strongHandler(symbolName);
1714 }
1715 break;
1716 case BIND_OPCODE_SET_TYPE_IMM:
1717 type = immediate;
1718 break;
1719 case BIND_OPCODE_SET_ADDEND_SLEB:
1720 addend = read_sleb128(diag, p, end);
1721 break;
1722 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1723 segmentIndex = immediate;
1724 segmentOffset = read_uleb128(diag, p, end);
1725 segIndexSet = true;
1726 break;
1727 case BIND_OPCODE_ADD_ADDR_ULEB:
1728 segmentOffset += read_uleb128(diag, p, end);
1729 break;
1730 case BIND_OPCODE_DO_BIND:
1731 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1732 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1733 segmentOffset += ptrSize;
1734 break;
1735 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1736 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1737 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1738 segmentOffset += read_uleb128(diag, p, end) + ptrSize;
1739 break;
1740 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1741 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1742 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1743 segmentOffset += immediate*ptrSize + ptrSize;
1744 break;
1745 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1746 count = read_uleb128(diag, p, end);
1747 skip = read_uleb128(diag, p, end);
1748 for (uint32_t i=0; i < count; ++i) {
1749 handler("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1750 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, addend, stop);
1751 segmentOffset += skip + ptrSize;
1752 if ( stop )
1753 break;
1754 }
1755 break;
1756 default:
1757 diag.error("bad bind opcode 0x%02X", *p);
1758 }
1759 }
1760 }
1761 }
1762 else {
1763 // old binary, process external relocations
1764 const uint64_t relocsStartAddress = relocBaseAddress(segmentsInfo, leInfo.layout.linkeditSegIndex);
1765 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->extreloff);
1766 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nextrel];
1767 bool is64Bit = is64() ;
1768 const uint8_t relocSize = (is64Bit ? 3 : 2);
1769 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
1770 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
1771 const struct nlist* symbols32 = (struct nlist*)symbolTable;
1772 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1773 uint32_t symCount = leInfo.symTab->nsyms;
1774 uint32_t poolSize = leInfo.symTab->strsize;
1775 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
1776 if ( reloc->r_length != relocSize ) {
1777 diag.error("external relocation has wrong r_length");
1778 break;
1779 }
1780 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
1781 diag.error("external relocation has wrong r_type");
1782 break;
1783 }
1784 uint32_t segIndex = 0;
1785 uint64_t segOffset = 0;
1786 if ( segIndexAndOffsetForAddress(relocsStartAddress+reloc->r_address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1787 uint32_t symbolIndex = reloc->r_symbolnum;
1788 if ( symbolIndex > symCount ) {
1789 diag.error("external relocation has out of range r_symbolnum");
1790 break;
1791 }
1792 else {
1793 uint32_t strOffset = is64Bit ? symbols64[symbolIndex].n_un.n_strx : symbols32[symbolIndex].n_un.n_strx;
1794 uint16_t n_desc = is64Bit ? symbols64[symbolIndex].n_desc : symbols32[symbolIndex].n_desc;
1795 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
1796 if ( strOffset >= poolSize ) {
1797 diag.error("external relocation has r_symbolnum=%d which has out of range n_strx", symbolIndex);
1798 break;
1799 }
1800 else {
1801 const char* symbolName = stringPool + strOffset;
1802 bool weakImport = (n_desc & N_WEAK_REF);
1803 const uint8_t* content = (uint8_t*)this + segmentsInfo[segIndex].vmAddr - leInfo.layout.textUnslidVMAddr + segOffset;
1804 uint64_t addend = is64Bit ? *((uint64_t*)content) : *((uint32_t*)content);
1805 handler("external relocation", leInfo, segmentsInfo, true, true, dylibCount, libOrdinal,
1806 ptrSize, segIndex, segOffset, BIND_TYPE_POINTER, symbolName, weakImport, addend, stop);
1807 }
1808 }
1809 }
1810 else {
1811 diag.error("local relocation has out of range r_address");
1812 break;
1813 }
1814 }
1815 // then process indirect symbols
1816 forEachIndirectPointer(diag, ^(uint64_t address, bool bind, int bindLibOrdinal,
1817 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
1818 if ( !bind )
1819 return;
1820 uint32_t segIndex = 0;
1821 uint64_t segOffset = 0;
1822 if ( segIndexAndOffsetForAddress(address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1823 handler("indirect symbol", leInfo, segmentsInfo, true, true, dylibCount, bindLibOrdinal,
1824 ptrSize, segIndex, segOffset, BIND_TYPE_POINTER, bindSymbolName, bindWeakImport, 0, indStop);
1825 }
1826 else {
1827 diag.error("indirect symbol has out of range address");
1828 indStop = true;
1829 }
1830 });
1831 }
1832
1833 }
1834
1835
1836 bool MachOAnalyzer::validChainedFixupsInfo(Diagnostics& diag, const char* path) const
1837 {
1838 __block uint32_t maxTargetCount = 0;
1839 __block uint32_t currentTargetCount = 0;
1840 forEachChainedFixup(diag,
1841 ^(uint32_t totalTargets, bool& stop) {
1842 maxTargetCount = totalTargets;
1843 },
1844 ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop) {
1845 if ( symbolName == NULL ) {
1846 diag.error("in '%s' missing BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", path);
1847 }
1848 else if ( !libraryOrdinalSet ) {
1849 diag.error("in '%s' missing BIND_OPCODE_SET_DYLIB_ORDINAL", path);
1850 }
1851 else if ( libOrdinal > (int)dylibCount ) {
1852 diag.error("in '%s' has library ordinal too large (%d) max (%d)", path, libOrdinal, dylibCount);
1853 }
1854 else if ( libOrdinal < BIND_SPECIAL_DYLIB_WEAK_DEF_COALESCE ) {
1855 diag.error("in '%s' has unknown library special ordinal (%d)", path, libOrdinal);
1856 }
1857 else if ( type != BIND_TYPE_POINTER ) {
1858 diag.error("in '%s' unknown bind type %d", path, type);
1859 }
1860 else if ( currentTargetCount > maxTargetCount ) {
1861 diag.error("in '%s' chained target counts exceeds BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB", path);
1862 }
1863 ++currentTargetCount;
1864 if ( diag.hasError() )
1865 stop = true;
1866 },
1867 ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, bool& stop) {
1868 if ( !segIndexSet ) {
1869 diag.error("in '%s' missing BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path);
1870 }
1871 else if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
1872 diag.error("in '%s' segment index %d too large", path, segmentIndex);
1873 }
1874 else if ( segmentOffset > (segments[segmentIndex].vmSize-8) ) {
1875 diag.error("in '%s' current segment offset 0x%08llX beyond segment size (0x%08llX)", path, segmentOffset, segments[segmentIndex].vmSize);
1876 }
1877 else if ( !segments[segmentIndex].writable() ) {
1878 diag.error("in '%s' pointer bind is in non-writable segment", path);
1879 }
1880 else if ( segments[segmentIndex].executable() ) {
1881 diag.error("in '%s' pointer bind is in executable segment", path);
1882 }
1883 if ( diag.hasError() )
1884 stop = true;
1885 }
1886 );
1887
1888 return diag.noError();
1889 }
1890
1891
1892 void MachOAnalyzer::forEachChainedFixup(Diagnostics& diag, void (^targetCount)(uint32_t totalTargets, bool& stop),
1893 void (^addTarget)(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop),
1894 void (^addChainStart)(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, bool& stop)) const
1895 {
1896 bool stop = false;
1897
1898 LinkEditInfo leInfo;
1899 getLinkEditPointers(diag, leInfo);
1900 if ( diag.hasError() )
1901 return;
1902
1903 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1904 getAllSegmentsInfos(diag, segmentsInfo);
1905 if ( diag.hasError() )
1906 return;
1907
1908 const uint32_t dylibCount = dependentDylibCount();
1909
1910 if ( leInfo.dyldInfo != nullptr ) {
1911 // process bind opcodes
1912 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
1913 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
1914 uint8_t type = 0;
1915 uint64_t segmentOffset = 0;
1916 uint8_t segmentIndex = 0;
1917 const char* symbolName = NULL;
1918 int libraryOrdinal = 0;
1919 bool segIndexSet = false;
1920 bool libraryOrdinalSet = false;
1921 uint64_t targetTableCount;
1922 uint64_t addend = 0;
1923 bool weakImport = false;
1924 while ( !stop && diag.noError() && (p < end) ) {
1925 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1926 uint8_t opcode = *p & BIND_OPCODE_MASK;
1927 ++p;
1928 switch (opcode) {
1929 case BIND_OPCODE_DONE:
1930 stop = true;
1931 break;
1932 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1933 libraryOrdinal = immediate;
1934 libraryOrdinalSet = true;
1935 break;
1936 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1937 libraryOrdinal = (int)read_uleb128(diag, p, end);
1938 libraryOrdinalSet = true;
1939 break;
1940 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1941 // the special ordinals are negative numbers
1942 if ( immediate == 0 )
1943 libraryOrdinal = 0;
1944 else {
1945 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1946 libraryOrdinal = signExtended;
1947 }
1948 libraryOrdinalSet = true;
1949 break;
1950 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1951 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1952 symbolName = (char*)p;
1953 while (*p != '\0')
1954 ++p;
1955 ++p;
1956 break;
1957 case BIND_OPCODE_SET_TYPE_IMM:
1958 type = immediate;
1959 break;
1960 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1961 segmentIndex = immediate;
1962 segmentOffset = read_uleb128(diag, p, end);
1963 segIndexSet = true;
1964 break;
1965 case BIND_OPCODE_SET_ADDEND_SLEB:
1966 addend = read_sleb128(diag, p, end);
1967 break;
1968 case BIND_OPCODE_DO_BIND:
1969 if ( addTarget )
1970 addTarget(leInfo, segmentsInfo, libraryOrdinalSet, dylibCount, libraryOrdinal, type, symbolName, addend, weakImport, stop);
1971 break;
1972 case BIND_OPCODE_THREADED:
1973 switch (immediate) {
1974 case BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB:
1975 targetTableCount = read_uleb128(diag, p, end);
1976 if ( targetTableCount > 65535 ) {
1977 diag.error("BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB size too large");
1978 stop = true;
1979 }
1980 else {
1981 if ( targetCount )
1982 targetCount((uint32_t)targetTableCount, stop);
1983 }
1984 break;
1985 case BIND_SUBOPCODE_THREADED_APPLY:
1986 if ( addChainStart )
1987 addChainStart(leInfo, segmentsInfo, segmentIndex, segIndexSet, segmentOffset, stop);
1988 break;
1989 default:
1990 diag.error("bad BIND_OPCODE_THREADED sub-opcode 0x%02X", immediate);
1991 }
1992 break;
1993 default:
1994 diag.error("bad bind opcode 0x%02X", immediate);
1995 }
1996 }
1997 if ( diag.hasError() )
1998 return;
1999 }
2000 }
2001
2002 void MachOAnalyzer::forEachChainedFixupStart(Diagnostics& diag, void (^callback)(uint64_t runtimeOffset, bool& stop)) const
2003 {
2004 __block bool startVmAddrSet = false;
2005 __block uint64_t startVmAddr = 0;
2006 forEachChainedFixup(diag, nullptr, nullptr, ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, bool& stop) {
2007 if ( !startVmAddrSet ) {
2008 for (int i=0; i <= segmentIndex; ++i) {
2009 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
2010 startVmAddr = segments[i].vmAddr;
2011 startVmAddrSet = true;
2012 break;
2013 }
2014 }
2015 }
2016 uint64_t startVmOffset = segments[segmentIndex].vmAddr + segmentOffset;
2017 uint64_t runtimeOffset = startVmOffset - startVmAddr;
2018 callback((uint32_t)runtimeOffset, stop);
2019 });
2020 }
2021
2022 void MachOAnalyzer::forEachChainedFixupTarget(Diagnostics& diag, void (^callback)(int libOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop)) const
2023 {
2024 forEachChainedFixup(diag, nullptr, ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount,
2025 int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop){
2026 callback(libOrdinal, symbolName, addend, weakImport, stop);
2027 }, nullptr);
2028 }
2029
2030 uint32_t MachOAnalyzer::segmentCount() const
2031 {
2032 __block uint32_t count = 0;
2033 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2034 ++count;
2035 });
2036 return count;
2037 }
2038
2039 bool MachOAnalyzer::hasCodeSignature(uint32_t& fileOffset, uint32_t& size) const
2040 {
2041 fileOffset = 0;
2042 size = 0;
2043
2044 Diagnostics diag;
2045 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2046 if ( cmd->cmd == LC_CODE_SIGNATURE ) {
2047 const linkedit_data_command* sigCmd = (linkedit_data_command*)cmd;
2048 fileOffset = sigCmd->dataoff;
2049 size = sigCmd->datasize;
2050 stop = true;
2051 }
2052 });
2053 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2054
2055 // early exist if no LC_CODE_SIGNATURE
2056 if ( fileOffset == 0 )
2057 return false;
2058
2059 // <rdar://problem/13622786> ignore code signatures in macOS binaries built with pre-10.9 tools
2060 if ( (this->cputype == CPU_TYPE_X86_64) || (this->cputype == CPU_TYPE_I386) ) {
2061 __block bool foundPlatform = false;
2062 __block bool badSignature = false;
2063 forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
2064 foundPlatform = true;
2065 if ( (platform == Platform::macOS) && (sdk < 0x000A0900) )
2066 badSignature = true;
2067 });
2068 return foundPlatform && !badSignature;
2069 }
2070
2071 return true;
2072 }
2073
2074 bool MachOAnalyzer::hasInitializer(Diagnostics& diag, bool contentRebased, const void* dyldCache) const
2075 {
2076 __block bool result = false;
2077 forEachInitializer(diag, contentRebased, ^(uint32_t offset) {
2078 result = true;
2079 }, dyldCache);
2080 return result;
2081 }
2082
2083 void MachOAnalyzer::forEachInitializer(Diagnostics& diag, bool contentRebased, void (^callback)(uint32_t offset), const void* dyldCache) const
2084 {
2085 __block uint64_t prefTextSegAddrStart = 0;
2086 __block uint64_t prefTextSegAddrEnd = 0;
2087
2088 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2089 if ( strcmp(info.segName, "__TEXT") == 0 ) {
2090 prefTextSegAddrStart = info.vmAddr;
2091 prefTextSegAddrEnd = info.vmAddr + info.vmSize;
2092 stop = true;
2093 }
2094 });
2095 if ( prefTextSegAddrStart == prefTextSegAddrEnd ) {
2096 diag.error("no __TEXT segment");
2097 return;
2098 }
2099 uint64_t slide = (long)this - prefTextSegAddrStart;
2100
2101 // if dylib linked with -init linker option, that initializer is first
2102 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2103 if ( cmd->cmd == LC_ROUTINES ) {
2104 const routines_command* routines = (routines_command*)cmd;
2105 uint64_t dashInit = routines->init_address;
2106 if ( (prefTextSegAddrStart < dashInit) && (dashInit < prefTextSegAddrEnd) )
2107 callback((uint32_t)(dashInit - prefTextSegAddrStart));
2108 else
2109 diag.error("-init does not point within __TEXT segment");
2110 }
2111 else if ( cmd->cmd == LC_ROUTINES_64 ) {
2112 const routines_command_64* routines = (routines_command_64*)cmd;
2113 uint64_t dashInit = routines->init_address;
2114 if ( (prefTextSegAddrStart < dashInit) && (dashInit < prefTextSegAddrEnd) )
2115 callback((uint32_t)(dashInit - prefTextSegAddrStart));
2116 else
2117 diag.error("-init does not point within __TEXT segment");
2118 }
2119 });
2120
2121 // next any function pointers in mod-init section
2122 unsigned ptrSize = pointerSize();
2123 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2124 if ( (info.sectFlags & SECTION_TYPE) == S_MOD_INIT_FUNC_POINTERS ) {
2125 const uint8_t* content;
2126 content = (uint8_t*)(info.sectAddr + slide);
2127 if ( (info.sectSize % ptrSize) != 0 ) {
2128 diag.error("initializer section %s/%s has bad size", info.segInfo.segName, info.sectName);
2129 stop = true;
2130 return;
2131 }
2132 if ( malformedSectionRange ) {
2133 diag.error("initializer section %s/%s extends beyond its segment", info.segInfo.segName, info.sectName);
2134 stop = true;
2135 return;
2136 }
2137 if ( ((long)content % ptrSize) != 0 ) {
2138 diag.error("initializer section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2139 stop = true;
2140 return;
2141 }
2142 if ( ptrSize == 8 ) {
2143 const uint64_t* initsStart = (uint64_t*)content;
2144 const uint64_t* initsEnd = (uint64_t*)((uint8_t*)content + info.sectSize);
2145 for (const uint64_t* p=initsStart; p < initsEnd; ++p) {
2146 uint64_t anInit = *p;
2147 if ( contentRebased )
2148 anInit -= slide;
2149 if ( hasChainedFixups() ) {
2150 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2151 if ( aChainedInit->authBind.bind )
2152 diag.error("initializer uses bind");
2153 if ( aChainedInit->authRebase.auth ) {
2154 anInit = aChainedInit->authRebase.target;
2155 }
2156 else {
2157 anInit = aChainedInit->plainRebase.signExtendedTarget();
2158 }
2159 }
2160 if ( (anInit <= prefTextSegAddrStart) || (anInit > prefTextSegAddrEnd) ) {
2161 diag.error("initializer 0x%0llX does not point within __TEXT segment", anInit);
2162 stop = true;
2163 break;
2164 }
2165 callback((uint32_t)(anInit - prefTextSegAddrStart));
2166 }
2167 }
2168 else {
2169 const uint32_t* initsStart = (uint32_t*)content;
2170 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + info.sectSize);
2171 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2172 uint32_t anInit = *p;
2173 if ( contentRebased )
2174 anInit -= slide;
2175 if ( (anInit <= prefTextSegAddrStart) || (anInit > prefTextSegAddrEnd) ) {
2176 diag.error("initializer 0x%0X does not point within __TEXT segment", anInit);
2177 stop = true;
2178 break;
2179 }
2180 callback(anInit - (uint32_t)prefTextSegAddrStart);
2181 }
2182 }
2183 }
2184 });
2185 }
2186
2187
2188 void MachOAnalyzer::forEachRPath(void (^callback)(const char* rPath, bool& stop)) const
2189 {
2190 Diagnostics diag;
2191 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2192 if ( cmd->cmd == LC_RPATH ) {
2193 const char* rpath = (char*)cmd + ((struct rpath_command*)cmd)->path.offset;
2194 callback(rpath, stop);
2195 }
2196 });
2197 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2198 }
2199
2200
2201 bool MachOAnalyzer::hasObjC() const
2202 {
2203 __block bool result = false;
2204 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2205 if ( (strcmp(info.sectName, "__objc_imageinfo") == 0) && (strncmp(info.segInfo.segName, "__DATA", 6) == 0) ) {
2206 result = true;
2207 stop = true;
2208 }
2209 if ( (this->cputype == CPU_TYPE_I386) && (strcmp(info.sectName, "__image_info") == 0) && (strcmp(info.segInfo.segName, "__OBJC") == 0) ) {
2210 result = true;
2211 stop = true;
2212 }
2213 });
2214 return result;
2215 }
2216
2217 bool MachOAnalyzer::hasPlusLoadMethod(Diagnostics& diag) const
2218 {
2219 __block bool result = false;
2220 if ( (this->cputype == CPU_TYPE_I386) && supportsPlatform(Platform::macOS) ) {
2221 // old objc runtime has no special section for +load methods, scan for string
2222 int64_t slide = getSlide();
2223 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2224 if ( ( (info.sectFlags & SECTION_TYPE) == S_CSTRING_LITERALS ) ) {
2225 if ( malformedSectionRange ) {
2226 diag.error("cstring section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2227 stop = true;
2228 return;
2229 }
2230 const uint8_t* content = (uint8_t*)(info.sectAddr + slide);
2231 const char* s = (char*)content;
2232 const char* end = s + info.sectSize;
2233 while ( s < end ) {
2234 if ( strcmp(s, "load") == 0 ) {
2235 result = true;
2236 stop = true;
2237 return;
2238 }
2239 while (*s != '\0' )
2240 ++s;
2241 ++s;
2242 }
2243 }
2244 });
2245 }
2246 else {
2247 // in new objc runtime compiler puts classes/categories with +load method in specical section
2248 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2249 if ( strncmp(info.segInfo.segName, "__DATA", 6) != 0 )
2250 return;
2251 if ( (strcmp(info.sectName, "__objc_nlclslist") == 0) || (strcmp(info.sectName, "__objc_nlcatlist") == 0)) {
2252 result = true;
2253 stop = true;
2254 }
2255 });
2256 }
2257 return result;
2258 }
2259
2260 const void* MachOAnalyzer::getRebaseOpcodes(uint32_t& size) const
2261 {
2262 Diagnostics diag;
2263 LinkEditInfo leInfo;
2264 getLinkEditPointers(diag, leInfo);
2265 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2266 return nullptr;
2267
2268 size = leInfo.dyldInfo->rebase_size;
2269 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
2270 }
2271
2272 const void* MachOAnalyzer::getBindOpcodes(uint32_t& size) const
2273 {
2274 Diagnostics diag;
2275 LinkEditInfo leInfo;
2276 getLinkEditPointers(diag, leInfo);
2277 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2278 return nullptr;
2279
2280 size = leInfo.dyldInfo->bind_size;
2281 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
2282 }
2283
2284 const void* MachOAnalyzer::getLazyBindOpcodes(uint32_t& size) const
2285 {
2286 Diagnostics diag;
2287 LinkEditInfo leInfo;
2288 getLinkEditPointers(diag, leInfo);
2289 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2290 return nullptr;
2291
2292 size = leInfo.dyldInfo->lazy_bind_size;
2293 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
2294 }
2295
2296
2297 uint64_t MachOAnalyzer::segAndOffsetToRuntimeOffset(uint8_t targetSegIndex, uint64_t targetSegOffset) const
2298 {
2299 __block uint64_t textVmAddr = 0;
2300 __block uint64_t result = 0;
2301 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2302 if ( strcmp(info.segName, "__TEXT") == 0 )
2303 textVmAddr = info.vmAddr;
2304 if ( info.segIndex == targetSegIndex ) {
2305 result = (info.vmAddr - textVmAddr) + targetSegOffset;
2306 }
2307 });
2308 return result;
2309 }
2310
2311 bool MachOAnalyzer::hasLazyPointers(uint32_t& runtimeOffset, uint32_t& size) const
2312 {
2313 size = 0;
2314 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2315 if ( (info.sectFlags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
2316 runtimeOffset = (uint32_t)(info.sectAddr - preferredLoadAddress());
2317 size = (uint32_t)info.sectSize;
2318 stop = true;
2319 }
2320 });
2321 return (size != 0);
2322 }
2323
2324 uint64_t MachOAnalyzer::preferredLoadAddress() const
2325 {
2326 __block uint64_t textVmAddr = 0;
2327 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2328 if ( strcmp(info.segName, "__TEXT") == 0 ) {
2329 textVmAddr = info.vmAddr;
2330 stop = true;
2331 }
2332 });
2333 return textVmAddr;
2334 }
2335
2336
2337 bool MachOAnalyzer::getEntry(uint32_t& offset, bool& usesCRT) const
2338 {
2339 Diagnostics diag;
2340 offset = 0;
2341 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2342 if ( cmd->cmd == LC_MAIN ) {
2343 entry_point_command* mainCmd = (entry_point_command*)cmd;
2344 usesCRT = false;
2345 offset = (uint32_t)mainCmd->entryoff;
2346 stop = true;
2347 }
2348 else if ( cmd->cmd == LC_UNIXTHREAD ) {
2349 stop = true;
2350 usesCRT = true;
2351 uint64_t startAddress = entryAddrFromThreadCmd((thread_command*)cmd);
2352 offset = (uint32_t)(startAddress - preferredLoadAddress());
2353 }
2354 });
2355 return (offset != 0);
2356 }
2357
2358 uint64_t MachOAnalyzer::entryAddrFromThreadCmd(const thread_command* cmd) const
2359 {
2360 assert(cmd->cmd == LC_UNIXTHREAD);
2361 const uint32_t* regs32 = (uint32_t*)(((char*)cmd) + 16);
2362 const uint64_t* regs64 = (uint64_t*)(((char*)cmd) + 16);
2363 uint64_t startAddress = 0;
2364 switch ( this->cputype ) {
2365 case CPU_TYPE_I386:
2366 startAddress = regs32[10]; // i386_thread_state_t.eip
2367 break;
2368 case CPU_TYPE_X86_64:
2369 startAddress = regs64[16]; // x86_thread_state64_t.rip
2370 break;
2371 }
2372 return startAddress;
2373 }
2374
2375
2376 void MachOAnalyzer::forEachInterposingSection(Diagnostics& diag, void (^handler)(uint64_t vmOffset, uint64_t vmSize, bool& stop)) const
2377 {
2378 const unsigned ptrSize = pointerSize();
2379 const unsigned entrySize = 2 * ptrSize;
2380 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2381 if ( ((info.sectFlags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(info.sectName, "__interpose") == 0) && (strcmp(info.segInfo.segName, "__DATA") == 0)) ) {
2382 if ( info.sectSize % entrySize != 0 ) {
2383 diag.error("interposing section %s/%s has bad size", info.segInfo.segName, info.sectName);
2384 stop = true;
2385 return;
2386 }
2387 if ( malformedSectionRange ) {
2388 diag.error("interposing section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2389 stop = true;
2390 return;
2391 }
2392 if ( (info.sectAddr % ptrSize) != 0 ) {
2393 diag.error("interposing section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2394 stop = true;
2395 return;
2396 }
2397 handler(info.sectAddr - preferredLoadAddress(), info.sectSize, stop);
2398 }
2399 });
2400 }
2401
2402 void MachOAnalyzer::forEachDOFSection(Diagnostics& diag, void (^callback)(uint32_t offset)) const
2403 {
2404 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2405 if ( ( (info.sectFlags & SECTION_TYPE) == S_DTRACE_DOF ) && !malformedSectionRange ) {
2406 callback((uint32_t)(info.sectAddr - info.segInfo.vmAddr));
2407 }
2408 });
2409 }
2410
2411 bool MachOAnalyzer::getCDHash(uint8_t cdHash[20]) const
2412 {
2413 Diagnostics diag;
2414 LinkEditInfo leInfo;
2415 getLinkEditPointers(diag, leInfo);
2416 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2417 return false;
2418
2419 return cdHashOfCodeSignature(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff), leInfo.codeSig->datasize, cdHash);
2420 }
2421
2422 bool MachOAnalyzer::isRestricted() const
2423 {
2424 __block bool result = false;
2425 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2426 if ( (strcmp(info.segInfo.segName, "__RESTRICT") == 0) && (strcmp(info.sectName, "__restrict") == 0) ) {
2427 result = true;
2428 stop = true;
2429 }
2430 });
2431 return result;
2432 }
2433
2434 bool MachOAnalyzer::usesLibraryValidation() const
2435 {
2436 Diagnostics diag;
2437 LinkEditInfo leInfo;
2438 getLinkEditPointers(diag, leInfo);
2439 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2440 return false;
2441
2442 const CS_CodeDirectory* cd = (const CS_CodeDirectory*)findCodeDirectoryBlob(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff), leInfo.codeSig->datasize);
2443 if ( cd == nullptr )
2444 return false;
2445
2446 // check for CS_REQUIRE_LV in CS_CodeDirectory.flags
2447 return (htonl(cd->flags) & CS_REQUIRE_LV);
2448 }
2449
2450 bool MachOAnalyzer::canHavePrecomputedDlopenClosure(const char* path, void (^failureReason)(const char*)) const
2451 {
2452 __block bool retval = true;
2453
2454 // only dylibs can go in cache
2455 if ( (this->filetype != MH_DYLIB) && (this->filetype != MH_BUNDLE) ) {
2456 retval = false;
2457 failureReason("not MH_DYLIB or MH_BUNDLE");
2458 }
2459
2460 // flat namespace files cannot go in cache
2461 if ( (this->flags & MH_TWOLEVEL) == 0 ) {
2462 retval = false;
2463 failureReason("not built with two level namespaces");
2464 }
2465
2466 // can only depend on other dylibs with absolute paths
2467 __block bool allDepPathsAreGood = true;
2468 forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
2469 if ( loadPath[0] != '/' ) {
2470 allDepPathsAreGood = false;
2471 stop = true;
2472 }
2473 });
2474 if ( !allDepPathsAreGood ) {
2475 retval = false;
2476 failureReason("depends on dylibs that are not absolute paths");
2477 }
2478
2479 // dylibs with interposing info cannot have dlopen closure pre-computed
2480 __block bool hasInterposing = false;
2481 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool &stop) {
2482 if ( ((info.sectFlags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(info.sectName, "__interpose") == 0) && (strcmp(info.segInfo.segName, "__DATA") == 0)) )
2483 hasInterposing = true;
2484 });
2485 if ( hasInterposing ) {
2486 retval = false;
2487 failureReason("has interposing tuples");
2488 }
2489
2490 // images that use dynamic_lookup, bundle_loader, or have weak-defs cannot have dlopen closure pre-computed
2491 Diagnostics diag;
2492 auto checkBind = ^(int libOrdinal, bool& stop) {
2493 switch (libOrdinal) {
2494 case BIND_SPECIAL_DYLIB_WEAK_DEF_COALESCE:
2495 failureReason("has weak externals");
2496 retval = false;
2497 stop = true;
2498 break;
2499 case BIND_SPECIAL_DYLIB_FLAT_LOOKUP:
2500 failureReason("has dynamic_lookup binds");
2501 retval = false;
2502 stop = true;
2503 break;
2504 case BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE:
2505 failureReason("has reference to main executable (bundle loader)");
2506 retval = false;
2507 stop = true;
2508 break;
2509 }
2510 };
2511
2512 if (hasChainedFixups()) {
2513 forEachChainedFixupTarget(diag, ^(int libOrdinal, const char *symbolName, uint64_t addend, bool weakImport, bool &stop) {
2514 checkBind(libOrdinal, stop);
2515 });
2516 } else {
2517 forEachBind(diag, ^(uint64_t runtimeOffset, int libOrdinal, const char* symbolName, bool weakImport, uint64_t addend, bool& stop) {
2518 checkBind(libOrdinal, stop);
2519 },
2520 ^(const char* symbolName) {
2521 });
2522 }
2523
2524 // special system dylib overrides cannot have closure pre-computed
2525 if ( strncmp(path, "/usr/lib/system/introspection/", 30) == 0 ) {
2526 retval = false;
2527 failureReason("override of OS dylib");
2528 }
2529
2530 return retval;
2531 }
2532
2533 bool MachOAnalyzer::canBePlacedInDyldCache(const char* path, void (^failureReason)(const char*)) const
2534 {
2535 if (!MachOFile::canBePlacedInDyldCache(path, failureReason))
2536 return false;
2537 if ( !(isArch("x86_64") || isArch("x86_64h")) )
2538 return true;
2539
2540 // Kick dylibs out of the x86_64 cache if they are using TBI.
2541 __block bool rebasesOk = true;
2542 Diagnostics diag;
2543 uint64_t startVMAddr = preferredLoadAddress();
2544 uint64_t endVMAddr = startVMAddr + mappedSize();
2545 forEachRebase(diag, false, ^(uint64_t runtimeOffset, bool &stop) {
2546 uint64_t value = *(uint64_t*)((uint8_t*)this + runtimeOffset);
2547 if ( (value < startVMAddr) || (value >= endVMAddr) ) {
2548 failureReason("rebase value out of range of dylib");
2549 rebasesOk = false;
2550 stop = true;
2551 }
2552 });
2553 return rebasesOk;
2554 }
2555
2556 } // dyld3
2557
2558