dyld-750.5.tar.gz
[apple/dyld.git] / dyld3 / MachOAnalyzer.cpp
1 /*
2 * Copyright (c) 2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 #include <sys/types.h>
25 #include <mach/mach.h>
26 #include <assert.h>
27 #include <limits.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <mach-o/reloc.h>
32 #include <mach-o/nlist.h>
33 #include <TargetConditionals.h>
34
35 #include "MachOAnalyzer.h"
36 #include "CodeSigningTypes.h"
37 #include "Array.h"
38
39
40 namespace dyld3 {
41
42
43 const MachOAnalyzer* MachOAnalyzer::validMainExecutable(Diagnostics& diag, const mach_header* mh, const char* path, uint64_t sliceLength,
44 const GradedArchs& archs, Platform platform)
45 {
46 const MachOAnalyzer* result = (const MachOAnalyzer*)mh;
47 if ( !result->validMachOForArchAndPlatform(diag, (size_t)sliceLength, path, archs, platform) )
48 return nullptr;
49 if ( !result->isDynamicExecutable() )
50 return nullptr;
51
52 return result;
53 }
54
55
// Maps the file at 'path' via 'fileSystem', selects the slice matching 'archs'
// if the file is fat, validates it for 'archs'/'platform', and re-maps it to
// runtime layout if it has zero-fill expansion.  On success returns the
// LoadedFileInfo describing the (possibly re-mapped) content; on any failure
// returns an empty LoadedFileInfo with the reason recorded in 'diag'.
closure::LoadedFileInfo MachOAnalyzer::load(Diagnostics& diag, const closure::FileSystem& fileSystem,
                                            const char* path, const GradedArchs& archs, Platform platform, char realerPath[MAXPATHLEN])
{
    // FIXME: This should probably be an assert, but if we happen to have a diagnostic here then something is wrong
    // above us and we should quickly return instead of doing unnecessary work.
    if (diag.hasError())
        return closure::LoadedFileInfo();

    closure::LoadedFileInfo info;
    if (!fileSystem.loadFile(path, info, realerPath, ^(const char *format, ...) {
        // NOTE(review): this forwards a va_list as a variadic argument to
        // diag.error(format, ...) — assumes Diagnostics resolves this to a
        // va_list-style overload; confirm against Diagnostics.h.
        va_list list;
        va_start(list, format);
        diag.error(format, list);
        va_end(list);
    })) {
        return closure::LoadedFileInfo();
    }

    // If we now have an error, but succeeded, then we must have tried multiple paths, one of which errored, but
    // then succeeded on a later path. So clear the error.
    if (diag.hasError())
        diag.clearError();

    // if fat, remap just slice needed
    bool fatButMissingSlice;
    const FatFile*  fh = (FatFile*)info.fileContent;
    uint64_t sliceOffset = info.sliceOffset;
    uint64_t sliceLen = info.sliceLen;
    if ( fh->isFatFileWithSlice(diag, info.fileContentLen, archs, sliceOffset, sliceLen, fatButMissingSlice) ) {
        // unmap anything before slice
        fileSystem.unloadPartialFile(info, sliceOffset, sliceLen);
        // Update the info to keep track of the new slice offset.
        info.sliceOffset = sliceOffset;
        info.sliceLen = sliceLen;
    }
    else if ( diag.hasError() ) {
        // We must have generated an error in the fat file parsing so use that error
        fileSystem.unloadFile(info);
        return closure::LoadedFileInfo();
    }
    else if ( fatButMissingSlice ) {
        diag.error("missing compatible arch in %s", path);
        fileSystem.unloadFile(info);
        return closure::LoadedFileInfo();
    }

    const MachOAnalyzer* mh = (MachOAnalyzer*)info.fileContent;

    // validate is mach-o of requested arch and platform
    if ( !mh->validMachOForArchAndPlatform(diag, (size_t)info.sliceLen, path, archs, platform) ) {
        fileSystem.unloadFile(info);
        return closure::LoadedFileInfo();
    }

    // if has zero-fill expansion, re-map
    // (remapIfZeroFill may replace info.fileContent with a new vm allocation)
    mh = mh->remapIfZeroFill(diag, fileSystem, info);

    // on error, remove mappings and return nullptr
    if ( diag.hasError() ) {
        fileSystem.unloadFile(info);
        return closure::LoadedFileInfo();
    }

    // now that LINKEDIT is at expected offset, finish validation
    mh->validLinkedit(diag, path);

    // on error, remove mappings and return nullptr
    if ( diag.hasError() ) {
        fileSystem.unloadFile(info);
        return closure::LoadedFileInfo();
    }

    return info;
}
130
#if DEBUG
// only used in debug builds of cache builder to verify segment moves are valid
void MachOAnalyzer::validateDyldCacheDylib(Diagnostics& diag, const char* path) const
{
    // Cache dylibs have no meaningful file-length bound here, so pass the
    // maximum 32-bit length to effectively disable file-size range checks.
    const size_t kNoFileSizeLimit = 0xffffffff;
    validLinkedit(diag, path);
    validSegments(diag, path, kNoFileSizeLimit);
}
#endif
139
140 uint64_t MachOAnalyzer::mappedSize() const
141 {
142 uint64_t vmSpace;
143 bool hasZeroFill;
144 analyzeSegmentsLayout(vmSpace, hasZeroFill);
145 return vmSpace;
146 }
147
// Validates that this image is a mach-o that dyld could load for the given
// arch set and platform: checks magic, arch grade, filetype, load command
// structure, segments, and (for executables) the entry point.
// Returns true if usable; on failure records the reason in 'diag'.
// Linkedit-content checks are deferred to validLinkedit().
bool MachOAnalyzer::validMachOForArchAndPlatform(Diagnostics& diag, size_t sliceLength, const char* path, const GradedArchs& archs, Platform platform) const
{
    // must start with mach-o magic value
    if ( (this->magic != MH_MAGIC) && (this->magic != MH_MAGIC_64) ) {
        diag.error("could not use '%s' because it is not a mach-o file: 0x%08X 0x%08X", path, this->magic, this->cputype);
        return false;
    }

    // arch must be one the caller's GradedArchs accepts
    if ( !archs.grade(this->cputype, this->cpusubtype) ) {
        diag.error("could not use '%s' because it is not a compatible arch", path);
        return false;
    }

    // must be a filetype dyld can load
    switch ( this->filetype ) {
        case MH_EXECUTE:
        case MH_DYLIB:
        case MH_BUNDLE:
        case MH_DYLINKER:
            break;
#if BUILDING_DYLDINFO
        // Allow offline tools to analyze binaries dyld doesn't load
        case MH_KEXT_BUNDLE:
            break;
#endif
        default:
            diag.error("could not use '%s' because it is not a dylib, bundle, or executable, filetype=0x%08X", path, this->filetype);
            return false;
    }

    // validate load commands structure
    if ( !this->validLoadCommands(diag, path, sliceLength) ) {
        return false;
    }

    // filter out static executables
    if ( (this->filetype == MH_EXECUTE) && !isDynamicExecutable() ) {
#if !BUILDING_DYLDINFO
        // dyldinfo should be able to inspect static executables such as the kernel
        diag.error("could not use '%s' because it is a static executable", path);
        return false;
#endif
    }

    // must match requested platform (do this after load commands are validated)
    if ( !this->supportsPlatform(platform) ) {
        diag.error("could not use '%s' because it was built for a different platform", path);
        return false;
    }

    // validate dylib loads (LC_*_DYLIB / LC_RPATH path strings and counts)
    if ( !validEmbeddedPaths(diag, platform, path) )
        return false;

    // validate segments (permissions, file ranges, overlap, ordering)
    if ( !validSegments(diag, path, sliceLength) )
        return false;

    // validate entry (LC_MAIN / LC_UNIXTHREAD) for main executables only
    if ( this->filetype == MH_EXECUTE ) {
        if ( !validMain(diag, path) )
            return false;
    }

    // further validations done in validLinkedit()

    return true;
}
216
// Validates LINKEDIT content: the layout of all linkedit blobs, then the
// fixup information in whichever format this binary uses — LC_DYLD_CHAINED_FIXUPS,
// the older arm64e chained format, or classic rebase/bind opcodes.
// Returns true if valid; on failure records the reason in 'diag'.
bool MachOAnalyzer::validLinkedit(Diagnostics& diag, const char* path) const
{
    // validate LINKEDIT layout
    if ( !validLinkeditLayout(diag, path) )
        return false;

    // validate fixups — exactly one of the three formats applies
    if ( hasLoadCommand(LC_DYLD_CHAINED_FIXUPS) ) {
        if ( !validChainedFixupsInfo(diag, path) )
            return false;
    }
#if SUPPORT_ARCH_arm64e
    // arm64e binaries without LC_DYLD_CHAINED_FIXUPS use an older chained format
    else if ( (this->cputype == CPU_TYPE_ARM64) && (this->cpusubtype == CPU_SUBTYPE_ARM64E) ) {
        if ( !validChainedFixupsInfoOldArm64e(diag, path) )
            return false;
    }
#endif
    else {
        // validate rebasing info
        if ( !validRebaseInfo(diag, path) )
            return false;

        // validate binding info
        if ( !validBindInfo(diag, path) )
            return false;
    }

    return true;
}
245
246 bool MachOAnalyzer::validLoadCommands(Diagnostics& diag, const char* path, size_t fileLen) const
247 {
248 // check load command don't exceed file length
249 if ( this->sizeofcmds + machHeaderSize() > fileLen ) {
250 diag.error("in '%s' load commands exceed length of file", path);
251 return false;
252 }
253
254 // walk all load commands and sanity check them
255 Diagnostics walkDiag;
256 forEachLoadCommand(walkDiag, ^(const load_command* cmd, bool& stop) {});
257 if ( walkDiag.hasError() ) {
258 #if BUILDING_CACHE_BUILDER
259 diag.error("in '%s' %s", path, walkDiag.errorMessage().c_str());
260 #else
261 diag.error("in '%s' %s", path, walkDiag.errorMessage());
262 #endif
263 return false;
264 }
265
266 // check load commands fit in TEXT segment
267 __block bool foundTEXT = false;
268 forEachSegment(^(const SegmentInfo& info, bool& stop) {
269 if ( strcmp(info.segName, "__TEXT") == 0 ) {
270 foundTEXT = true;
271 if ( this->sizeofcmds + machHeaderSize() > info.fileSize ) {
272 diag.error("in '%s' load commands exceed length of __TEXT segment", path);
273 }
274 if ( info.fileOffset != 0 ) {
275 diag.error("in '%s' __TEXT segment not start of mach-o", path);
276 }
277 stop = true;
278 }
279 });
280 if ( !diag.noError() && !foundTEXT ) {
281 diag.error("in '%s' __TEXT segment not found", path);
282 return false;
283 }
284
285 return true;
286 }
287
// If this image has zero-fill expansion (writable segments whose vmSize exceeds
// fileSize, or holes between segments), re-maps it into a fresh vm allocation
// with runtime layout and replaces the mapping described by 'info'.
// Returns the (possibly new) analyzer pointer, or nullptr on failure with the
// reason recorded in 'diag'.  If no zero-fill is needed, returns 'this' unchanged.
const MachOAnalyzer* MachOAnalyzer::remapIfZeroFill(Diagnostics& diag, const closure::FileSystem& fileSystem, closure::LoadedFileInfo& info) const
{
    uint64_t vmSpaceRequired;
    bool hasZeroFill;
    analyzeSegmentsLayout(vmSpaceRequired, hasZeroFill);

    if ( hasZeroFill ) {
        // allocate one contiguous region sized for the runtime layout;
        // vm_allocate zero-fills, so gaps/zerofill tails need no explicit work
        vm_address_t newMappedAddr;
        if ( ::vm_allocate(mach_task_self(), &newMappedAddr, (size_t)vmSpaceRequired, VM_FLAGS_ANYWHERE) != 0 ) {
            diag.error("vm_allocate failure");
            return nullptr;
        }
        // re-map each segment read-only, with runtime layout
        // (each segment lands at its vmAddr offset relative to __TEXT)
        uint64_t textSegVmAddr = preferredLoadAddress();
        forEachSegment(^(const SegmentInfo& segmentInfo, bool& stop) {
            if ( segmentInfo.fileSize != 0 ) {
                kern_return_t r = vm_copy(mach_task_self(), (vm_address_t)((long)info.fileContent+segmentInfo.fileOffset), (vm_size_t)segmentInfo.fileSize, (vm_address_t)(newMappedAddr+segmentInfo.vmAddr-textSegVmAddr));
                if ( r != KERN_SUCCESS ) {
                    diag.error("vm_copy() failure");
                    stop = true;
                }
            }
        });
        if ( diag.noError() ) {
            // remove original mapping and return new mapping
            fileSystem.unloadFile(info);

            // make the new mapping read-only
            ::vm_protect(mach_task_self(), newMappedAddr, (vm_size_t)vmSpaceRequired, false, VM_PROT_READ);

            // Set vm_deallocate as the unload method.
            info.unload = [](const closure::LoadedFileInfo& info) {
                ::vm_deallocate(mach_task_self(), (vm_address_t)info.fileContent, (size_t)info.fileContentLen);
            };

            // And update the file content to the new location
            info.fileContent = (const void*)newMappedAddr;
            info.fileContentLen = vmSpaceRequired;
            return (const MachOAnalyzer*)info.fileContent;
        }
        else {
            // new mapping failed, return old mapping with an error in diag
            // (the caller still owns 'info' and is responsible for unloading it)
            ::vm_deallocate(mach_task_self(), newMappedAddr, (size_t)vmSpaceRequired);
            return nullptr;
        }
    }

    return this;
}
337
338 void MachOAnalyzer::analyzeSegmentsLayout(uint64_t& vmSpace, bool& hasZeroFill) const
339 {
340 __block bool writeExpansion = false;
341 __block uint64_t lowestVmAddr = 0xFFFFFFFFFFFFFFFFULL;
342 __block uint64_t highestVmAddr = 0;
343 __block uint64_t sumVmSizes = 0;
344 forEachSegment(^(const SegmentInfo& segmentInfo, bool& stop) {
345 if ( strcmp(segmentInfo.segName, "__PAGEZERO") == 0 )
346 return;
347 if ( segmentInfo.writable() && (segmentInfo.fileSize != segmentInfo.vmSize) )
348 writeExpansion = true; // zerofill at end of __DATA
349 if ( segmentInfo.vmAddr < lowestVmAddr )
350 lowestVmAddr = segmentInfo.vmAddr;
351 if ( segmentInfo.vmAddr+segmentInfo.vmSize > highestVmAddr )
352 highestVmAddr = segmentInfo.vmAddr+segmentInfo.vmSize;
353 sumVmSizes += segmentInfo.vmSize;
354 });
355 uint64_t totalVmSpace = (highestVmAddr - lowestVmAddr);
356 // LINKEDIT vmSize is not required to be a multiple of page size. Round up if that is the case
357 const uint64_t pageSize = uses16KPages() ? 0x4000 : 0x1000;
358 totalVmSpace = (totalVmSpace + (pageSize - 1)) & ~(pageSize - 1);
359 bool hasHole = (totalVmSpace != sumVmSizes); // segments not contiguous
360
361 vmSpace = totalVmSpace;
362 hasZeroFill = writeExpansion || hasHole;
363 }
364
// Returns whether a given class of mach-o malformation should be treated as an
// error for this binary.  Strictness is phased in by platform and SDK version
// so that already-shipped binaries keep loading.  SDK versions are packed as
// 0x00MMmmpp (major << 16 | minor << 8), e.g. 0x000A0E00 == 10.14.
bool MachOAnalyzer::enforceFormat(Malformed kind) const
{
    __block bool result = false;
    forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
        switch (platform) {
            case Platform::macOS:
                switch (kind) {
                    case Malformed::linkeditOrder:
                    case Malformed::linkeditAlignment:
                    case Malformed::dyldInfoAndlocalRelocs:
                        // enforce these checks on new binaries only
                        if (sdk >= 0x000A0E00) // macOS 10.14
                            result = true;
                        break;
                    case Malformed::segmentOrder:
                    case Malformed::linkeditPermissions:
                    case Malformed::textPermissions:
                    case Malformed::executableData:
                    case Malformed::codeSigAlignment:
                        // enforce these checks on new binaries only
                        if (sdk >= 0x000A0F00) // macOS 10.15
                            result = true;
                        break;
                }
                break;
            case Platform::iOS:
                switch (kind) {
                    case Malformed::linkeditOrder:
                    case Malformed::dyldInfoAndlocalRelocs:
                    case Malformed::textPermissions:
                    case Malformed::executableData:
                        // always enforced on iOS
                        result = true;
                        break;
                    case Malformed::linkeditAlignment:
                    case Malformed::segmentOrder:
                    case Malformed::linkeditPermissions:
                    case Malformed::codeSigAlignment:
                        // enforce these checks on new binaries only
                        if (sdk >= 0x000D0000) // iOS 13
                            result = true;
                        break;
                }
                break;
            default:
                // all other platforms are new enough to always enforce
                result = true;
                break;
        }
    });
    // if binary is so old, there is no platform info, don't enforce malformed errors
    return result;
}
416
417 bool MachOAnalyzer::validEmbeddedPaths(Diagnostics& diag, Platform platform, const char* path) const
418 {
419 __block int index = 1;
420 __block bool allGood = true;
421 __block bool foundInstallName = false;
422 __block int dependentsCount = 0;
423 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
424 const dylib_command* dylibCmd;
425 const rpath_command* rpathCmd;
426 switch ( cmd->cmd ) {
427 case LC_ID_DYLIB:
428 foundInstallName = true;
429 // fall through
430 [[clang::fallthrough]];
431 case LC_LOAD_DYLIB:
432 case LC_LOAD_WEAK_DYLIB:
433 case LC_REEXPORT_DYLIB:
434 case LC_LOAD_UPWARD_DYLIB:
435 dylibCmd = (dylib_command*)cmd;
436 if ( dylibCmd->dylib.name.offset > cmd->cmdsize ) {
437 diag.error("in '%s' load command #%d name offset (%u) outside its size (%u)", path, index, dylibCmd->dylib.name.offset, cmd->cmdsize);
438 stop = true;
439 allGood = false;
440 }
441 else {
442 bool foundEnd = false;
443 const char* start = (char*)dylibCmd + dylibCmd->dylib.name.offset;
444 const char* end = (char*)dylibCmd + cmd->cmdsize;
445 for (const char* s=start; s < end; ++s) {
446 if ( *s == '\0' ) {
447 foundEnd = true;
448 break;
449 }
450 }
451 if ( !foundEnd ) {
452 diag.error("in '%s' load command #%d string extends beyond end of load command", path, index);
453 stop = true;
454 allGood = false;
455 }
456 }
457 if ( cmd->cmd != LC_ID_DYLIB )
458 ++dependentsCount;
459 break;
460 case LC_RPATH:
461 rpathCmd = (rpath_command*)cmd;
462 if ( rpathCmd->path.offset > cmd->cmdsize ) {
463 diag.error("in '%s' load command #%d path offset (%u) outside its size (%u)", path, index, rpathCmd->path.offset, cmd->cmdsize);
464 stop = true;
465 allGood = false;
466 }
467 else {
468 bool foundEnd = false;
469 const char* start = (char*)rpathCmd + rpathCmd->path.offset;
470 const char* end = (char*)rpathCmd + cmd->cmdsize;
471 for (const char* s=start; s < end; ++s) {
472 if ( *s == '\0' ) {
473 foundEnd = true;
474 break;
475 }
476 }
477 if ( !foundEnd ) {
478 diag.error("in '%s' load command #%d string extends beyond end of load command", path, index);
479 stop = true;
480 allGood = false;
481 }
482 }
483 break;
484 }
485 ++index;
486 });
487 if ( !allGood )
488 return false;
489
490 if ( this->filetype == MH_DYLIB ) {
491 if ( !foundInstallName ) {
492 diag.error("in '%s' MH_DYLIB is missing LC_ID_DYLIB", path);
493 return false;
494 }
495 }
496 else {
497 if ( foundInstallName ) {
498 diag.error("in '%s' LC_ID_DYLIB found in non-MH_DYLIB", path);
499 return false;
500 }
501 }
502
503 if ( (dependentsCount == 0) && (this->filetype == MH_EXECUTE) && isDynamicExecutable() ) {
504 diag.error("in '%s' missing LC_LOAD_DYLIB (must link with at least libSystem.dylib)", path);
505 return false;
506 }
507
508 return true;
509 }
510
// Validates segment load commands in four phases:
//   1) each LC_SEGMENT/LC_SEGMENT_64 is internally consistent (size matches
//      nsects, file range within file, filesize <= vmsize),
//   2) segment permissions and vm/file ranges are sane, and __TEXT/__LINKEDIT
//      are present,
//   3) no two segments overlap in vm or file ranges, and load command order
//      matches layout order (when enforced for this binary's SDK),
//   4) every section lies within its containing segment.
// Returns true if valid; otherwise records the reason in 'diag'.
bool MachOAnalyzer::validSegments(Diagnostics& diag, const char* path, size_t fileLen) const
{
    // check segment load command size
    __block bool badSegmentLoadCommand = false;
    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_SEGMENT_64 ) {
            const segment_command_64* seg = (segment_command_64*)cmd;
            int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command_64);
            if ( sectionsSpace < 0 ) {
                diag.error("in '%s' load command size too small for LC_SEGMENT_64", path);
                badSegmentLoadCommand = true;
                stop = true;
            }
            else if ( (sectionsSpace % sizeof(section_64)) != 0 ) {
                diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize);
                badSegmentLoadCommand = true;
                stop = true;
            }
            else if ( sectionsSpace != (seg->nsects * sizeof(section_64)) ) {
                diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects);
                badSegmentLoadCommand = true;
                stop = true;
            }
            else if ( greaterThanAddOrOverflow(seg->fileoff, seg->filesize, fileLen) ) {
                diag.error("in '%s' segment load command content extends beyond end of file", path);
                badSegmentLoadCommand = true;
                stop = true;
            }
            else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
                // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
                diag.error("in '%s' segment filesize exceeds vmsize", path);
                badSegmentLoadCommand = true;
                stop = true;
            }
        }
        else if ( cmd->cmd == LC_SEGMENT ) {
            // 32-bit variant: same checks with 32-bit segment/section structs
            const segment_command* seg = (segment_command*)cmd;
            int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command);
            if ( sectionsSpace < 0 ) {
                diag.error("in '%s' load command size too small for LC_SEGMENT", path);
                badSegmentLoadCommand = true;
                stop = true;
            }
            else if ( (sectionsSpace % sizeof(section)) != 0 ) {
                diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize);
                badSegmentLoadCommand = true;
                stop = true;
            }
            else if ( sectionsSpace != (seg->nsects * sizeof(section)) ) {
                diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects);
                badSegmentLoadCommand = true;
                stop = true;
            }
            else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
                // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
                diag.error("in '%s' segment filesize exceeds vmsize", path);
                badSegmentLoadCommand = true;
                stop = true;
            }
        }
    });
    if ( badSegmentLoadCommand )
         return false;

    // check mapping permissions of segments
    __block bool badPermissions = false;
    __block bool badSize        = false;
    __block bool hasTEXT        = false;
    __block bool hasLINKEDIT    = false;
    forEachSegment(^(const SegmentInfo& info, bool& stop) {
        if ( strcmp(info.segName, "__TEXT") == 0 ) {
            if ( (info.protections != (VM_PROT_READ|VM_PROT_EXECUTE)) && enforceFormat(Malformed::textPermissions) ) {
                diag.error("in '%s' __TEXT segment permissions is not 'r-x'", path);
                badPermissions = true;
                stop = true;
            }
            hasTEXT = true;
        }
        else if ( strcmp(info.segName, "__LINKEDIT") == 0 ) {
            if ( (info.protections != VM_PROT_READ) && enforceFormat(Malformed::linkeditPermissions) ) {
                diag.error("in '%s' __LINKEDIT segment permissions is not 'r--'", path);
                badPermissions = true;
                stop = true;
            }
            hasLINKEDIT = true;
        }
        else if ( (info.protections & 0xFFFFFFF8) != 0 ) {
            // only VM_PROT_READ/WRITE/EXECUTE (low 3 bits) are valid
            diag.error("in '%s' %s segment permissions has invalid bits set", path, info.segName);
            badPermissions = true;
            stop = true;
        }
        if ( greaterThanAddOrOverflow(info.fileOffset, info.fileSize, fileLen) ) {
            diag.error("in '%s' %s segment content extends beyond end of file", path, info.segName);
            badSize = true;
            stop = true;
        }
        // check vmAddr+vmSize does not wrap (at pointer width)
        if ( is64() ) {
            if ( info.vmAddr+info.vmSize < info.vmAddr ) {
                diag.error("in '%s' %s segment vm range wraps", path, info.segName);
                badSize = true;
                stop = true;
            }
        }
        else {
            if ( (uint32_t)(info.vmAddr+info.vmSize) < (uint32_t)(info.vmAddr) ) {
                diag.error("in '%s' %s segment vm range wraps", path, info.segName);
                badSize = true;
                stop = true;
            }
        }
    });
    if ( badPermissions || badSize )
        return false;
    if ( !hasTEXT ) {
       diag.error("in '%s' missing __TEXT segment", path);
       return false;
    }
    if ( !hasLINKEDIT ) {
       diag.error("in '%s' missing __LINKEDIT segment", path);
       return false;
    }

    // check for overlapping segments (O(n^2) pairwise comparison)
    __block bool badSegments = false;
    forEachSegment(^(const SegmentInfo& info1, bool& stop1) {
        uint64_t seg1vmEnd   = info1.vmAddr + info1.vmSize;
        uint64_t seg1FileEnd = info1.fileOffset + info1.fileSize;
        forEachSegment(^(const SegmentInfo& info2, bool& stop2) {
            if ( info1.segIndex == info2.segIndex )
                return;
            uint64_t seg2vmEnd   = info2.vmAddr + info2.vmSize;
            uint64_t seg2FileEnd = info2.fileOffset + info2.fileSize;
            if ( ((info2.vmAddr <= info1.vmAddr) && (seg2vmEnd > info1.vmAddr) && (seg1vmEnd > info1.vmAddr )) || ((info2.vmAddr >= info1.vmAddr ) && (info2.vmAddr < seg1vmEnd) && (seg2vmEnd > info2.vmAddr)) ) {
                diag.error("in '%s' segment %s vm range overlaps segment %s", path, info1.segName, info2.segName);
                badSegments = true;
                stop1 = true;
                stop2 = true;
            }
             if ( ((info2.fileOffset  <= info1.fileOffset) && (seg2FileEnd > info1.fileOffset) && (seg1FileEnd > info1.fileOffset)) || ((info2.fileOffset  >= info1.fileOffset) && (info2.fileOffset  < seg1FileEnd) && (seg2FileEnd > info2.fileOffset )) ) {
                diag.error("in '%s' segment %s file content overlaps segment %s", path, info1.segName, info2.segName);
                badSegments = true;
                stop1 = true;
                stop2 = true;
            }
            if ( (info1.segIndex < info2.segIndex) && !stop1 ) {
                if ( (info1.vmAddr > info2.vmAddr) || ((info1.fileOffset > info2.fileOffset ) && (info1.fileOffset != 0) && (info2.fileOffset != 0)) ){
                    if ( !inDyldCache() && enforceFormat(Malformed::segmentOrder) && !isStaticExecutable() ) {
                        // dyld cache __DATA_* segments are moved around
                        // The static kernel also has segments with vmAddr's before __TEXT
                        diag.error("in '%s' segment load commands out of order with respect to layout for %s and %s", path, info1.segName, info2.segName);
                        badSegments = true;
                        stop1 = true;
                        stop2 = true;
                    }
                }
            }
        });
    });
    if ( badSegments )
        return false;

    // check sections are within segment
    __block bool badSections = false;
    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_SEGMENT_64 ) {
            const segment_command_64* seg = (segment_command_64*)cmd;
            const section_64* const sectionsStart = (section_64*)((char*)seg + sizeof(struct segment_command_64));
            const section_64* const sectionsEnd   = &sectionsStart[seg->nsects];
            for (const section_64* sect=sectionsStart; (sect < sectionsEnd); ++sect) {
                if ( (int64_t)(sect->size) < 0 ) {
                    diag.error("in '%s' section %s size too large 0x%llX", path, sect->sectname, sect->size);
                    badSections = true;
                }
                else if ( sect->addr < seg->vmaddr ) {
                    diag.error("in '%s' section %s start address 0x%llX is before containing segment's address 0x%0llX", path, sect->sectname, sect->addr, seg->vmaddr);
                    badSections = true;
                }
                else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
                    diag.error("in '%s' section %s end address 0x%llX is beyond containing segment's end address 0x%0llX", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
                    badSections = true;
                }
            }
        }
        else if ( cmd->cmd == LC_SEGMENT ) {
            const segment_command* seg = (segment_command*)cmd;
            const section* const sectionsStart = (section*)((char*)seg + sizeof(struct segment_command));
            const section* const sectionsEnd   = &sectionsStart[seg->nsects];
            for (const section* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
                if ( (int64_t)(sect->size) < 0 ) {
                    diag.error("in '%s' section %s size too large 0x%X", path, sect->sectname, sect->size);
                    badSections = true;
                }
                else if ( sect->addr < seg->vmaddr ) {
                    diag.error("in '%s' section %s start address 0x%X is before containing segment's address 0x%0X", path, sect->sectname, sect->addr, seg->vmaddr);
                    badSections = true;
                }
                else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
                    diag.error("in '%s' section %s end address 0x%X is beyond containing segment's end address 0x%0X", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
                    badSections = true;
                }
            }
        }
    });

    return !badSections;
}
717
718
// Validates the entry point of a main executable: there must be exactly one
// LC_MAIN or LC_UNIXTHREAD, and the entry address it describes must land in
// an executable segment (LC_MAIN offsets within the first __TEXT segment are
// accepted without further search).  Returns true if valid; otherwise records
// the reason in 'diag'.
bool MachOAnalyzer::validMain(Diagnostics& diag, const char* path) const
{
    // capture the first __TEXT segment's vm range for the fast-path checks below
    __block uint64_t textSegStartAddr = 0;
    __block uint64_t textSegStartSize = 0;
    forEachSegment(^(const SegmentInfo& info, bool& stop) {
        if ( strcmp(info.segName, "__TEXT") == 0 ) {
            textSegStartAddr = info.vmAddr;
            textSegStartSize = info.vmSize;
            stop = true;
        }
    });

    __block int mainCount   = 0;
    __block int threadCount = 0;
    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        entry_point_command* mainCmd;
        uint64_t startAddress;
        switch (cmd->cmd) {
            case LC_MAIN:
                ++mainCount;
                mainCmd = (entry_point_command*)cmd;
                // entryoff within __TEXT is trivially valid; otherwise find
                // which segment (if any) contains the entry address
                if ( mainCmd->entryoff >= textSegStartSize ) {
                    startAddress = preferredLoadAddress() + mainCmd->entryoff;
                    __block bool foundSegment = false;
                    forEachSegment(^(const SegmentInfo& info, bool& stopSegment) {
                        // Skip segments which don't contain this address
                        if ( (startAddress < info.vmAddr) || (startAddress >= info.vmAddr+info.vmSize) )
                            return;
                        foundSegment = true;
                        if ( (info.protections & VM_PROT_EXECUTE) == 0 )
                            diag.error("LC_MAIN points to non-executable segment");
                        stopSegment = true;
                    });
                    if (!foundSegment)
                        diag.error("LC_MAIN entryoff is out of range");
                    stop = true;
                }
                break;
            case LC_UNIXTHREAD:
                ++threadCount;
                // entryAddrFromThreadCmd() returns 0 when the thread state
                // does not match this arch
                startAddress = entryAddrFromThreadCmd((thread_command*)cmd);
                if ( startAddress == 0 ) {
                    diag.error("LC_UNIXTHREAD not valid for arch %s", archName());
                    stop = true;
                }
#if BUILDING_DYLDINFO
                else if ( isStaticExecutable() ) {
                    // static executables (e.g. the kernel) may have the entry
                    // outside __TEXT, so search all segments
                    __block bool foundSegment = false;
                    forEachSegment(^(const SegmentInfo& info, bool& stopSegment) {
                        // Skip segments which don't contain this address
                        if ( (startAddress < info.vmAddr) || (startAddress >= info.vmAddr+info.vmSize) )
                            return;
                        foundSegment = true;
                        if ( (info.protections & VM_PROT_EXECUTE) == 0 )
                            diag.error("LC_UNIXTHREAD points to non-executable segment");
                        stopSegment = true;
                    });
                    if (!foundSegment)
                        diag.error("LC_UNIXTHREAD entry is out of range");
                    stop = true;
                }
#endif
                else if ( (startAddress < textSegStartAddr) || (startAddress >= textSegStartAddr+textSegStartSize) ) {
                    diag.error("LC_UNIXTHREAD entry not in __TEXT segment");
                    stop = true;
                }
                break;
        }
    });
    if ( diag.hasError() )
        return false;
    // exactly one entry point command is required
    if ( diag.noError() && (mainCount+threadCount == 1) )
        return true;

    if ( mainCount + threadCount == 0 )
        diag.error("missing LC_MAIN or LC_UNIXTHREAD");
    else
        diag.error("only one LC_MAIN or LC_UNIXTHREAD is allowed");
    return false;
}
799
800
namespace {
    // Describes one span of LINKEDIT content (rebase opcodes, symbol table,
    // code signature, ...) so that spans can be sorted by file offset and
    // checked for overlap and alignment.
    struct LinkEditContentChunk
    {
        const char* name;             // human-readable name for diagnostics
        uint32_t    alignment;        // required file-offset alignment
        uint32_t    fileOffsetStart;  // file offset where the chunk begins
        uint32_t    size;             // size of the chunk in bytes

        // qsort() comparator ordering chunks by ascending fileOffsetStart.
        // Returns 0 for equal offsets: the previous version returned 1 for
        // both compare(a,b) and compare(b,a) on equal keys, which violates
        // qsort()'s consistency requirement and is undefined behavior.
        static int compareByFileOffset(const void* l, const void* r) {
            const uint32_t lhs = ((const LinkEditContentChunk*)l)->fileOffsetStart;
            const uint32_t rhs = ((const LinkEditContentChunk*)r)->fileOffsetStart;
            if ( lhs < rhs )
                return -1;
            if ( lhs > rhs )
                return 1;
            return 0;
        }
    };
} // anonymous namespace
817
818
819
820 bool MachOAnalyzer::validLinkeditLayout(Diagnostics& diag, const char* path) const
821 {
822 LinkEditInfo leInfo;
823 getLinkEditPointers(diag, leInfo);
824 if ( diag.hasError() )
825 return false;
826 const uint32_t ptrSize = pointerSize();
827
828 // build vector of all blobs in LINKEDIT
829 LinkEditContentChunk blobs[32];
830 LinkEditContentChunk* bp = blobs;
831 if ( leInfo.dyldInfo != nullptr ) {
832 if ( leInfo.dyldInfo->rebase_size != 0 )
833 *bp++ = {"rebase opcodes", ptrSize, leInfo.dyldInfo->rebase_off, leInfo.dyldInfo->rebase_size};
834 if ( leInfo.dyldInfo->bind_size != 0 )
835 *bp++ = {"bind opcodes", ptrSize, leInfo.dyldInfo->bind_off, leInfo.dyldInfo->bind_size};
836 if ( leInfo.dyldInfo->weak_bind_size != 0 )
837 *bp++ = {"weak bind opcodes", ptrSize, leInfo.dyldInfo->weak_bind_off, leInfo.dyldInfo->weak_bind_size};
838 if ( leInfo.dyldInfo->lazy_bind_size != 0 )
839 *bp++ = {"lazy bind opcodes", ptrSize, leInfo.dyldInfo->lazy_bind_off, leInfo.dyldInfo->lazy_bind_size};
840 if ( leInfo.dyldInfo->export_size!= 0 )
841 *bp++ = {"exports trie", ptrSize, leInfo.dyldInfo->export_off, leInfo.dyldInfo->export_size};
842 }
843 if ( leInfo.exportsTrie != nullptr ) {
844 if ( leInfo.exportsTrie->datasize != 0 )
845 *bp++ = {"exports trie", ptrSize, leInfo.exportsTrie->dataoff, leInfo.exportsTrie->datasize};
846 }
847
848 if ( leInfo.dynSymTab != nullptr ) {
849 if ( leInfo.dynSymTab->nlocrel != 0 )
850 *bp++ = {"local relocations", ptrSize, leInfo.dynSymTab->locreloff, static_cast<uint32_t>(leInfo.dynSymTab->nlocrel*sizeof(relocation_info))};
851 if ( leInfo.dynSymTab->nextrel != 0 )
852 *bp++ = {"external relocations", ptrSize, leInfo.dynSymTab->extreloff, static_cast<uint32_t>(leInfo.dynSymTab->nextrel*sizeof(relocation_info))};
853 if ( leInfo.dynSymTab->nindirectsyms != 0 )
854 *bp++ = {"indirect symbol table", 4, leInfo.dynSymTab->indirectsymoff, leInfo.dynSymTab->nindirectsyms*4};
855 }
856 if ( leInfo.splitSegInfo != nullptr ) {
857 if ( leInfo.splitSegInfo->datasize != 0 )
858 *bp++ = {"shared cache info", ptrSize, leInfo.splitSegInfo->dataoff, leInfo.splitSegInfo->datasize};
859 }
860 if ( leInfo.functionStarts != nullptr ) {
861 if ( leInfo.functionStarts->datasize != 0 )
862 *bp++ = {"function starts", ptrSize, leInfo.functionStarts->dataoff, leInfo.functionStarts->datasize};
863 }
864 if ( leInfo.dataInCode != nullptr ) {
865 if ( leInfo.dataInCode->datasize != 0 )
866 *bp++ = {"data in code", ptrSize, leInfo.dataInCode->dataoff, leInfo.dataInCode->datasize};
867 }
868 if ( leInfo.symTab != nullptr ) {
869 if ( leInfo.symTab->nsyms != 0 )
870 *bp++ = {"symbol table", ptrSize, leInfo.symTab->symoff, static_cast<uint32_t>(leInfo.symTab->nsyms*(ptrSize == 8 ? sizeof(nlist_64) : sizeof(struct nlist)))};
871 if ( leInfo.symTab->strsize != 0 )
872 *bp++ = {"symbol table strings", 1, leInfo.symTab->stroff, leInfo.symTab->strsize};
873 }
874 if ( leInfo.codeSig != nullptr ) {
875 if ( leInfo.codeSig->datasize != 0 )
876 *bp++ = {"code signature", ptrSize, leInfo.codeSig->dataoff, leInfo.codeSig->datasize};
877 }
878
879 // check for bad combinations
880 if ( (leInfo.dyldInfo != nullptr) && (leInfo.dyldInfo->cmd == LC_DYLD_INFO_ONLY) && (leInfo.dynSymTab != nullptr) ) {
881 if ( (leInfo.dynSymTab->nlocrel != 0) && enforceFormat(Malformed::dyldInfoAndlocalRelocs) ) {
882 diag.error("in '%s' malformed mach-o contains LC_DYLD_INFO_ONLY and local relocations", path);
883 return false;
884 }
885 if ( leInfo.dynSymTab->nextrel != 0 ) {
886 diag.error("in '%s' malformed mach-o contains LC_DYLD_INFO_ONLY and external relocations", path);
887 return false;
888 }
889 }
890 if ( (leInfo.dyldInfo == nullptr) && (leInfo.dynSymTab == nullptr) ) {
891 diag.error("in '%s' malformed mach-o misssing LC_DYLD_INFO and LC_DYSYMTAB", path);
892 return false;
893 }
894 const unsigned long blobCount = bp - blobs;
895 if ( blobCount == 0 ) {
896 diag.error("in '%s' malformed mach-o misssing LINKEDIT", path);
897 return false;
898 }
899
900 uint32_t linkeditFileEnd = leInfo.layout.linkeditFileOffset + leInfo.layout.linkeditFileSize;
901
902
903 // sort blobs by file-offset and error on overlaps
904 ::qsort(blobs, blobCount, sizeof(LinkEditContentChunk), &LinkEditContentChunk::compareByFileOffset);
905 uint32_t prevEnd = leInfo.layout.linkeditFileOffset;
906 const char* prevName = "start of LINKEDIT";
907 for (unsigned long i=0; i < blobCount; ++i) {
908 const LinkEditContentChunk& blob = blobs[i];
909 if ( blob.fileOffsetStart < prevEnd ) {
910 diag.error("in '%s' LINKEDIT overlap of %s and %s", path, prevName, blob.name);
911 return false;
912 }
913 if (greaterThanAddOrOverflow(blob.fileOffsetStart, blob.size, linkeditFileEnd)) {
914 diag.error("in '%s' LINKEDIT content '%s' extends beyond end of segment", path, blob.name);
915 return false;
916 }
917 if ( (blob.fileOffsetStart & (blob.alignment-1)) != 0 ) {
918 // <rdar://problem/51115705> relax code sig alignment for pre iOS13
919 Malformed kind = (strcmp(blob.name, "code signature") == 0) ? Malformed::codeSigAlignment : Malformed::linkeditAlignment;
920 if ( enforceFormat(kind) )
921 diag.error("in '%s' mis-aligned LINKEDIT content '%s'", path, blob.name);
922 }
923 prevEnd = blob.fileOffsetStart + blob.size;
924 prevName = blob.name;
925 }
926
927 // Check for invalid symbol table sizes
928 if ( leInfo.symTab != nullptr ) {
929 if ( leInfo.symTab->nsyms > 0x10000000 ) {
930 diag.error("in '%s' malformed mach-o image: symbol table too large", path);
931 return false;
932 }
933 if ( leInfo.dynSymTab != nullptr ) {
934 // validate indirect symbol table
935 if ( leInfo.dynSymTab->nindirectsyms != 0 ) {
936 if ( leInfo.dynSymTab->nindirectsyms > 0x10000000 ) {
937 diag.error("in '%s' malformed mach-o image: indirect symbol table too large", path);
938 return false;
939 }
940 }
941 if ( (leInfo.dynSymTab->nlocalsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->ilocalsym > leInfo.symTab->nsyms) ) {
942 diag.error("in '%s' malformed mach-o image: indirect symbol table local symbol count exceeds total symbols", path);
943 return false;
944 }
945 if ( leInfo.dynSymTab->ilocalsym + leInfo.dynSymTab->nlocalsym < leInfo.dynSymTab->ilocalsym ) {
946 diag.error("in '%s' malformed mach-o image: indirect symbol table local symbol count wraps", path);
947 return false;
948 }
949 if ( (leInfo.dynSymTab->nextdefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iextdefsym > leInfo.symTab->nsyms) ) {
950 diag.error("in '%s' malformed mach-o image: indirect symbol table extern symbol count exceeds total symbols", path);
951 return false;
952 }
953 if ( leInfo.dynSymTab->iextdefsym + leInfo.dynSymTab->nextdefsym < leInfo.dynSymTab->iextdefsym ) {
954 diag.error("in '%s' malformed mach-o image: indirect symbol table extern symbol count wraps", path);
955 return false;
956 }
957 if ( (leInfo.dynSymTab->nundefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iundefsym > leInfo.symTab->nsyms) ) {
958 diag.error("in '%s' malformed mach-o image: indirect symbol table undefined symbol count exceeds total symbols", path);
959 return false;
960 }
961 if ( leInfo.dynSymTab->iundefsym + leInfo.dynSymTab->nundefsym < leInfo.dynSymTab->iundefsym ) {
962 diag.error("in '%s' malformed mach-o image: indirect symbol table undefined symbol count wraps", path);
963 return false;
964 }
965 }
966 }
967
968 return true;
969 }
970
971
972
973 bool MachOAnalyzer::invalidRebaseState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
974 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type) const
975 {
976 if ( !segIndexSet ) {
977 diag.error("in '%s' %s missing preceding REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path, opcodeName);
978 return true;
979 }
980 if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
981 diag.error("in '%s' %s segment index %d too large", path, opcodeName, segmentIndex);
982 return true;
983 }
984 if ( segmentOffset > (segments[segmentIndex].vmSize-ptrSize) ) {
985 diag.error("in '%s' %s current segment offset 0x%08llX beyond segment size (0x%08llX)", path, opcodeName, segmentOffset, segments[segmentIndex].vmSize);
986 return true;
987 }
988 switch ( type ) {
989 case REBASE_TYPE_POINTER:
990 if ( !segments[segmentIndex].writable() ) {
991 diag.error("in '%s' %s pointer rebase is in non-writable segment", path, opcodeName);
992 return true;
993 }
994 if ( segments[segmentIndex].executable() && enforceFormat(Malformed::executableData) ) {
995 diag.error("in '%s' %s pointer rebase is in executable segment", path, opcodeName);
996 return true;
997 }
998 break;
999 case REBASE_TYPE_TEXT_ABSOLUTE32:
1000 case REBASE_TYPE_TEXT_PCREL32:
1001 if ( !segments[segmentIndex].textRelocs ) {
1002 diag.error("in '%s' %s text rebase is in segment that does not support text relocations", path, opcodeName);
1003 return true;
1004 }
1005 if ( segments[segmentIndex].writable() ) {
1006 diag.error("in '%s' %s text rebase is in writable segment", path, opcodeName);
1007 return true;
1008 }
1009 if ( !segments[segmentIndex].executable() ) {
1010 diag.error("in '%s' %s pointer rebase is in non-executable segment", path, opcodeName);
1011 return true;
1012 }
1013 break;
1014 default:
1015 diag.error("in '%s' %s unknown rebase type %d", path, opcodeName, type);
1016 return true;
1017 }
1018 return false;
1019 }
1020
1021
1022 void MachOAnalyzer::getAllSegmentsInfos(Diagnostics& diag, SegmentInfo segments[]) const
1023 {
1024 forEachSegment(^(const SegmentInfo& info, bool& stop) {
1025 segments[info.segIndex] = info;
1026 });
1027 }
1028
1029
1030 bool MachOAnalyzer::validRebaseInfo(Diagnostics& diag, const char* path) const
1031 {
1032 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1033 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1034 if ( invalidRebaseState(diag, opcodeName, path, leInfo, segments, segIndexSet, ptrSize, segmentIndex, segmentOffset, type) )
1035 stop = true;
1036 });
1037 return diag.noError();
1038 }
1039
1040
1041 void MachOAnalyzer::forEachTextRebase(Diagnostics& diag, void (^handler)(uint64_t runtimeOffset, bool& stop)) const
1042 {
1043 __block bool startVmAddrSet = false;
1044 __block uint64_t startVmAddr = 0;
1045 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1046 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1047 if ( type != REBASE_TYPE_TEXT_ABSOLUTE32 )
1048 return;
1049 if ( !startVmAddrSet ) {
1050 for (int i=0; i <= segmentIndex; ++i) {
1051 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1052 startVmAddr = segments[i].vmAddr;
1053 startVmAddrSet = true;
1054 break;
1055 }
1056 }
1057 }
1058 uint64_t rebaseVmAddr = segments[segmentIndex].vmAddr + segmentOffset;
1059 uint64_t runtimeOffset = rebaseVmAddr - startVmAddr;
1060 handler(runtimeOffset, stop);
1061 });
1062 }
1063
1064
// Calls handler with the image runtime offset of every pointer rebase location.
// When ignoreLazyPointers is true, rebases that target "normal" lazy pointers
// (lazy-pointer slots whose current content points at a regular stub inside
// __stub_helper) are skipped, since dyld re-sets those slots at bind time;
// lazy pointers that do not point into __stub_helper (e.g. weak-def targets)
// still get their rebase reported.
void MachOAnalyzer::forEachRebase(Diagnostics& diag, bool ignoreLazyPointers, void (^handler)(uint64_t runtimeOffset, bool& stop)) const
{
    __block bool startVmAddrSet = false;
    __block uint64_t startVmAddr = 0;
    // vmaddr range of the lazy-pointer section (if any)
    __block uint64_t lpVmAddr = 0;
    __block uint64_t lpEndVmAddr = 0;
    // vmaddr range of the __stub_helper section (if any)
    __block uint64_t shVmAddr = 0;
    __block uint64_t shEndVmAddr = 0;
    if ( ignoreLazyPointers ) {
        // locate the lazy-pointer and stub-helper sections up front
        forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
            if ( (info.sectFlags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
                lpVmAddr = info.sectAddr;
                lpEndVmAddr = info.sectAddr + info.sectSize;
            }
            else if ( (info.sectFlags & S_ATTR_PURE_INSTRUCTIONS) && (strcmp(info.sectName, "__stub_helper") == 0) ) {
                shVmAddr = info.sectAddr;
                shEndVmAddr = info.sectAddr + info.sectSize;
            }
        });
    }
    forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
                          bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
        if ( type != REBASE_TYPE_POINTER )
            return;
        if ( !startVmAddrSet ) {
            // lazily locate __TEXT; its vmAddr is the base runtime offsets are relative to.
            // NOTE(review): this scans segments strictly before segmentIndex, while
            // forEachTextRebase() scans i <= segmentIndex — confirm which bound is intended.
            for (int i=0; i < segmentIndex; ++i) {
                if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
                    startVmAddr = segments[i].vmAddr;
                    startVmAddrSet = true;
                    break;
                }
            }
        }
        uint64_t rebaseVmAddr  = segments[segmentIndex].vmAddr + segmentOffset;
        bool skipRebase = false;
        if ( (rebaseVmAddr >= lpVmAddr) && (rebaseVmAddr < lpEndVmAddr) ) {
            // rebase is in lazy pointer section
            uint64_t lpValue = 0;
            // read the lazy pointer's current (on-disk) content
            if ( ptrSize == 8 )
                lpValue = *((uint64_t*)(rebaseVmAddr-startVmAddr+(uint8_t*)this));
            else
                lpValue = *((uint32_t*)(rebaseVmAddr-startVmAddr+(uint8_t*)this));
            if ( (lpValue >= shVmAddr) && (lpValue < shEndVmAddr) ) {
                // content is into stub_helper section
                uint64_t lpTargetImageOffset = lpValue - startVmAddr;
                const uint8_t* helperContent = (uint8_t*)this + lpTargetImageOffset;
                bool isLazyStub = contentIsRegularStub(helperContent);
                // ignore rebases for normal lazy pointers, but leave rebase for resolver helper stub
                if ( isLazyStub )
                    skipRebase = true;
            }
            else {
                // if lazy pointer does not point into stub_helper, then it points to weak-def symbol and we need rebase
            }
        }
        if ( !skipRebase ) {
            // report the rebase as an offset from the image base
            uint64_t runtimeOffset = rebaseVmAddr - startVmAddr;
            handler(runtimeOffset, stop);
        }
    });
}
1126
1127
1128 bool MachOAnalyzer::contentIsRegularStub(const uint8_t* helperContent) const
1129 {
1130 switch (this->cputype) {
1131 case CPU_TYPE_X86_64:
1132 return ( (helperContent[0] == 0x68) && (helperContent[5] == 0xE9) ); // push $xxx / JMP pcRel
1133 case CPU_TYPE_I386:
1134 return ( (helperContent[0] == 0x68) && (helperContent[5] == 0xFF) && (helperContent[2] == 0x26) ); // push $xxx / JMP *pcRel
1135 case CPU_TYPE_ARM:
1136 return ( (helperContent[0] == 0x00) && (helperContent[1] == 0xC0) && (helperContent[2] == 0x9F) && (helperContent[3] == 0xE5) ); // ldr ip, [pc, #0]
1137 case CPU_TYPE_ARM64:
1138 return ( (helperContent[0] == 0x50) && (helperContent[1] == 0x00) && (helperContent[2] == 0x00) && (helperContent[3] == 0x18) ); // ldr w16, L0
1139
1140 }
1141 return false;
1142 }
1143
// qsort() comparator for uint32_t values.  Returns negative/zero/positive for
// less/equal/greater.  The previous version returned 1 for equal keys, which
// makes compare(a,b) and compare(b,a) both positive — an inconsistent ordering
// that qsort() is not required to tolerate.
static int uint32Sorter(const void* l, const void* r) {
    const uint32_t left  = *static_cast<const uint32_t*>(l);
    const uint32_t right = *static_cast<const uint32_t*>(r);
    if ( left < right )
        return -1;
    if ( left > right )
        return 1;
    return 0;
}
1150
1151
1152 void MachOAnalyzer::forEachRebase(Diagnostics& diag,
1153 void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1154 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1155 uint8_t type, bool& stop)) const
1156 {
1157 LinkEditInfo leInfo;
1158 getLinkEditPointers(diag, leInfo);
1159 if ( diag.hasError() )
1160 return;
1161
1162 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1163 getAllSegmentsInfos(diag, segmentsInfo);
1164 if ( diag.hasError() )
1165 return;
1166
1167 if ( leInfo.dyldInfo != nullptr ) {
1168 const uint8_t* const start = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
1169 const uint8_t* const end = start + leInfo.dyldInfo->rebase_size;
1170 const uint8_t* p = start;
1171 const uint32_t ptrSize = pointerSize();
1172 uint8_t type = 0;
1173 int segIndex = 0;
1174 uint64_t segOffset = 0;
1175 uint64_t count;
1176 uint64_t skip;
1177 bool segIndexSet = false;
1178 bool stop = false;
1179 while ( !stop && diag.noError() && (p < end) ) {
1180 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
1181 uint8_t opcode = *p & REBASE_OPCODE_MASK;
1182 ++p;
1183 switch (opcode) {
1184 case REBASE_OPCODE_DONE:
1185 if ( (end - p) > 8 )
1186 diag.error("rebase opcodes terminated early at offset %d of %d", (int)(p-start), (int)(end-start));
1187 stop = true;
1188 break;
1189 case REBASE_OPCODE_SET_TYPE_IMM:
1190 type = immediate;
1191 break;
1192 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1193 segIndex = immediate;
1194 segOffset = read_uleb128(diag, p, end);
1195 segIndexSet = true;
1196 break;
1197 case REBASE_OPCODE_ADD_ADDR_ULEB:
1198 segOffset += read_uleb128(diag, p, end);
1199 break;
1200 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
1201 segOffset += immediate*ptrSize;
1202 break;
1203 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
1204 for (int i=0; i < immediate; ++i) {
1205 handler("REBASE_OPCODE_DO_REBASE_IMM_TIMES", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1206 segOffset += ptrSize;
1207 if ( stop )
1208 break;
1209 }
1210 break;
1211 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
1212 count = read_uleb128(diag, p, end);
1213 for (uint32_t i=0; i < count; ++i) {
1214 handler("REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1215 segOffset += ptrSize;
1216 if ( stop )
1217 break;
1218 }
1219 break;
1220 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
1221 handler("REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1222 segOffset += read_uleb128(diag, p, end) + ptrSize;
1223 break;
1224 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
1225 count = read_uleb128(diag, p, end);
1226 if ( diag.hasError() )
1227 break;
1228 skip = read_uleb128(diag, p, end);
1229 for (uint32_t i=0; i < count; ++i) {
1230 handler("REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1231 segOffset += skip + ptrSize;
1232 if ( stop )
1233 break;
1234 }
1235 break;
1236 default:
1237 diag.error("unknown rebase opcode 0x%02X", opcode);
1238 }
1239 }
1240 }
1241 else if ( leInfo.chainedFixups != nullptr ) {
1242 // binary uses chained fixups, so do nothing
1243 }
1244 else {
1245 // old binary, walk relocations
1246 const uint64_t relocsStartAddress = relocBaseAddress(segmentsInfo, leInfo.layout.linkeditSegIndex);
1247 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->locreloff);
1248 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nlocrel];
1249 bool stop = false;
1250 const uint8_t relocSize = (is64() ? 3 : 2);
1251 const uint8_t ptrSize = pointerSize();
1252 STACK_ALLOC_OVERFLOW_SAFE_ARRAY(uint32_t, relocAddrs, 2048);
1253 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
1254 if ( reloc->r_length != relocSize ) {
1255 diag.error("local relocation has wrong r_length");
1256 break;
1257 }
1258 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
1259 diag.error("local relocation has wrong r_type");
1260 break;
1261 }
1262 relocAddrs.push_back(reloc->r_address);
1263 }
1264 if ( !relocAddrs.empty() ) {
1265 ::qsort(&relocAddrs[0], relocAddrs.count(), sizeof(uint32_t), &uint32Sorter);
1266 for (uint32_t addrOff : relocAddrs) {
1267 uint32_t segIndex = 0;
1268 uint64_t segOffset = 0;
1269 if ( segIndexAndOffsetForAddress(relocsStartAddress+addrOff, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1270 uint8_t type = REBASE_TYPE_POINTER;
1271 if ( this->cputype == CPU_TYPE_I386 ) {
1272 if ( segmentsInfo[segIndex].executable() )
1273 type = REBASE_TYPE_TEXT_ABSOLUTE32;
1274 }
1275 handler("local relocation", leInfo, segmentsInfo, true, ptrSize, segIndex, segOffset, type , stop);
1276 }
1277 else {
1278 diag.error("local relocation has out of range r_address");
1279 break;
1280 }
1281 }
1282 }
1283 // then process indirect symbols
1284 forEachIndirectPointer(diag, ^(uint64_t address, bool bind, int bindLibOrdinal,
1285 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
1286 if ( bind )
1287 return;
1288 uint32_t segIndex = 0;
1289 uint64_t segOffset = 0;
1290 if ( segIndexAndOffsetForAddress(address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1291 handler("local relocation", leInfo, segmentsInfo, true, ptrSize, segIndex, segOffset, REBASE_TYPE_POINTER, indStop);
1292 }
1293 else {
1294 diag.error("local relocation has out of range r_address");
1295 indStop = true;
1296 }
1297 });
1298 }
1299 }
1300
1301 bool MachOAnalyzer::segIndexAndOffsetForAddress(uint64_t addr, const SegmentInfo segmentsInfos[], uint32_t segCount, uint32_t& segIndex, uint64_t& segOffset) const
1302 {
1303 for (uint32_t i=0; i < segCount; ++i) {
1304 if ( (segmentsInfos[i].vmAddr <= addr) && (addr < segmentsInfos[i].vmAddr+segmentsInfos[i].vmSize) ) {
1305 segIndex = i;
1306 segOffset = addr - segmentsInfos[i].vmAddr;
1307 return true;
1308 }
1309 }
1310 return false;
1311 }
1312
1313 uint64_t MachOAnalyzer::relocBaseAddress(const SegmentInfo segmentsInfos[], uint32_t segCount) const
1314 {
1315 if ( is64() ) {
1316 // x86_64 reloc base address is first writable segment
1317 for (uint32_t i=0; i < segCount; ++i) {
1318 if ( segmentsInfos[i].writable() )
1319 return segmentsInfos[i].vmAddr;
1320 }
1321 }
1322 return segmentsInfos[0].vmAddr;
1323 }
1324
1325
1326
1327 void MachOAnalyzer::forEachIndirectPointer(Diagnostics& diag, void (^handler)(uint64_t pointerAddress, bool bind, int bindLibOrdinal, const char* bindSymbolName,
1328 bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& stop)) const
1329 {
1330 LinkEditInfo leInfo;
1331 getLinkEditPointers(diag, leInfo);
1332 if ( diag.hasError() )
1333 return;
1334
1335 // find lazy and non-lazy pointer sections
1336 const bool is64Bit = is64();
1337 const uint32_t* const indirectSymbolTable = (uint32_t*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->indirectsymoff);
1338 const uint32_t indirectSymbolTableCount = leInfo.dynSymTab->nindirectsyms;
1339 const uint32_t ptrSize = pointerSize();
1340 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
1341 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
1342 const struct nlist* symbols32 = (struct nlist*)symbolTable;
1343 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1344 uint32_t symCount = leInfo.symTab->nsyms;
1345 uint32_t poolSize = leInfo.symTab->strsize;
1346 __block bool stop = false;
1347 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectInfo, bool malformedSectionRange, bool& sectionStop) {
1348 uint8_t sectionType = (sectInfo.sectFlags & SECTION_TYPE);
1349 bool selfModifyingStub = (sectionType == S_SYMBOL_STUBS) && (sectInfo.sectFlags & S_ATTR_SELF_MODIFYING_CODE) && (sectInfo.reserved2 == 5) && (this->cputype == CPU_TYPE_I386);
1350 if ( (sectionType != S_LAZY_SYMBOL_POINTERS) && (sectionType != S_NON_LAZY_SYMBOL_POINTERS) && !selfModifyingStub )
1351 return;
1352 if ( (flags & S_ATTR_SELF_MODIFYING_CODE) && !selfModifyingStub ) {
1353 diag.error("S_ATTR_SELF_MODIFYING_CODE section type only valid in old i386 binaries");
1354 sectionStop = true;
1355 return;
1356 }
1357 uint32_t elementSize = selfModifyingStub ? sectInfo.reserved2 : ptrSize;
1358 uint32_t elementCount = (uint32_t)(sectInfo.sectSize/elementSize);
1359 if ( greaterThanAddOrOverflow(sectInfo.reserved1, elementCount, indirectSymbolTableCount) ) {
1360 diag.error("section %s overflows indirect symbol table", sectInfo.sectName);
1361 sectionStop = true;
1362 return;
1363 }
1364
1365 for (uint32_t i=0; (i < elementCount) && !stop; ++i) {
1366 uint32_t symNum = indirectSymbolTable[sectInfo.reserved1 + i];
1367 if ( symNum == INDIRECT_SYMBOL_ABS )
1368 continue;
1369 if ( symNum == INDIRECT_SYMBOL_LOCAL ) {
1370 handler(sectInfo.sectAddr+i*elementSize, false, 0, "", false, false, false, stop);
1371 continue;
1372 }
1373 if ( symNum > symCount ) {
1374 diag.error("indirect symbol[%d] = %d which is invalid symbol index", sectInfo.reserved1 + i, symNum);
1375 sectionStop = true;
1376 return;
1377 }
1378 uint16_t n_desc = is64Bit ? symbols64[symNum].n_desc : symbols32[symNum].n_desc;
1379 uint8_t n_type = is64Bit ? symbols64[symNum].n_type : symbols32[symNum].n_type;
1380 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
1381 uint32_t strOffset = is64Bit ? symbols64[symNum].n_un.n_strx : symbols32[symNum].n_un.n_strx;
1382 if ( strOffset > poolSize ) {
1383 diag.error("symbol[%d] string offset out of range", sectInfo.reserved1 + i);
1384 sectionStop = true;
1385 return;
1386 }
1387 const char* symbolName = stringPool + strOffset;
1388 bool weakImport = (n_desc & N_WEAK_REF);
1389 bool lazy = (sectionType == S_LAZY_SYMBOL_POINTERS);
1390 // Handle defined weak def symbols which need to get a special ordinal
1391 if ( ((n_type & N_TYPE) == N_SECT) && ((n_type & N_EXT) != 0) && ((n_desc & N_WEAK_DEF) != 0) )
1392 libOrdinal = BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
1393 handler(sectInfo.sectAddr+i*elementSize, true, libOrdinal, symbolName, weakImport, lazy, selfModifyingStub, stop);
1394 }
1395 sectionStop = stop;
1396 });
1397 }
1398
1399 int MachOAnalyzer::libOrdinalFromDesc(uint16_t n_desc) const
1400 {
1401 // -flat_namespace is always flat lookup
1402 if ( (this->flags & MH_TWOLEVEL) == 0 )
1403 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1404
1405 // extract byte from undefined symbol entry
1406 int libIndex = GET_LIBRARY_ORDINAL(n_desc);
1407 switch ( libIndex ) {
1408 case SELF_LIBRARY_ORDINAL:
1409 return BIND_SPECIAL_DYLIB_SELF;
1410
1411 case DYNAMIC_LOOKUP_ORDINAL:
1412 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1413
1414 case EXECUTABLE_ORDINAL:
1415 return BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
1416 }
1417
1418 return libIndex;
1419 }
1420
1421 bool MachOAnalyzer::validBindInfo(Diagnostics& diag, const char* path) const
1422 {
1423 forEachBind(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1424 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1425 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1426 uint8_t type, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
1427 if ( invalidBindState(diag, opcodeName, path, leInfo, segments, segIndexSet, libraryOrdinalSet, dylibCount,
1428 libOrdinal, ptrSize, segmentIndex, segmentOffset, type, symbolName) ) {
1429 stop = true;
1430 }
1431 }, ^(const char* symbolName) {
1432 }, ^() { });
1433 return diag.noError();
1434 }
1435
1436 bool MachOAnalyzer::invalidBindState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1437 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint32_t ptrSize,
1438 uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, const char* symbolName) const
1439 {
1440 if ( !segIndexSet ) {
1441 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path, opcodeName);
1442 return true;
1443 }
1444 if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
1445 diag.error("in '%s' %s segment index %d too large", path, opcodeName, segmentIndex);
1446 return true;
1447 }
1448 if ( segmentOffset > (segments[segmentIndex].vmSize-ptrSize) ) {
1449 diag.error("in '%s' %s current segment offset 0x%08llX beyond segment size (0x%08llX)", path, opcodeName, segmentOffset, segments[segmentIndex].vmSize);
1450 return true;
1451 }
1452 if ( symbolName == NULL ) {
1453 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", path, opcodeName);
1454 return true;
1455 }
1456 if ( !libraryOrdinalSet ) {
1457 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL", path, opcodeName);
1458 return true;
1459 }
1460 if ( libOrdinal > (int)dylibCount ) {
1461 diag.error("in '%s' %s has library ordinal too large (%d) max (%d)", path, opcodeName, libOrdinal, dylibCount);
1462 return true;
1463 }
1464 if ( libOrdinal < BIND_SPECIAL_DYLIB_WEAK_LOOKUP ) {
1465 diag.error("in '%s' %s has unknown library special ordinal (%d)", path, opcodeName, libOrdinal);
1466 return true;
1467 }
1468 switch ( type ) {
1469 case BIND_TYPE_POINTER:
1470 if ( !segments[segmentIndex].writable() ) {
1471 diag.error("in '%s' %s pointer bind is in non-writable segment", path, opcodeName);
1472 return true;
1473 }
1474 if ( segments[segmentIndex].executable() && enforceFormat(Malformed::executableData) ) {
1475 diag.error("in '%s' %s pointer bind is in executable segment", path, opcodeName);
1476 return true;
1477 }
1478 break;
1479 case BIND_TYPE_TEXT_ABSOLUTE32:
1480 case BIND_TYPE_TEXT_PCREL32:
1481 if ( !segments[segmentIndex].textRelocs ) {
1482 diag.error("in '%s' %s text bind is in segment that does not support text relocations", path, opcodeName);
1483 return true;
1484 }
1485 if ( segments[segmentIndex].writable() ) {
1486 diag.error("in '%s' %s text bind is in writable segment", path, opcodeName);
1487 return true;
1488 }
1489 if ( !segments[segmentIndex].executable() ) {
1490 diag.error("in '%s' %s pointer bind is in non-executable segment", path, opcodeName);
1491 return true;
1492 }
1493 break;
1494 default:
1495 diag.error("in '%s' %s unknown bind type %d", path, opcodeName, type);
1496 return true;
1497 }
1498 return false;
1499 }
1500
1501 void MachOAnalyzer::forEachBind(Diagnostics& diag, void (^handler)(uint64_t runtimeOffset, int libOrdinal, const char* symbolName,
1502 bool weakImport, bool lazyBind, uint64_t addend, bool& stop),
1503 void (^strongHandler)(const char* symbolName),
1504 void (^missingLazyBindHandler)()) const
1505 {
1506 __block bool startVmAddrSet = false;
1507 __block uint64_t startVmAddr = 0;
1508 forEachBind(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1509 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1510 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1511 uint8_t type, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
1512 if ( !startVmAddrSet ) {
1513 for (int i=0; i <= segmentIndex; ++i) {
1514 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1515 startVmAddr = segments[i].vmAddr;
1516 startVmAddrSet = true;
1517 break;
1518 }
1519 }
1520 }
1521 uint64_t bindVmOffset = segments[segmentIndex].vmAddr + segmentOffset;
1522 uint64_t runtimeOffset = bindVmOffset - startVmAddr;
1523 handler(runtimeOffset, libOrdinal, symbolName, weakImport, lazyBind, addend, stop);
1524 }, ^(const char* symbolName) {
1525 strongHandler(symbolName);
1526 }, ^() {
1527 missingLazyBindHandler();
1528 });
1529 }
1530
1531 void MachOAnalyzer::forEachBind(Diagnostics& diag,
1532 void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1533 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1534 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type,
1535 const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop),
1536 void (^strongHandler)(const char* symbolName),
1537 void (^missingLazyBindHandler)()) const
1538 {
1539 const uint32_t ptrSize = this->pointerSize();
1540 bool stop = false;
1541
1542 LinkEditInfo leInfo;
1543 getLinkEditPointers(diag, leInfo);
1544 if ( diag.hasError() )
1545 return;
1546
1547 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1548 getAllSegmentsInfos(diag, segmentsInfo);
1549 if ( diag.hasError() )
1550 return;
1551
1552
1553
1554 const uint32_t dylibCount = dependentDylibCount();
1555
1556 if ( leInfo.dyldInfo != nullptr ) {
1557 // process bind opcodes
1558 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
1559 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
1560 uint8_t type = 0;
1561 uint64_t segmentOffset = 0;
1562 uint8_t segmentIndex = 0;
1563 const char* symbolName = NULL;
1564 int libraryOrdinal = 0;
1565 bool segIndexSet = false;
1566 bool libraryOrdinalSet = false;
1567
1568 int64_t addend = 0;
1569 uint64_t count;
1570 uint64_t skip;
1571 bool weakImport = false;
1572 while ( !stop && diag.noError() && (p < end) ) {
1573 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1574 uint8_t opcode = *p & BIND_OPCODE_MASK;
1575 ++p;
1576 switch (opcode) {
1577 case BIND_OPCODE_DONE:
1578 stop = true;
1579 break;
1580 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1581 libraryOrdinal = immediate;
1582 libraryOrdinalSet = true;
1583 break;
1584 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1585 libraryOrdinal = (int)read_uleb128(diag, p, end);
1586 libraryOrdinalSet = true;
1587 break;
1588 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1589 // the special ordinals are negative numbers
1590 if ( immediate == 0 )
1591 libraryOrdinal = 0;
1592 else {
1593 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1594 libraryOrdinal = signExtended;
1595 }
1596 libraryOrdinalSet = true;
1597 break;
1598 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1599 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1600 symbolName = (char*)p;
1601 while (*p != '\0')
1602 ++p;
1603 ++p;
1604 break;
1605 case BIND_OPCODE_SET_TYPE_IMM:
1606 type = immediate;
1607 break;
1608 case BIND_OPCODE_SET_ADDEND_SLEB:
1609 addend = read_sleb128(diag, p, end);
1610 break;
1611 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1612 segmentIndex = immediate;
1613 segmentOffset = read_uleb128(diag, p, end);
1614 segIndexSet = true;
1615 break;
1616 case BIND_OPCODE_ADD_ADDR_ULEB:
1617 segmentOffset += read_uleb128(diag, p, end);
1618 break;
1619 case BIND_OPCODE_DO_BIND:
1620 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1621 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1622 segmentOffset += ptrSize;
1623 break;
1624 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1625 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1626 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1627 segmentOffset += read_uleb128(diag, p, end) + ptrSize;
1628 break;
1629 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1630 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1631 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1632 segmentOffset += immediate*ptrSize + ptrSize;
1633 break;
1634 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1635 count = read_uleb128(diag, p, end);
1636 skip = read_uleb128(diag, p, end);
1637 for (uint32_t i=0; i < count; ++i) {
1638 handler("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1639 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1640 segmentOffset += skip + ptrSize;
1641 if ( stop )
1642 break;
1643 }
1644 break;
1645 default:
1646 diag.error("bad bind opcode 0x%02X", *p);
1647 }
1648 }
1649 if ( diag.hasError() )
1650 return;
1651
1652 // process lazy bind opcodes
1653 uint32_t lazyDoneCount = 0;
1654 uint32_t lazyBindCount = 0;
1655 if ( leInfo.dyldInfo->lazy_bind_size != 0 ) {
1656 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
1657 end = p + leInfo.dyldInfo->lazy_bind_size;
1658 type = BIND_TYPE_POINTER;
1659 segmentOffset = 0;
1660 segmentIndex = 0;
1661 symbolName = NULL;
1662 libraryOrdinal = 0;
1663 segIndexSet = false;
1664 libraryOrdinalSet= false;
1665 addend = 0;
1666 weakImport = false;
1667 stop = false;
1668 while ( !stop && diag.noError() && (p < end) ) {
1669 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1670 uint8_t opcode = *p & BIND_OPCODE_MASK;
1671 ++p;
1672 switch (opcode) {
1673 case BIND_OPCODE_DONE:
1674 // this opcode marks the end of each lazy pointer binding
1675 ++lazyDoneCount;
1676 break;
1677 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1678 libraryOrdinal = immediate;
1679 libraryOrdinalSet = true;
1680 break;
1681 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1682 libraryOrdinal = (int)read_uleb128(diag, p, end);
1683 libraryOrdinalSet = true;
1684 break;
1685 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1686 // the special ordinals are negative numbers
1687 if ( immediate == 0 )
1688 libraryOrdinal = 0;
1689 else {
1690 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1691 libraryOrdinal = signExtended;
1692 }
1693 libraryOrdinalSet = true;
1694 break;
1695 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1696 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1697 symbolName = (char*)p;
1698 while (*p != '\0')
1699 ++p;
1700 ++p;
1701 break;
1702 case BIND_OPCODE_SET_ADDEND_SLEB:
1703 addend = read_sleb128(diag, p, end);
1704 break;
1705 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1706 segmentIndex = immediate;
1707 segmentOffset = read_uleb128(diag, p, end);
1708 segIndexSet = true;
1709 break;
1710 case BIND_OPCODE_DO_BIND:
1711 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1712 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, true, addend, stop);
1713 segmentOffset += ptrSize;
1714 ++lazyBindCount;
1715 break;
1716 case BIND_OPCODE_SET_TYPE_IMM:
1717 case BIND_OPCODE_ADD_ADDR_ULEB:
1718 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1719 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1720 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1721 default:
1722 diag.error("bad lazy bind opcode 0x%02X", opcode);
1723 break;
1724 }
1725 }
1726 if ( lazyDoneCount > lazyBindCount+7 )
1727 missingLazyBindHandler();
1728 // diag.error("lazy bind opcodes missing binds");
1729 }
1730 if ( diag.hasError() )
1731 return;
1732
1733 // process weak bind info
1734 if ( leInfo.dyldInfo->weak_bind_size != 0 ) {
1735 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->weak_bind_off);
1736 end = p + leInfo.dyldInfo->weak_bind_size;
1737 type = BIND_TYPE_POINTER;
1738 segmentOffset = 0;
1739 segmentIndex = 0;
1740 symbolName = NULL;
1741 libraryOrdinal = BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
1742 segIndexSet = false;
1743 libraryOrdinalSet= true;
1744 addend = 0;
1745 weakImport = false;
1746 stop = false;
1747 while ( !stop && diag.noError() && (p < end) ) {
1748 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1749 uint8_t opcode = *p & BIND_OPCODE_MASK;
1750 ++p;
1751 switch (opcode) {
1752 case BIND_OPCODE_DONE:
1753 stop = true;
1754 break;
1755 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1756 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1757 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1758 diag.error("unexpected dylib ordinal in weak_bind");
1759 break;
1760 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1761 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1762 symbolName = (char*)p;
1763 while (*p != '\0')
1764 ++p;
1765 ++p;
1766 if ( immediate & BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION ) {
1767 strongHandler(symbolName);
1768 }
1769 break;
1770 case BIND_OPCODE_SET_TYPE_IMM:
1771 type = immediate;
1772 break;
1773 case BIND_OPCODE_SET_ADDEND_SLEB:
1774 addend = read_sleb128(diag, p, end);
1775 break;
1776 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1777 segmentIndex = immediate;
1778 segmentOffset = read_uleb128(diag, p, end);
1779 segIndexSet = true;
1780 break;
1781 case BIND_OPCODE_ADD_ADDR_ULEB:
1782 segmentOffset += read_uleb128(diag, p, end);
1783 break;
1784 case BIND_OPCODE_DO_BIND:
1785 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1786 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1787 segmentOffset += ptrSize;
1788 break;
1789 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1790 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1791 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1792 segmentOffset += read_uleb128(diag, p, end) + ptrSize;
1793 break;
1794 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1795 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1796 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1797 segmentOffset += immediate*ptrSize + ptrSize;
1798 break;
1799 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1800 count = read_uleb128(diag, p, end);
1801 skip = read_uleb128(diag, p, end);
1802 for (uint32_t i=0; i < count; ++i) {
1803 handler("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1804 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1805 segmentOffset += skip + ptrSize;
1806 if ( stop )
1807 break;
1808 }
1809 break;
1810 default:
1811 diag.error("bad bind opcode 0x%02X", *p);
1812 }
1813 }
1814 }
1815 }
1816 else if ( leInfo.chainedFixups != nullptr ) {
1817 // binary uses chained fixups, so do nothing
1818 }
1819 else {
1820 // old binary, process external relocations
1821 const uint64_t relocsStartAddress = relocBaseAddress(segmentsInfo, leInfo.layout.linkeditSegIndex);
1822 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->extreloff);
1823 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nextrel];
1824 bool is64Bit = is64() ;
1825 const uint8_t relocSize = (is64Bit ? 3 : 2);
1826 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
1827 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
1828 const struct nlist* symbols32 = (struct nlist*)symbolTable;
1829 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1830 uint32_t symCount = leInfo.symTab->nsyms;
1831 uint32_t poolSize = leInfo.symTab->strsize;
1832 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
1833 if ( reloc->r_length != relocSize ) {
1834 diag.error("external relocation has wrong r_length");
1835 break;
1836 }
1837 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
1838 diag.error("external relocation has wrong r_type");
1839 break;
1840 }
1841 uint32_t segIndex = 0;
1842 uint64_t segOffset = 0;
1843 if ( segIndexAndOffsetForAddress(relocsStartAddress+reloc->r_address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1844 uint32_t symbolIndex = reloc->r_symbolnum;
1845 if ( symbolIndex > symCount ) {
1846 diag.error("external relocation has out of range r_symbolnum");
1847 break;
1848 }
1849 else {
1850 uint32_t strOffset = is64Bit ? symbols64[symbolIndex].n_un.n_strx : symbols32[symbolIndex].n_un.n_strx;
1851 uint16_t n_desc = is64Bit ? symbols64[symbolIndex].n_desc : symbols32[symbolIndex].n_desc;
1852 uint8_t n_type = is64Bit ? symbols64[symbolIndex].n_type : symbols32[symbolIndex].n_type;
1853 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
1854 if ( strOffset >= poolSize ) {
1855 diag.error("external relocation has r_symbolnum=%d which has out of range n_strx", symbolIndex);
1856 break;
1857 }
1858 else {
1859 const char* symbolName = stringPool + strOffset;
1860 bool weakImport = (n_desc & N_WEAK_REF);
1861 const uint8_t* content = (uint8_t*)this + segmentsInfo[segIndex].vmAddr - leInfo.layout.textUnslidVMAddr + segOffset;
1862 uint64_t addend = is64Bit ? *((uint64_t*)content) : *((uint32_t*)content);
1863 // Handle defined weak def symbols which need to get a special ordinal
1864 if ( ((n_type & N_TYPE) == N_SECT) && ((n_type & N_EXT) != 0) && ((n_desc & N_WEAK_DEF) != 0) )
1865 libOrdinal = BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
1866 handler("external relocation", leInfo, segmentsInfo, true, true, dylibCount, libOrdinal,
1867 ptrSize, segIndex, segOffset, BIND_TYPE_POINTER, symbolName, weakImport, false, addend, stop);
1868 }
1869 }
1870 }
1871 else {
1872 diag.error("local relocation has out of range r_address");
1873 break;
1874 }
1875 }
1876 // then process indirect symbols
1877 forEachIndirectPointer(diag, ^(uint64_t address, bool bind, int bindLibOrdinal,
1878 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
1879 if ( !bind )
1880 return;
1881 uint32_t segIndex = 0;
1882 uint64_t segOffset = 0;
1883 if ( segIndexAndOffsetForAddress(address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1884 handler("indirect symbol", leInfo, segmentsInfo, true, true, dylibCount, bindLibOrdinal,
1885 ptrSize, segIndex, segOffset, BIND_TYPE_POINTER, bindSymbolName, bindWeakImport, bindLazy, 0, indStop);
1886 }
1887 else {
1888 diag.error("indirect symbol has out of range address");
1889 indStop = true;
1890 }
1891 });
1892 }
1893
1894 }
1895
1896 bool MachOAnalyzer::validChainedFixupsInfo(Diagnostics& diag, const char* path) const
1897 {
1898 LinkEditInfo leInfo;
1899 getLinkEditPointers(diag, leInfo);
1900 if ( diag.hasError() )
1901 return false;
1902
1903 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1904 getAllSegmentsInfos(diag, segmentsInfo);
1905 if ( diag.hasError() )
1906 return false;
1907
1908 // validate dyld_chained_fixups_header
1909 const dyld_chained_fixups_header* chainsHeader = (dyld_chained_fixups_header*)getLinkEditContent(leInfo.layout, leInfo.chainedFixups->dataoff);
1910 if ( chainsHeader->fixups_version != 0 ) {
1911 diag.error("chained fixups, unknown header version");
1912 return false;
1913 }
1914 if ( chainsHeader->starts_offset >= leInfo.chainedFixups->datasize ) {
1915 diag.error("chained fixups, starts_offset exceeds LC_DYLD_CHAINED_FIXUPS size");
1916 return false;
1917 }
1918 if ( chainsHeader->imports_offset >= leInfo.chainedFixups->datasize ) {
1919 diag.error("chained fixups, imports_offset exceeds LC_DYLD_CHAINED_FIXUPS size");
1920 return false;
1921 }
1922 if ( chainsHeader->imports_count >= 0xFFFF ) {
1923 diag.error("chained fixups, imports_count exceeds 64K");
1924 return false;
1925 }
1926 uint32_t formatEntrySize;
1927 switch ( chainsHeader->imports_format ) {
1928 case DYLD_CHAINED_IMPORT:
1929 formatEntrySize = sizeof(dyld_chained_import);
1930 break;
1931 case DYLD_CHAINED_IMPORT_ADDEND:
1932 formatEntrySize = sizeof(dyld_chained_import_addend);
1933 break;
1934 case DYLD_CHAINED_IMPORT_ADDEND64:
1935 formatEntrySize = sizeof(dyld_chained_import_addend64);
1936 break;
1937 default:
1938 diag.error("chained fixups, unknown imports_format");
1939 return false;
1940 }
1941 if ( greaterThanAddOrOverflow(chainsHeader->imports_offset, (formatEntrySize * chainsHeader->imports_count), chainsHeader->symbols_offset) ) {
1942 diag.error("chained fixups, imports array overlaps symbols");
1943 return false;
1944 }
1945 if ( chainsHeader->symbols_format != 0 ) {
1946 diag.error("chained fixups, symbols_format unknown");
1947 return false;
1948 }
1949
1950 // validate dyld_chained_starts_in_image
1951 const dyld_chained_starts_in_image* startsInfo = (dyld_chained_starts_in_image*)((uint8_t*)chainsHeader + chainsHeader->starts_offset);
1952 if ( startsInfo->seg_count != leInfo.layout.linkeditSegIndex+1 ) {
1953 diag.error("chained fixups, seg_count does not match number of segments");
1954 return false;
1955 }
1956 const uint64_t baseAddress = preferredLoadAddress();
1957 uint32_t maxValidPointerSeen = 0;
1958 const uint8_t* endOfStarts = (uint8_t*)chainsHeader + chainsHeader->imports_offset;
1959 for (uint32_t i=0; i < startsInfo->seg_count; ++i) {
1960 uint32_t segInfoOffset = startsInfo->seg_info_offset[i];
1961 // 0 offset means this segment has no fixups
1962 if ( segInfoOffset == 0 )
1963 continue;
1964 const dyld_chained_starts_in_segment* segInfo = (dyld_chained_starts_in_segment*)((uint8_t*)startsInfo + segInfoOffset);
1965 if ( segInfo->size > (endOfStarts - (uint8_t*)segInfo) ) {
1966 diag.error("chained fixups, dyld_chained_starts_in_segment for segment #%d overruns imports table", i);
1967 return false;
1968 }
1969
1970 // validate dyld_chained_starts_in_segment
1971 if ( (segInfo->page_size != 0x1000) && (segInfo->page_size != 0x4000) ) {
1972 diag.error("chained fixups, page_size not 4KB or 16KB in segment #%d", i);
1973 return false;
1974 }
1975 if ( segInfo->pointer_format > 10 ) {
1976 diag.error("chained fixups, unknown pointer_format %d in segment #%d", segInfo->pointer_format, i);
1977 return false;
1978 }
1979 if ( segInfo->segment_offset != (segmentsInfo[i].vmAddr - baseAddress) ) {
1980 diag.error("chained fixups, segment_offset does not match vmaddr from LC_SEGMENT in segment #%d", i);
1981 return false;
1982 }
1983 if ( segInfo->max_valid_pointer != 0 ) {
1984 if ( maxValidPointerSeen == 0 ) {
1985 // record max_valid_pointer values seen
1986 maxValidPointerSeen = segInfo->max_valid_pointer;
1987 }
1988 else if ( maxValidPointerSeen != segInfo->max_valid_pointer ) {
1989 diag.error("chained fixups, different max_valid_pointer values seen in different segments");
1990 return false;
1991 }
1992 }
1993 // validate starts table in segment
1994 if ( offsetof(dyld_chained_starts_in_segment, page_start[segInfo->page_count]) > segInfo->size ) {
1995 diag.error("chained fixups, page_start array overflows size");
1996 return false;
1997 }
1998 uint32_t maxOverflowIndex = (uint32_t)(segInfo->size - offsetof(dyld_chained_starts_in_segment, page_start[segInfo->page_count]))/sizeof(uint16_t);
1999 for (int pageIndex=0; pageIndex < segInfo->page_count; ++pageIndex) {
2000 uint16_t offsetInPage = segInfo->page_start[pageIndex];
2001 if ( offsetInPage == DYLD_CHAINED_PTR_START_NONE )
2002 continue;
2003 if ( (offsetInPage & DYLD_CHAINED_PTR_START_MULTI) == 0 ) {
2004 // this is the offset into the page where the first fixup is
2005 if ( offsetInPage > segInfo->page_size ) {
2006 diag.error("chained fixups, in segment #%d page_start[%d]=0x%04X exceeds page size", i, pageIndex, offsetInPage);
2007 }
2008 }
2009 else {
2010 // this is actually an index into chain_starts[]
2011 uint32_t overflowIndex = offsetInPage & ~DYLD_CHAINED_PTR_START_MULTI;
2012 // now verify all starts are within the page and in ascending order
2013 uint16_t lastOffsetInPage = 0;
2014 do {
2015 if ( overflowIndex > maxOverflowIndex ) {
2016 diag.error("chain overflow index out of range %d (max=%d) in segment %s", overflowIndex, maxOverflowIndex, segmentName(i));
2017 return false;
2018 }
2019 offsetInPage = (segInfo->page_start[overflowIndex] & ~DYLD_CHAINED_PTR_START_LAST);
2020 if ( offsetInPage > segInfo->page_size ) {
2021 diag.error("chained fixups, in segment #%d overflow page_start[%d]=0x%04X exceeds page size", i, overflowIndex, offsetInPage);
2022 return false;
2023 }
2024 if ( (offsetInPage <= lastOffsetInPage) && (lastOffsetInPage != 0) ) {
2025 diag.error("chained fixups, in segment #%d overflow page_start[%d]=0x%04X is before previous at 0x%04X\n", i, overflowIndex, offsetInPage, lastOffsetInPage);
2026 return false;
2027 }
2028 lastOffsetInPage = offsetInPage;
2029 ++overflowIndex;
2030 } while ( (segInfo->page_start[overflowIndex] & DYLD_CHAINED_PTR_START_LAST) == 0 );
2031 }
2032 }
2033
2034 }
2035 // validate max_valid_pointer is larger than last segment
2036 if ( (maxValidPointerSeen != 0) && !inDyldCache() ) {
2037 uint64_t lastSegmentLastVMAddr = segmentsInfo[leInfo.layout.linkeditSegIndex-1].vmAddr + segmentsInfo[leInfo.layout.linkeditSegIndex-1].vmSize;
2038 if ( maxValidPointerSeen < lastSegmentLastVMAddr ) {
2039 diag.error("chained fixups, max_valid_pointer too small for image");
2040 return false;
2041 }
2042 }
2043
2044 return diag.noError();
2045 }
2046
2047 bool MachOAnalyzer::validChainedFixupsInfoOldArm64e(Diagnostics& diag, const char* path) const
2048 {
2049 __block uint32_t maxTargetCount = 0;
2050 __block uint32_t currentTargetCount = 0;
2051 parseOrgArm64eChainedFixups(diag,
2052 ^(uint32_t totalTargets, bool& stop) {
2053 maxTargetCount = totalTargets;
2054 },
2055 ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop) {
2056 if ( symbolName == NULL ) {
2057 diag.error("in '%s' missing BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", path);
2058 }
2059 else if ( !libraryOrdinalSet ) {
2060 diag.error("in '%s' missing BIND_OPCODE_SET_DYLIB_ORDINAL", path);
2061 }
2062 else if ( libOrdinal > (int)dylibCount ) {
2063 diag.error("in '%s' has library ordinal too large (%d) max (%d)", path, libOrdinal, dylibCount);
2064 }
2065 else if ( libOrdinal < BIND_SPECIAL_DYLIB_WEAK_LOOKUP ) {
2066 diag.error("in '%s' has unknown library special ordinal (%d)", path, libOrdinal);
2067 }
2068 else if ( type != BIND_TYPE_POINTER ) {
2069 diag.error("in '%s' unknown bind type %d", path, type);
2070 }
2071 else if ( currentTargetCount > maxTargetCount ) {
2072 diag.error("in '%s' chained target counts exceeds BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB", path);
2073 }
2074 ++currentTargetCount;
2075 if ( diag.hasError() )
2076 stop = true;
2077 },
2078 ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, uint16_t format, bool& stop) {
2079 if ( !segIndexSet ) {
2080 diag.error("in '%s' missing BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path);
2081 }
2082 else if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
2083 diag.error("in '%s' segment index %d too large", path, segmentIndex);
2084 }
2085 else if ( segmentOffset > (segments[segmentIndex].vmSize-8) ) {
2086 diag.error("in '%s' current segment offset 0x%08llX beyond segment size (0x%08llX)", path, segmentOffset, segments[segmentIndex].vmSize);
2087 }
2088 else if ( !segments[segmentIndex].writable() ) {
2089 diag.error("in '%s' pointer bind is in non-writable segment", path);
2090 }
2091 else if ( segments[segmentIndex].executable() ) {
2092 diag.error("in '%s' pointer bind is in executable segment", path);
2093 }
2094 if ( diag.hasError() )
2095 stop = true;
2096 }
2097 );
2098
2099 return diag.noError();
2100 }
2101
2102
2103
2104 void MachOAnalyzer::parseOrgArm64eChainedFixups(Diagnostics& diag, void (^targetCount)(uint32_t totalTargets, bool& stop),
2105 void (^addTarget)(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop),
2106 void (^addChainStart)(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, uint16_t format, bool& stop)) const
2107 {
2108 bool stop = false;
2109
2110 LinkEditInfo leInfo;
2111 getLinkEditPointers(diag, leInfo);
2112 if ( diag.hasError() )
2113 return;
2114
2115 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
2116 getAllSegmentsInfos(diag, segmentsInfo);
2117 if ( diag.hasError() )
2118 return;
2119
2120 const uint32_t dylibCount = dependentDylibCount();
2121
2122 if ( leInfo.dyldInfo != nullptr ) {
2123 // process bind opcodes
2124 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
2125 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
2126 uint8_t type = 0;
2127 uint64_t segmentOffset = 0;
2128 uint8_t segmentIndex = 0;
2129 const char* symbolName = NULL;
2130 int libraryOrdinal = 0;
2131 bool segIndexSet = false;
2132 bool libraryOrdinalSet = false;
2133 uint64_t targetTableCount;
2134 uint64_t addend = 0;
2135 bool weakImport = false;
2136 while ( !stop && diag.noError() && (p < end) ) {
2137 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
2138 uint8_t opcode = *p & BIND_OPCODE_MASK;
2139 ++p;
2140 switch (opcode) {
2141 case BIND_OPCODE_DONE:
2142 stop = true;
2143 break;
2144 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
2145 libraryOrdinal = immediate;
2146 libraryOrdinalSet = true;
2147 break;
2148 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
2149 libraryOrdinal = (int)read_uleb128(diag, p, end);
2150 libraryOrdinalSet = true;
2151 break;
2152 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
2153 // the special ordinals are negative numbers
2154 if ( immediate == 0 )
2155 libraryOrdinal = 0;
2156 else {
2157 int8_t signExtended = BIND_OPCODE_MASK | immediate;
2158 libraryOrdinal = signExtended;
2159 }
2160 libraryOrdinalSet = true;
2161 break;
2162 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
2163 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
2164 symbolName = (char*)p;
2165 while (*p != '\0')
2166 ++p;
2167 ++p;
2168 break;
2169 case BIND_OPCODE_SET_TYPE_IMM:
2170 type = immediate;
2171 break;
2172 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
2173 segmentIndex = immediate;
2174 segmentOffset = read_uleb128(diag, p, end);
2175 segIndexSet = true;
2176 break;
2177 case BIND_OPCODE_SET_ADDEND_SLEB:
2178 addend = read_sleb128(diag, p, end);
2179 break;
2180 case BIND_OPCODE_DO_BIND:
2181 if ( addTarget )
2182 addTarget(leInfo, segmentsInfo, libraryOrdinalSet, dylibCount, libraryOrdinal, type, symbolName, addend, weakImport, stop);
2183 break;
2184 case BIND_OPCODE_THREADED:
2185 switch (immediate) {
2186 case BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB:
2187 targetTableCount = read_uleb128(diag, p, end);
2188 if ( targetTableCount > 65535 ) {
2189 diag.error("BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB size too large");
2190 stop = true;
2191 }
2192 else {
2193 if ( targetCount )
2194 targetCount((uint32_t)targetTableCount, stop);
2195 }
2196 break;
2197 case BIND_SUBOPCODE_THREADED_APPLY:
2198 if ( addChainStart )
2199 addChainStart(leInfo, segmentsInfo, segmentIndex, segIndexSet, segmentOffset, DYLD_CHAINED_PTR_ARM64E, stop);
2200 break;
2201 default:
2202 diag.error("bad BIND_OPCODE_THREADED sub-opcode 0x%02X", immediate);
2203 }
2204 break;
2205 default:
2206 diag.error("bad bind opcode 0x%02X", immediate);
2207 }
2208 }
2209 if ( diag.hasError() )
2210 return;
2211 }
2212 }
2213
2214 void MachOAnalyzer::forEachChainedFixupTarget(Diagnostics& diag, void (^callback)(int libOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop)) const
2215 {
2216 LinkEditInfo leInfo;
2217 getLinkEditPointers(diag, leInfo);
2218 if ( diag.hasError() )
2219 return;
2220
2221 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
2222 getAllSegmentsInfos(diag, segmentsInfo);
2223 if ( diag.hasError() )
2224 return;
2225
2226 bool stop = false;
2227 if ( leInfo.dyldInfo != nullptr ) {
2228 parseOrgArm64eChainedFixups(diag, nullptr, ^(const LinkEditInfo& leInfo2, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount,
2229 int libOrdinal, uint8_t type, const char* symbolName, uint64_t fixAddend, bool weakImport, bool& stopChain) {
2230 callback(libOrdinal, symbolName, fixAddend, weakImport, stopChain);
2231 }, nullptr);
2232 }
2233 else if ( leInfo.chainedFixups != nullptr ) {
2234 const dyld_chained_fixups_header* header = (dyld_chained_fixups_header*)getLinkEditContent(leInfo.layout, leInfo.chainedFixups->dataoff);
2235 if ( (header->imports_offset > leInfo.chainedFixups->datasize) || (header->symbols_offset > leInfo.chainedFixups->datasize) ) {
2236 diag.error("malformed import table");
2237 return;
2238 }
2239 const dyld_chained_import* imports;
2240 const dyld_chained_import_addend* importsA32;
2241 const dyld_chained_import_addend64* importsA64;
2242 const char* symbolsPool = (char*)header + header->symbols_offset;
2243 uint32_t maxSymbolOffset = leInfo.chainedFixups->datasize - header->symbols_offset;
2244 int libOrdinal;
2245 switch (header->imports_format) {
2246 case DYLD_CHAINED_IMPORT:
2247 imports = (dyld_chained_import*)((uint8_t*)header + header->imports_offset);
2248 for (uint32_t i=0; i < header->imports_count && !stop; ++i) {
2249 const char* symbolName = &symbolsPool[imports[i].name_offset];
2250 if ( imports[i].name_offset > maxSymbolOffset ) {
2251 diag.error("malformed import table, string overflow");
2252 return;
2253 }
2254 uint8_t libVal = imports[i].lib_ordinal;
2255 if ( libVal > 0xF0 )
2256 libOrdinal = (int8_t)libVal;
2257 else
2258 libOrdinal = libVal;
2259 callback(libOrdinal, symbolName, 0, imports[i].weak_import, stop);
2260 }
2261 break;
2262 case DYLD_CHAINED_IMPORT_ADDEND:
2263 importsA32 = (dyld_chained_import_addend*)((uint8_t*)header + header->imports_offset);
2264 for (uint32_t i=0; i < header->imports_count && !stop; ++i) {
2265 const char* symbolName = &symbolsPool[importsA32[i].name_offset];
2266 if ( importsA32[i].name_offset > maxSymbolOffset ) {
2267 diag.error("malformed import table, string overflow");
2268 return;
2269 }
2270 uint8_t libVal = importsA32[i].lib_ordinal;
2271 if ( libVal > 0xF0 )
2272 libOrdinal = (int8_t)libVal;
2273 else
2274 libOrdinal = libVal;
2275 callback(libOrdinal, symbolName, importsA32[i].addend, importsA32[i].weak_import, stop);
2276 }
2277 break;
2278 case DYLD_CHAINED_IMPORT_ADDEND64:
2279 importsA64 = (dyld_chained_import_addend64*)((uint8_t*)header + header->imports_offset);
2280 for (uint32_t i=0; i < header->imports_count && !stop; ++i) {
2281 const char* symbolName = &symbolsPool[importsA64[i].name_offset];
2282 if ( importsA64[i].name_offset > maxSymbolOffset ) {
2283 diag.error("malformed import table, string overflow");
2284 return;
2285 }
2286 uint16_t libVal = importsA64[i].lib_ordinal;
2287 if ( libVal > 0xFFF0 )
2288 libOrdinal = (int16_t)libVal;
2289 else
2290 libOrdinal = libVal;
2291 callback(libOrdinal, symbolName, importsA64[i].addend, importsA64[i].weak_import, stop);
2292 }
2293 break;
2294 default:
2295 diag.error("unknown imports format");
2296 return;
2297 }
2298 }
2299 }
2300
2301 uint32_t MachOAnalyzer::segmentCount() const
2302 {
2303 __block uint32_t count = 0;
2304 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2305 ++count;
2306 });
2307 return count;
2308 }
2309
// Returns true if this image has a usable LC_CODE_SIGNATURE load command,
// filling in the signature blob's file offset and size.  Outputs are zeroed
// when no signature is present.
bool MachOAnalyzer::hasCodeSignature(uint32_t& fileOffset, uint32_t& size) const
{
    fileOffset = 0;
    size = 0;

    // find the (at most one) LC_CODE_SIGNATURE load command
    Diagnostics diag;
    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_CODE_SIGNATURE ) {
            const linkedit_data_command* sigCmd = (linkedit_data_command*)cmd;
            fileOffset = sigCmd->dataoff;
            size = sigCmd->datasize;
            stop = true;
        }
    });
    diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call

    // early exit if no LC_CODE_SIGNATURE
    if ( fileOffset == 0 )
        return false;

    // <rdar://problem/13622786> ignore code signatures in macOS binaries built with pre-10.9 tools
    if ( (this->cputype == CPU_TYPE_X86_64) || (this->cputype == CPU_TYPE_I386) ) {
        __block bool foundPlatform = false;
        __block bool badSignature = false;
        forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
            foundPlatform = true;
            // 0x000A0900 == macOS 10.9 packed version
            if ( (platform == Platform::macOS) && (sdk < 0x000A0900) )
                badSignature = true;
        });
        // signature only counts if a platform was recorded and the SDK is new enough
        return foundPlatform && !badSignature;
    }

    return true;
}
2344
2345 bool MachOAnalyzer::hasProgramVars(Diagnostics& diag, uint32_t& progVarsOffset) const
2346 {
2347 if ( this->filetype != MH_EXECUTE )
2348 return false;
2349
2350 // macOS 10.8+ program uses LC_MAIN and ProgramVars are in libdyld.dylib
2351 // macOS 10.6 -> 10.7 ProgramVars are in __program_vars section in main executable
2352 // macOS 10.5 ProgramVars are in __dyld section in main executable and 7 pointers in size
2353 // macOS 10.4 and earlier ProgramVars need to be looked up by name in nlist of main executable
2354
2355 uint32_t offset;
2356 bool usesCRT;
2357 if ( getEntry(offset, usesCRT) && usesCRT ) {
2358 // is pre-10.8 program
2359 uint64_t sectionSize;
2360 if ( const void* progVarsSection = findSectionContent("__DATA", "__program_vars", sectionSize) ) {
2361 progVarsOffset = (uint32_t)((uint8_t*)progVarsSection - (uint8_t*)this);
2362 return true;
2363 }
2364 else if ( const void* dyldSection = findSectionContent("__DATA", "__dyld", sectionSize) ) {
2365 if ( sectionSize >= 7*pointerSize() ) {
2366 progVarsOffset = (uint32_t)((uint8_t*)dyldSection - (uint8_t*)this) + 2*pointerSize();
2367 return true;
2368 }
2369 }
2370 diag.error("pre-macOS 10.5 binaries not supported");
2371 return true;
2372 }
2373 return false;
2374 }
2375
2376 bool MachOAnalyzer::hasInitializer(Diagnostics& diag, bool contentRebased, const void* dyldCache) const
2377 {
2378 __block bool result = false;
2379 forEachInitializer(diag, contentRebased, ^(uint32_t offset) {
2380 result = true;
2381 }, dyldCache);
2382 return result;
2383 }
2384
2385 void MachOAnalyzer::forEachInitializerPointerSection(Diagnostics& diag, void (^callback)(uint32_t sectionOffset, uint32_t sectionSize, const uint8_t* content, bool& stop)) const
2386 {
2387 const unsigned ptrSize = pointerSize();
2388 const uint64_t baseAddress = preferredLoadAddress();
2389 const uint64_t slide = (uint64_t)this - baseAddress;
2390 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& sectStop) {
2391 if ( (info.sectFlags & SECTION_TYPE) == S_MOD_INIT_FUNC_POINTERS ) {
2392 if ( (info.sectSize % ptrSize) != 0 ) {
2393 diag.error("initializer section %s/%s has bad size", info.segInfo.segName, info.sectName);
2394 sectStop = true;
2395 return;
2396 }
2397 if ( malformedSectionRange ) {
2398 diag.error("initializer section %s/%s extends beyond its segment", info.segInfo.segName, info.sectName);
2399 sectStop = true;
2400 return;
2401 }
2402 const uint8_t* content = (uint8_t*)(info.sectAddr + slide);
2403 if ( ((long)content % ptrSize) != 0 ) {
2404 diag.error("initializer section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2405 sectStop = true;
2406 return;
2407 }
2408 callback((uint32_t)(info.sectAddr - baseAddress), (uint32_t)info.sectSize, content, sectStop);
2409 }
2410 });
2411 }
2412
2413 struct VIS_HIDDEN SegmentRanges
2414 {
2415 struct SegmentRange {
2416 uint64_t vmAddrStart;
2417 uint64_t vmAddrEnd;
2418 uint32_t fileSize;
2419 };
2420
2421 bool contains(uint64_t vmAddr) const {
2422 for (const SegmentRange& range : segments) {
2423 if ( (range.vmAddrStart <= vmAddr) && (vmAddr < range.vmAddrEnd) )
2424 return true;
2425 }
2426 return false;
2427 }
2428
2429 private:
2430 SegmentRange localAlloc[1];
2431
2432 public:
2433 dyld3::OverflowSafeArray<SegmentRange> segments { localAlloc, sizeof(localAlloc) / sizeof(localAlloc[0]) };
2434 };
2435
2436 void MachOAnalyzer::forEachInitializer(Diagnostics& diag, bool contentRebased, void (^callback)(uint32_t offset), const void* dyldCache) const
2437 {
2438 __block SegmentRanges executableSegments;
2439 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2440 if ( (info.protections & VM_PROT_EXECUTE) != 0 ) {
2441 executableSegments.segments.push_back({ info.vmAddr, info.vmAddr + info.vmSize, (uint32_t)info.fileSize });
2442 }
2443 });
2444
2445 if (executableSegments.segments.empty()) {
2446 diag.error("no exeutable segments");
2447 return;
2448 }
2449
2450 uint64_t loadAddress = preferredLoadAddress();
2451 intptr_t slide = getSlide();
2452
2453 // if dylib linked with -init linker option, that initializer is first
2454 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2455 if ( cmd->cmd == LC_ROUTINES ) {
2456 const routines_command* routines = (routines_command*)cmd;
2457 uint64_t dashInit = routines->init_address;
2458 if ( executableSegments.contains(dashInit) )
2459 callback((uint32_t)(dashInit - loadAddress));
2460 else
2461 diag.error("-init does not point within __TEXT segment");
2462 }
2463 else if ( cmd->cmd == LC_ROUTINES_64 ) {
2464 const routines_command_64* routines = (routines_command_64*)cmd;
2465 uint64_t dashInit = routines->init_address;
2466 if ( executableSegments.contains(dashInit) )
2467 callback((uint32_t)(dashInit - loadAddress));
2468 else
2469 diag.error("-init does not point within __TEXT segment");
2470 }
2471 });
2472
2473 // next any function pointers in mod-init section
2474 const unsigned ptrSize = pointerSize();
2475 const bool useChainedFixups = hasChainedFixups();
2476 const uint16_t pointerFormat = useChainedFixups ? this->chainedPointerFormat() : 0;
2477 forEachInitializerPointerSection(diag, ^(uint32_t sectionOffset, uint32_t sectionSize, const uint8_t* content, bool& stop) {
2478 if ( ptrSize == 8 ) {
2479 const uint64_t* initsStart = (uint64_t*)content;
2480 const uint64_t* initsEnd = (uint64_t*)((uint8_t*)content + sectionSize);
2481 for (const uint64_t* p=initsStart; p < initsEnd; ++p) {
2482 uint64_t anInit = *p;
2483 if ( contentRebased ) {
2484 // The function pointer may have been signed. Strip the signature if that is the case
2485 #if __has_feature(ptrauth_calls)
2486 anInit = (uint64_t)__builtin_ptrauth_strip((void*)anInit, ptrauth_key_asia);
2487 #endif
2488 anInit -= slide;
2489 }
2490 else if ( useChainedFixups ) {
2491 uint64_t initFuncRuntimeOffset;
2492 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2493 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2494 anInit = loadAddress+initFuncRuntimeOffset;
2495 }
2496 else {
2497 diag.error("initializer is not rebased");
2498 stop = true;
2499 break;
2500 }
2501 }
2502 if ( !executableSegments.contains(anInit) ) {
2503 diag.error("initializer 0x%0llX does not point within executable segment", anInit);
2504 stop = true;
2505 break;
2506 }
2507 callback((uint32_t)(anInit - loadAddress));
2508 }
2509 }
2510 else {
2511 const uint32_t* initsStart = (uint32_t*)content;
2512 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + sectionSize);
2513 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2514 uint32_t anInit = *p;
2515 if ( contentRebased ) {
2516 anInit -= slide;
2517 }
2518 else if ( useChainedFixups ) {
2519 uint64_t initFuncRuntimeOffset;
2520 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2521 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2522 anInit = (uint32_t)(loadAddress+initFuncRuntimeOffset);
2523 }
2524 else {
2525 diag.error("initializer is not rebased");
2526 stop = true;
2527 break;
2528 }
2529 }
2530 if ( !executableSegments.contains(anInit) ) {
2531 diag.error("initializer 0x%0X does not point within executable segment", anInit);
2532 stop = true;
2533 break;
2534 }
2535 callback(anInit - (uint32_t)loadAddress);
2536 }
2537 }
2538 });
2539
2540 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2541 if ( (info.sectFlags & SECTION_TYPE) != S_INIT_FUNC_OFFSETS )
2542 return;
2543 const uint8_t* content = (uint8_t*)(info.sectAddr + slide);
2544 if ( info.segInfo.writable() ) {
2545 diag.error("initializer offsets section %s/%s must be in read-only segment", info.segInfo.segName, info.sectName);
2546 stop = true;
2547 return;
2548 }
2549 if ( (info.sectSize % 4) != 0 ) {
2550 diag.error("initializer offsets section %s/%s has bad size", info.segInfo.segName, info.sectName);
2551 stop = true;
2552 return;
2553 }
2554 if ( malformedSectionRange ) {
2555 diag.error("initializer offsets section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2556 stop = true;
2557 return;
2558 }
2559 if ( (info.sectAddr % 4) != 0 ) {
2560 diag.error("initializer offsets section %s/%s is not 4-byte aligned", info.segInfo.segName, info.sectName);
2561 stop = true;
2562 return;
2563 }
2564 const uint32_t* initsStart = (uint32_t*)content;
2565 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + info.sectSize);
2566 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2567 uint32_t anInitOffset = *p;
2568 if ( !executableSegments.contains(loadAddress + anInitOffset) ) {
2569 diag.error("initializer 0x%08X does not an offset to an executable segment", anInitOffset);
2570 stop = true;
2571 break;
2572 }
2573 callback(anInitOffset);
2574 }
2575 });
2576 }
2577
2578 bool MachOAnalyzer::hasTerminators(Diagnostics& diag, bool contentRebased) const
2579 {
2580 __block bool result = false;
2581 forEachTerminator(diag, contentRebased, ^(uint32_t offset) {
2582 result = true;
2583 });
2584 return result;
2585 }
2586
2587 void MachOAnalyzer::forEachTerminator(Diagnostics& diag, bool contentRebased, void (^callback)(uint32_t offset)) const
2588 {
2589 __block SegmentRanges executableSegments;
2590 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2591 if ( (info.protections & VM_PROT_EXECUTE) != 0 ) {
2592 executableSegments.segments.push_back({ info.vmAddr, info.vmAddr + info.vmSize, (uint32_t)info.fileSize });
2593 }
2594 });
2595
2596 if (executableSegments.segments.empty()) {
2597 diag.error("no exeutable segments");
2598 return;
2599 }
2600
2601 uint64_t loadAddress = preferredLoadAddress();
2602 intptr_t slide = getSlide();
2603
2604 // next any function pointers in mod-term section
2605 const unsigned ptrSize = pointerSize();
2606 const bool useChainedFixups = hasChainedFixups();
2607 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2608 if ( (info.sectFlags & SECTION_TYPE) == S_MOD_TERM_FUNC_POINTERS ) {
2609 uint64_t initFuncRuntimeOffset;
2610 const uint16_t pointerFormat = useChainedFixups ? this->chainedPointerFormat() : 0;
2611 const uint8_t* content;
2612 content = (uint8_t*)(info.sectAddr + slide);
2613 if ( (info.sectSize % ptrSize) != 0 ) {
2614 diag.error("terminator section %s/%s has bad size", info.segInfo.segName, info.sectName);
2615 stop = true;
2616 return;
2617 }
2618 if ( malformedSectionRange ) {
2619 diag.error("terminator section %s/%s extends beyond its segment", info.segInfo.segName, info.sectName);
2620 stop = true;
2621 return;
2622 }
2623 if ( ((long)content % ptrSize) != 0 ) {
2624 diag.error("terminator section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2625 stop = true;
2626 return;
2627 }
2628 if ( ptrSize == 8 ) {
2629 const uint64_t* initsStart = (uint64_t*)content;
2630 const uint64_t* initsEnd = (uint64_t*)((uint8_t*)content + info.sectSize);
2631 for (const uint64_t* p=initsStart; p < initsEnd; ++p) {
2632 uint64_t anInit = *p;
2633 if ( contentRebased ) {
2634 // The function pointer may have been signed. Strip the signature if that is the case
2635 #if __has_feature(ptrauth_calls)
2636 anInit = (uint64_t)__builtin_ptrauth_strip((void*)anInit, ptrauth_key_asia);
2637 #endif
2638 anInit -= slide;
2639 }
2640 else if ( useChainedFixups ) {
2641 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2642 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2643 anInit = loadAddress+initFuncRuntimeOffset;
2644 }
2645 else {
2646 diag.error("terminator is not rebased");
2647 stop = true;
2648 break;
2649 }
2650 }
2651 if ( !executableSegments.contains(anInit) ) {
2652 diag.error("terminator 0x%0llX does not point within executable segment", anInit);
2653 stop = true;
2654 break;
2655 }
2656 callback((uint32_t)(anInit - loadAddress));
2657 }
2658 }
2659 else {
2660 const uint32_t* initsStart = (uint32_t*)content;
2661 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + info.sectSize);
2662 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2663 uint32_t anInit = *p;
2664 if ( contentRebased ) {
2665 anInit -= slide;
2666 }
2667 else if ( useChainedFixups ) {
2668 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2669 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2670 anInit = (uint32_t)(loadAddress+initFuncRuntimeOffset);
2671 }
2672 else {
2673 diag.error("terminator is not rebased");
2674 stop = true;
2675 break;
2676 }
2677 }
2678 if ( !executableSegments.contains(anInit) ) {
2679 diag.error("terminator 0x%0X does not point within executable segment", anInit);
2680 stop = true;
2681 break;
2682 }
2683 callback(anInit - (uint32_t)loadAddress);
2684 }
2685 }
2686 }
2687 });
2688 }
2689
2690
2691
2692 void MachOAnalyzer::forEachRPath(void (^callback)(const char* rPath, bool& stop)) const
2693 {
2694 Diagnostics diag;
2695 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2696 if ( cmd->cmd == LC_RPATH ) {
2697 const char* rpath = (char*)cmd + ((struct rpath_command*)cmd)->path.offset;
2698 callback(rpath, stop);
2699 }
2700 });
2701 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2702 }
2703
2704
2705 bool MachOAnalyzer::hasObjC() const
2706 {
2707 __block bool result = false;
2708 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2709 if ( (strcmp(info.sectName, "__objc_imageinfo") == 0) && (strncmp(info.segInfo.segName, "__DATA", 6) == 0) ) {
2710 result = true;
2711 stop = true;
2712 }
2713 if ( (this->cputype == CPU_TYPE_I386) && (strcmp(info.sectName, "__image_info") == 0) && (strcmp(info.segInfo.segName, "__OBJC") == 0) ) {
2714 result = true;
2715 stop = true;
2716 }
2717 });
2718 return result;
2719 }
2720
// Returns true if this image (likely) contains an ObjC +load method, which the
// objc runtime must invoke at image load time.
bool MachOAnalyzer::hasPlusLoadMethod(Diagnostics& diag) const
{
    __block bool result = false;
    if ( (this->cputype == CPU_TYPE_I386) && supportsPlatform(Platform::macOS) ) {
        // old objc runtime has no special section for +load methods, scan for string
        // NOTE: this matches any cstring literal exactly equal to "load", so it is
        // conservative and can report false positives
        int64_t slide = getSlide();
        forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
            if ( ( (info.sectFlags & SECTION_TYPE) == S_CSTRING_LITERALS ) ) {
                if ( malformedSectionRange ) {
                    diag.error("cstring section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
                    stop = true;
                    return;
                }
                const uint8_t* content = (uint8_t*)(info.sectAddr + slide);
                const char* s   = (char*)content;
                const char* end = s + info.sectSize;
                // walk each NUL-terminated string in the section
                while ( s < end ) {
                    if ( strcmp(s, "load") == 0 ) {
                        result = true;
                        stop = true;
                        return;
                    }
                    // advance past this string and its terminating NUL
                    while (*s != '\0' )
                        ++s;
                    ++s;
                }
            }
        });
    }
    else {
        // in new objc runtime compiler puts classes/categories with +load method in special section
        forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
            if ( strncmp(info.segInfo.segName, "__DATA", 6) != 0 )
                return;
            // non-lazy class list / non-lazy category list sections
            if ( (strcmp(info.sectName, "__objc_nlclslist") == 0) || (strcmp(info.sectName, "__objc_nlcatlist") == 0)) {
                result = true;
                stop = true;
            }
        });
    }
    return result;
}
2763
2764 const void* MachOAnalyzer::getRebaseOpcodes(uint32_t& size) const
2765 {
2766 Diagnostics diag;
2767 LinkEditInfo leInfo;
2768 getLinkEditPointers(diag, leInfo);
2769 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2770 return nullptr;
2771
2772 size = leInfo.dyldInfo->rebase_size;
2773 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
2774 }
2775
2776 const void* MachOAnalyzer::getBindOpcodes(uint32_t& size) const
2777 {
2778 Diagnostics diag;
2779 LinkEditInfo leInfo;
2780 getLinkEditPointers(diag, leInfo);
2781 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2782 return nullptr;
2783
2784 size = leInfo.dyldInfo->bind_size;
2785 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
2786 }
2787
2788 const void* MachOAnalyzer::getLazyBindOpcodes(uint32_t& size) const
2789 {
2790 Diagnostics diag;
2791 LinkEditInfo leInfo;
2792 getLinkEditPointers(diag, leInfo);
2793 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2794 return nullptr;
2795
2796 size = leInfo.dyldInfo->lazy_bind_size;
2797 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
2798 }
2799
2800 const void* MachOAnalyzer::getSplitSeg(uint32_t& size) const
2801 {
2802 Diagnostics diag;
2803 LinkEditInfo leInfo;
2804 getLinkEditPointers(diag, leInfo);
2805 if ( diag.hasError() || (leInfo.splitSegInfo == nullptr) )
2806 return nullptr;
2807
2808 size = leInfo.splitSegInfo->datasize;
2809 return getLinkEditContent(leInfo.layout, leInfo.splitSegInfo->dataoff);
2810 }
2811
2812
2813 uint64_t MachOAnalyzer::segAndOffsetToRuntimeOffset(uint8_t targetSegIndex, uint64_t targetSegOffset) const
2814 {
2815 __block uint64_t textVmAddr = 0;
2816 __block uint64_t result = 0;
2817 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2818 if ( strcmp(info.segName, "__TEXT") == 0 )
2819 textVmAddr = info.vmAddr;
2820 if ( info.segIndex == targetSegIndex ) {
2821 result = (info.vmAddr - textVmAddr) + targetSegOffset;
2822 }
2823 });
2824 return result;
2825 }
2826
2827 bool MachOAnalyzer::hasLazyPointers(uint32_t& runtimeOffset, uint32_t& size) const
2828 {
2829 size = 0;
2830 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2831 if ( (info.sectFlags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
2832 runtimeOffset = (uint32_t)(info.sectAddr - preferredLoadAddress());
2833 size = (uint32_t)info.sectSize;
2834 stop = true;
2835 }
2836 });
2837 return (size != 0);
2838 }
2839
2840 uint64_t MachOAnalyzer::preferredLoadAddress() const
2841 {
2842 __block uint64_t textVmAddr = 0;
2843 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2844 if ( strcmp(info.segName, "__TEXT") == 0 ) {
2845 textVmAddr = info.vmAddr;
2846 stop = true;
2847 }
2848 });
2849 return textVmAddr;
2850 }
2851
2852
2853 bool MachOAnalyzer::getEntry(uint32_t& offset, bool& usesCRT) const
2854 {
2855 Diagnostics diag;
2856 offset = 0;
2857 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2858 if ( cmd->cmd == LC_MAIN ) {
2859 entry_point_command* mainCmd = (entry_point_command*)cmd;
2860 usesCRT = false;
2861 offset = (uint32_t)mainCmd->entryoff;
2862 stop = true;
2863 }
2864 else if ( cmd->cmd == LC_UNIXTHREAD ) {
2865 stop = true;
2866 usesCRT = true;
2867 uint64_t startAddress = entryAddrFromThreadCmd((thread_command*)cmd);
2868 offset = (uint32_t)(startAddress - preferredLoadAddress());
2869 }
2870 });
2871 return (offset != 0);
2872 }
2873
// Extracts the initial program counter from an LC_UNIXTHREAD load command.
// The register state begins 16 bytes into the command (after the cmd, cmdsize,
// flavor, and count fields, 4 bytes each); the index of the pc register within
// that state differs per architecture.  Returns 0 for unhandled cputypes.
uint64_t MachOAnalyzer::entryAddrFromThreadCmd(const thread_command* cmd) const
{
    assert(cmd->cmd == LC_UNIXTHREAD);
    const uint32_t* regs32 = (uint32_t*)(((char*)cmd) + 16);   // register file viewed as 32-bit slots
    const uint64_t* regs64 = (uint64_t*)(((char*)cmd) + 16);   // register file viewed as 64-bit slots
    uint64_t startAddress = 0;
    switch ( this->cputype ) {
        case CPU_TYPE_I386:
            startAddress = regs32[10]; // i386_thread_state_t.eip
            break;
        case CPU_TYPE_X86_64:
            startAddress = regs64[16]; // x86_thread_state64_t.rip
            break;
        case CPU_TYPE_ARM:
            startAddress = regs32[15]; // arm_thread_state_t.pc
            break;
        case CPU_TYPE_ARM64:
            startAddress = regs64[32]; // arm_thread_state64_t.__pc
            break;
    }
    return startAddress;
}
2896
2897
2898 void MachOAnalyzer::forEachInterposingSection(Diagnostics& diag, void (^handler)(uint64_t vmOffset, uint64_t vmSize, bool& stop)) const
2899 {
2900 const unsigned ptrSize = pointerSize();
2901 const unsigned entrySize = 2 * ptrSize;
2902 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2903 if ( ((info.sectFlags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(info.sectName, "__interpose") == 0) && (strcmp(info.segInfo.segName, "__DATA") == 0)) ) {
2904 if ( info.sectSize % entrySize != 0 ) {
2905 diag.error("interposing section %s/%s has bad size", info.segInfo.segName, info.sectName);
2906 stop = true;
2907 return;
2908 }
2909 if ( malformedSectionRange ) {
2910 diag.error("interposing section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2911 stop = true;
2912 return;
2913 }
2914 if ( (info.sectAddr % ptrSize) != 0 ) {
2915 diag.error("interposing section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2916 stop = true;
2917 return;
2918 }
2919 handler(info.sectAddr - preferredLoadAddress(), info.sectSize, stop);
2920 }
2921 });
2922 }
2923
2924 void MachOAnalyzer::forEachDOFSection(Diagnostics& diag, void (^callback)(uint32_t offset)) const
2925 {
2926 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2927 if ( ( (info.sectFlags & SECTION_TYPE) == S_DTRACE_DOF ) && !malformedSectionRange ) {
2928 callback((uint32_t)(info.sectAddr - info.segInfo.vmAddr));
2929 }
2930 });
2931 }
2932
2933 void MachOAnalyzer::forEachCDHash(void (^handler)(const uint8_t cdHash[20])) const
2934 {
2935 Diagnostics diag;
2936 LinkEditInfo leInfo;
2937 getLinkEditPointers(diag, leInfo);
2938 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2939 return;
2940
2941 forEachCDHashOfCodeSignature(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff),
2942 leInfo.codeSig->datasize,
2943 handler);
2944 }
2945
2946 bool MachOAnalyzer::isRestricted() const
2947 {
2948 __block bool result = false;
2949 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2950 if ( (strcmp(info.segInfo.segName, "__RESTRICT") == 0) && (strcmp(info.sectName, "__restrict") == 0) ) {
2951 result = true;
2952 stop = true;
2953 }
2954 });
2955 return result;
2956 }
2957
2958 bool MachOAnalyzer::usesLibraryValidation() const
2959 {
2960 Diagnostics diag;
2961 LinkEditInfo leInfo;
2962 getLinkEditPointers(diag, leInfo);
2963 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2964 return false;
2965
2966 // check for CS_REQUIRE_LV in CS_CodeDirectory.flags
2967 __block bool requiresLV = false;
2968 forEachCodeDirectoryBlob(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff),
2969 leInfo.codeSig->datasize,
2970 ^(const void *cdBuffer) {
2971 const CS_CodeDirectory* cd = (const CS_CodeDirectory*)cdBuffer;
2972 requiresLV |= (htonl(cd->flags) & CS_REQUIRE_LV);
2973 });
2974
2975 return requiresLV;
2976 }
2977
// Returns true if a dlopen() closure for this image can be pre-computed and
// cached.  Each disqualifying condition invokes 'failureReason' with a human
// readable explanation; all checks are run (not short-circuited), so several
// reasons may be reported before returning false.
bool MachOAnalyzer::canHavePrecomputedDlopenClosure(const char* path, void (^failureReason)(const char*)) const
{
    __block bool retval = true;

    // only dylibs can go in cache
    if ( (this->filetype != MH_DYLIB) && (this->filetype != MH_BUNDLE) ) {
        retval = false;
        failureReason("not MH_DYLIB or MH_BUNDLE");
    }

    // flat namespace files cannot go in cache
    if ( (this->flags & MH_TWOLEVEL) == 0 ) {
        retval = false;
        failureReason("not built with two level namespaces");
    }

    // can only depend on other dylibs with absolute paths
    __block bool allDepPathsAreGood = true;
    forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
        if ( loadPath[0] != '/' ) {
            allDepPathsAreGood = false;
            stop = true;
        }
    });
    if ( !allDepPathsAreGood ) {
        retval = false;
        failureReason("depends on dylibs that are not absolute paths");
    }

    // dylibs with interposing info cannot have dlopen closure pre-computed
    __block bool hasInterposing = false;
    forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool &stop) {
        if ( ((info.sectFlags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(info.sectName, "__interpose") == 0) && (strcmp(info.segInfo.segName, "__DATA") == 0)) )
            hasInterposing = true;
    });
    if ( hasInterposing ) {
        retval = false;
        failureReason("has interposing tuples");
    }

    // images that use dynamic_lookup, bundle_loader, or have weak-defs cannot have dlopen closure pre-computed
    Diagnostics diag;
    // shared check applied to every bind target's library ordinal, whichever
    // fixup encoding the image uses
    auto checkBind = ^(int libOrdinal, bool& stop) {
        switch (libOrdinal) {
            case BIND_SPECIAL_DYLIB_WEAK_LOOKUP:
                failureReason("has weak externals");
                retval = false;
                stop = true;
                break;
            case BIND_SPECIAL_DYLIB_FLAT_LOOKUP:
                failureReason("has dynamic_lookup binds");
                retval = false;
                stop = true;
                break;
            case BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE:
                failureReason("has reference to main executable (bundle loader)");
                retval = false;
                stop = true;
                break;
        }
    };

    if (hasChainedFixups()) {
        forEachChainedFixupTarget(diag, ^(int libOrdinal, const char *symbolName, uint64_t addend, bool weakImport, bool &stop) {
            checkBind(libOrdinal, stop);
        });
    } else {
        forEachBind(diag, ^(uint64_t runtimeOffset, int libOrdinal, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
            checkBind(libOrdinal, stop);
        },
        ^(const char* symbolName) {
        },
        ^() {
        });
    }

    // special system dylib overrides cannot have closure pre-computed
    if ( strncmp(path, "/usr/lib/system/introspection/", 30) == 0 ) {
        retval = false;
        failureReason("override of OS dylib");
    }

    // Don't precompute iOSMac for now until dyld3 support is there.
    if ( supportsPlatform(Platform::iOSMac) && !supportsPlatform(Platform::macOS) ) {
        retval = false;
        failureReason("UIKitForMac binary");
    }

    return retval;
}
3068
3069
// Returns true if any pointer fixup location (rebase or bind) in this 64-bit
// image is not 8-byte aligned.  32-bit images always report false.
bool MachOAnalyzer::hasUnalignedPointerFixups() const
{
    // only look at 64-bit architectures
    if ( pointerSize() == 4 )
        return false;

    __block Diagnostics diag;
    __block bool result = false;
    if ( hasChainedFixups() ) {
        // chained fixups: check the in-memory address of each fixup location
        withChainStarts(diag, chainStartsOffset(), ^(const dyld_chained_starts_in_image* startsInfo) {
            forEachFixupInAllChains(diag, startsInfo, false, ^(MachOLoaded::ChainedFixupPointerOnDisk* fixupLoc, const dyld_chained_starts_in_segment* segInfo, bool& fixupsStop) {
                if ( ((long)(fixupLoc) & 7) != 0 ) {
                    result = true;
                    fixupsStop = true;
                }
            });
        });
    }
    else {
        // opcode-based fixups: check the runtime offset of each bind and rebase
        forEachBind(diag, ^(uint64_t runtimeOffset, int libOrdinal, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
            if ( (runtimeOffset & 7) != 0 ) {
                result = true;
                stop = true;
            }
        },
        ^(const char* symbolName) {
        },
        ^() {
        });
        forEachRebase(diag, true, ^(uint64_t runtimeOffset, bool& stop) {
            if ( (runtimeOffset & 7) != 0 ) {
                result = true;
                stop = true;
            }
        });
    }

    return result;
}
3109
// Recursively walks the exports trie rooted at 'start'.  'p' is the current
// node, 'cummulativeString' accumulates the symbol name spelled out along the
// edges, and 'curStrOffset' is the current length of that name.  Each terminal
// node invokes 'callback' with the full symbol name, its image offset (or
// re-export info), and its flags.  Malformations are reported via 'diag'.
void MachOAnalyzer::recurseTrie(Diagnostics& diag, const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
                                OverflowSafeArray<char>& cummulativeString, int curStrOffset, bool& stop, ExportsCallback callback) const
{
    if ( p >= end ) {
        diag.error("malformed trie, node past end");
        return;
    }
    // a non-zero terminal size means this node exports the accumulated name
    const uint64_t terminalSize = read_uleb128(diag, p, end);
    const uint8_t* children = p + terminalSize;
    if ( terminalSize != 0 ) {
        uint64_t imageOffset = 0;
        uint64_t flags    = read_uleb128(diag, p, end);
        uint64_t other    = 0;
        const char* importName = nullptr;
        if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
            // re-exported symbol: ordinal of the source dylib plus optional rename
            other = read_uleb128(diag, p, end); // dylib ordinal
            importName = (char*)p;
        }
        else {
            imageOffset = read_uleb128(diag, p, end);
            if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
                other = read_uleb128(diag, p, end); // resolver function offset
            else
                other = 0;
        }
        if ( diag.hasError() )
            return;
        callback(cummulativeString.begin(), imageOffset, flags, other, importName, stop);
        if ( stop )
            return;
    }
    if ( children > end ) {
        diag.error("malformed trie, terminalSize extends beyond trie data");
        return;
    }
    // NOTE(review): when children == end, the childrenCount read below touches
    // the byte at 'end' — confirm callers always pass a trie buffer where this
    // one-past read is safe
    const uint8_t childrenCount = *children++;
    const uint8_t* s = children;
    for (uint8_t i=0; i < childrenCount; ++i) {
        // copy this edge's substring onto the accumulated name
        int edgeStrLen = 0;
        while (*s != '\0') {
            cummulativeString.resize(curStrOffset+edgeStrLen + 1);
            cummulativeString[curStrOffset+edgeStrLen] = *s++;
            ++edgeStrLen;
            if ( s > end ) {
                diag.error("malformed trie node, child node extends past end of trie\n");
                return;
            }
        }
        // copy the terminating NUL as well
        cummulativeString.resize(curStrOffset+edgeStrLen + 1);
        cummulativeString[curStrOffset+edgeStrLen] = *s++;
        uint64_t childNodeOffset = read_uleb128(diag, s, end);
        if (childNodeOffset == 0) {
            diag.error("malformed trie, childNodeOffset==0");
            return;
        }
        recurseTrie(diag, start, start+childNodeOffset, end, cummulativeString, curStrOffset+edgeStrLen, stop, callback);
        if ( diag.hasError() || stop )
            return;
    }
}
3170
3171 void MachOAnalyzer::forEachExportedSymbol(Diagnostics& diag, ExportsCallback callback) const
3172 {
3173 LinkEditInfo leInfo;
3174 getLinkEditPointers(diag, leInfo);
3175 if ( diag.hasError() )
3176 return;
3177 uint64_t trieSize;
3178 if ( const uint8_t* trieStart = getExportsTrie(leInfo, trieSize) ) {
3179 const uint8_t* trieEnd = trieStart + trieSize;
3180 bool stop = false;
3181 STACK_ALLOC_OVERFLOW_SAFE_ARRAY(char, cummulativeString, 4096);
3182 recurseTrie(diag, trieStart, trieStart, trieEnd, cummulativeString, 0, stop, callback);
3183 }
3184 }
3185
// Returns true if this image is eligible for the dyld shared cache.  Beyond
// the generic MachOFile checks, x86_64 images using opcode-based fixups must
// have all rebase targets inside the image and all rebase locations 4-byte
// aligned.  Each failure is reported through 'failureReason'.
bool MachOAnalyzer::canBePlacedInDyldCache(const char* path, void (^failureReason)(const char*)) const
{
    if (!MachOFile::canBePlacedInDyldCache(path, failureReason))
        return false;
    // the extra rebase validation below only applies to x86_64 variants
    if ( !(isArch("x86_64") || isArch("x86_64h")) )
        return true;

    // chained-fixup images encode targets differently; nothing more to verify here
    if ( hasChainedFixups() )
        return true;

    __block bool rebasesOk = true;
    Diagnostics diag;
    uint64_t startVMAddr = preferredLoadAddress();
    uint64_t endVMAddr = startVMAddr + mappedSize();
    forEachRebase(diag, false, ^(uint64_t runtimeOffset, bool &stop) {
        // We allow TBI for x86_64 dylibs, but then require that the remainder of the offset
        // is a 32-bit offset from the mach-header.
        // (the top byte is masked off before the range check below)
        uint64_t value = *(uint64_t*)((uint8_t*)this + runtimeOffset);
        value &= 0x00FFFFFFFFFFFFFFULL;
        if ( (value < startVMAddr) || (value >= endVMAddr) ) {
            failureReason("rebase value out of range of dylib");
            rebasesOk = false;
            stop = true;
            return;
        }

        // Also error if the rebase location is anything other than 4/8 byte aligned
        if ( (runtimeOffset & 0x3) != 0 ) {
            failureReason("rebase value is not 4-byte aligned");
            rebasesOk = false;
            stop = true;
            return;
        }
    });
    return rebasesOk;
}
3222
3223 uint64_t MachOAnalyzer::chainStartsOffset() const
3224 {
3225 const dyld_chained_fixups_header* header = chainedFixupsHeader();
3226 // old arm64e binary has no dyld_chained_fixups_header
3227 if ( header == nullptr )
3228 return 0;
3229 return header->starts_offset + ((uint8_t*)header - (uint8_t*)this);
3230 }
3231
3232 const dyld_chained_fixups_header* MachOAnalyzer::chainedFixupsHeader() const
3233 {
3234 Diagnostics diag;
3235 LinkEditInfo leInfo;
3236 getLinkEditPointers(diag, leInfo);
3237 if ( diag.hasError() || (leInfo.chainedFixups == nullptr) )
3238 return nullptr;
3239
3240 return (dyld_chained_fixups_header*)getLinkEditContent(leInfo.layout, leInfo.chainedFixups->dataoff);
3241 }
3242
3243 uint16_t MachOAnalyzer::chainedPointerFormat(const dyld_chained_fixups_header* header)
3244 {
3245 const dyld_chained_starts_in_image* startsInfo = (dyld_chained_starts_in_image*)((uint8_t*)header + header->starts_offset);
3246 for (uint32_t i=0; i < startsInfo->seg_count; ++i) {
3247 uint32_t segInfoOffset = startsInfo->seg_info_offset[i];
3248 // 0 offset means this segment has no fixups
3249 if ( segInfoOffset == 0 )
3250 continue;
3251 const dyld_chained_starts_in_segment* segInfo = (dyld_chained_starts_in_segment*)((uint8_t*)startsInfo + segInfoOffset);
3252 if ( segInfo->page_count != 0 )
3253 return segInfo->pointer_format;
3254 }
3255 return 0; // no chains (perhaps no __DATA segment)
3256 }
3257
3258 uint16_t MachOAnalyzer::chainedPointerFormat() const
3259 {
3260 const dyld_chained_fixups_header* header = chainedFixupsHeader();
3261 if ( header != nullptr ) {
3262 // get pointer format from chain info struct in LINKEDIT
3263 return chainedPointerFormat(header);
3264 }
3265 assert(this->cputype == CPU_TYPE_ARM64 && this->cpusubtype == CPU_SUBTYPE_ARM64E && "chainedPointerFormat() called on non-chained binary");
3266 return DYLD_CHAINED_PTR_ARM64E;
3267 }
3268
3269
3270 #if (BUILDING_DYLD || BUILDING_LIBDYLD) && !__arm64e__
3271 #define SUPPORT_OLD_ARM64E_FORMAT 0
3272 #else
3273 #define SUPPORT_OLD_ARM64E_FORMAT 1
3274 #endif
3275
3276 // find dyld_chained_starts_in_image* in image
3277 // if old arm64e binary, synthesize dyld_chained_starts_in_image*
// Locates (or, for old-style arm64e binaries, synthesizes) the image's
// dyld_chained_starts_in_image and hands it to 'callback'.
// 'startsStructOffsetHint', if non-zero, is a pre-computed offset from the
// mach_header and is used directly.  Errors are reported via 'diag'.
void MachOAnalyzer::withChainStarts(Diagnostics& diag, uint64_t startsStructOffsetHint, void (^callback)(const dyld_chained_starts_in_image*)) const
{
    if ( startsStructOffsetHint != 0 ) {
        // we have a pre-computed offset into LINKEDIT for dyld_chained_starts_in_image
        callback((dyld_chained_starts_in_image*)((uint8_t*)this + startsStructOffsetHint));
        return;
    }

    LinkEditInfo leInfo;
    getLinkEditPointers(diag, leInfo);
    if ( diag.hasError() )
        return;

    if ( leInfo.chainedFixups != nullptr ) {
        // find dyld_chained_starts_in_image from dyld_chained_fixups_header
        const dyld_chained_fixups_header* header = (dyld_chained_fixups_header*)getLinkEditContent(leInfo.layout, leInfo.chainedFixups->dataoff);
        callback((dyld_chained_starts_in_image*)((uint8_t*)header + header->starts_offset));
    }
#if SUPPORT_OLD_ARM64E_FORMAT
    // don't want this code in non-arm64e dyld because it causes a stack protector which dereferences a GOT pointer before GOT is set up
    else if ( (leInfo.dyldInfo != nullptr) && (this->cputype == CPU_TYPE_ARM64) && (this->cpusubtype == CPU_SUBTYPE_ARM64E) ) {
        // old arm64e binary, create a dyld_chained_starts_in_image for caller
        uint64_t baseAddress = preferredLoadAddress();
        // scratch buffer sized from bind info; holds the synthesized starts struct
        BLOCK_ACCCESSIBLE_ARRAY(uint8_t, buffer, leInfo.dyldInfo->bind_size + 512);
        dyld_chained_starts_in_image* header = (dyld_chained_starts_in_image*)buffer;
        header->seg_count = leInfo.layout.linkeditSegIndex;
        for (uint32_t i=0; i < header->seg_count; ++i)
            header->seg_info_offset[i] = 0;
        __block uint8_t curSegIndex = 0;
        __block dyld_chained_starts_in_segment* curSeg = (dyld_chained_starts_in_segment*)(&(header->seg_info_offset[header->seg_count]));
        // replay the old-format fixups, appending one starts_in_segment record
        // per segment (fixups arrive grouped by segment, in segment order)
        parseOrgArm64eChainedFixups(diag, nullptr, nullptr, ^(const LinkEditInfo& leInfo2, const SegmentInfo segments[], uint8_t segmentIndex,
                                                             bool segIndexSet, uint64_t segmentOffset, uint16_t format, bool& stop) {
            uint32_t pageIndex = (uint32_t)(segmentOffset/0x1000);
            if ( segmentIndex != curSegIndex ) {
                // starting a new segment: record its offset and reset its page table
                if ( curSegIndex == 0 ) {
                    header->seg_info_offset[segmentIndex] = (uint32_t)((uint8_t*)curSeg - buffer);
                }
                else {
                    header->seg_info_offset[segmentIndex] = (uint32_t)((uint8_t*)(&curSeg->page_start[curSeg->page_count]) - buffer);
                    curSeg = (dyld_chained_starts_in_segment*)((uint8_t*)header+header->seg_info_offset[segmentIndex]);
                }
                curSeg->page_count = 0;
                curSegIndex = segmentIndex;
            }
            // pages with no fixups get the DYLD_CHAINED_PTR_START_NONE marker (0xFFFF)
            while ( curSeg->page_count != pageIndex ) {
                curSeg->page_start[curSeg->page_count] = 0xFFFF;
                curSeg->page_count++;
            }
            curSeg->size = (uint32_t)((uint8_t*)(&curSeg->page_start[pageIndex]) - (uint8_t*)curSeg);
            curSeg->page_size = 0x1000; // old arm64e encoding used 4KB pages
            curSeg->pointer_format = DYLD_CHAINED_PTR_ARM64E;
            curSeg->segment_offset = segments[segmentIndex].vmAddr - baseAddress;
            curSeg->max_valid_pointer = 0;
            curSeg->page_count = pageIndex+1;
            curSeg->page_start[pageIndex] = segmentOffset & 0xFFF;
            //fprintf(stderr, "segment_offset=0x%llX, vmAddr=0x%llX\n", curSeg->segment_offset, segments[segmentIndex].vmAddr );
            //printf("segIndex=%d, segOffset=0x%08llX, page_start[%d]=0x%04X, page_start[%d]=0x%04X\n",
            //      segmentIndex, segmentOffset, pageIndex, curSeg->page_start[pageIndex], pageIndex-1, pageIndex ? curSeg->page_start[pageIndex-1] : 0);
        });
        callback(header);
    }
#endif
    else {
        diag.error("image does not use chained fixups");
    }
}
3344
3345 MachOAnalyzer::ObjCInfo MachOAnalyzer::getObjCInfo() const
3346 {
3347 __block ObjCInfo result;
3348 result.selRefCount = 0;
3349 result.classDefCount = 0;
3350 result.protocolDefCount = 0;
3351
3352 const uint32_t ptrSize = pointerSize();
3353 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3354 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) == 0 ) {
3355 if ( strcmp(sectInfo.sectName, "__objc_selrefs") == 0 )
3356 result.selRefCount += (sectInfo.sectSize/ptrSize);
3357 else if ( strcmp(sectInfo.sectName, "__objc_classlist") == 0 )
3358 result.classDefCount += (sectInfo.sectSize/ptrSize);
3359 else if ( strcmp(sectInfo.sectName, "__objc_protolist") == 0 )
3360 result.protocolDefCount += (sectInfo.sectSize/ptrSize);
3361 }
3362 else if ( (this->cputype == CPU_TYPE_I386) && (strcmp(sectInfo.segInfo.segName, "__OBJC") == 0) ) {
3363 if ( strcmp(sectInfo.sectName, "__message_refs") == 0 )
3364 result.selRefCount += (sectInfo.sectSize/4);
3365 else if ( strcmp(sectInfo.sectName, "__class") == 0 )
3366 result.classDefCount += (sectInfo.sectSize/48);
3367 else if ( strcmp(sectInfo.sectName, "__protocol") == 0 )
3368 result.protocolDefCount += (sectInfo.sectSize/20);
3369 }
3370 });
3371
3372 return result;
3373 }
3374
3375 // Convert from a (possibly) live pointer to a vmAddr
3376 static uint64_t convertToVMAddr(uint64_t value, MachOAnalyzer::VMAddrConverter vmAddrConverter) {
3377 if ( vmAddrConverter.contentRebased ) {
3378 // The value may have been signed. Strip the signature if that is the case
3379 #if __has_feature(ptrauth_calls)
3380 value = (uint64_t)__builtin_ptrauth_strip((void*)value, ptrauth_key_asia);
3381 #endif
3382 value -= vmAddrConverter.slide;
3383 }
3384 else if ( vmAddrConverter.chainedPointerFormat != 0 ) {
3385 auto* chainedValue = (MachOAnalyzer::ChainedFixupPointerOnDisk*)&value;
3386 uint64_t targetRuntimeOffset;
3387 if ( chainedValue->isRebase(vmAddrConverter.chainedPointerFormat, vmAddrConverter.preferredLoadAddress,
3388 targetRuntimeOffset) ) {
3389 value = vmAddrConverter.preferredLoadAddress + targetRuntimeOffset;
3390 }
3391 }
3392
3393 return value;
3394 }
3395
3396 uint64_t MachOAnalyzer::ObjCClassInfo::getReadOnlyDataField(ObjCClassInfo::ReadOnlyDataField field, uint32_t pointerSize) const {
3397 if (pointerSize == 8) {
3398 typedef uint64_t PtrTy;
3399 struct class_ro_t {
3400 uint32_t flags;
3401 uint32_t instanceStart;
3402 // Note there is 4-bytes of alignment padding between instanceSize and ivarLayout
3403 // on 64-bit archs, but no padding on 32-bit archs.
3404 // This union is a way to model that.
3405 union {
3406 uint32_t instanceSize;
3407 PtrTy pad;
3408 } instanceSize;
3409 PtrTy ivarLayoutVMAddr;
3410 PtrTy nameVMAddr;
3411 PtrTy baseMethodsVMAddr;
3412 PtrTy baseProtocolsVMAddr;
3413 PtrTy ivarsVMAddr;
3414 PtrTy weakIvarLayoutVMAddr;
3415 PtrTy basePropertiesVMAddr;
3416 };
3417 const class_ro_t* classData = (const class_ro_t*)(dataVMAddr + vmAddrConverter.slide);
3418 switch (field) {
3419 case ObjCClassInfo::ReadOnlyDataField::name:
3420 return convertToVMAddr(classData->nameVMAddr, vmAddrConverter);
3421 case ObjCClassInfo::ReadOnlyDataField::baseMethods:
3422 return convertToVMAddr(classData->baseMethodsVMAddr, vmAddrConverter);
3423 case ObjCClassInfo::ReadOnlyDataField::baseProperties:
3424 return convertToVMAddr(classData->basePropertiesVMAddr, vmAddrConverter);
3425 case ObjCClassInfo::ReadOnlyDataField::flags:
3426 return classData->flags;
3427 }
3428 } else {
3429 typedef uint32_t PtrTy;
3430 struct class_ro_t {
3431 uint32_t flags;
3432 uint32_t instanceStart;
3433 // Note there is 4-bytes of alignment padding between instanceSize and ivarLayout
3434 // on 64-bit archs, but no padding on 32-bit archs.
3435 // This union is a way to model that.
3436 union {
3437 uint32_t instanceSize;
3438 PtrTy pad;
3439 } instanceSize;
3440 PtrTy ivarLayoutVMAddr;
3441 PtrTy nameVMAddr;
3442 PtrTy baseMethodsVMAddr;
3443 PtrTy baseProtocolsVMAddr;
3444 PtrTy ivarsVMAddr;
3445 PtrTy weakIvarLayoutVMAddr;
3446 PtrTy basePropertiesVMAddr;
3447 };
3448 const class_ro_t* classData = (const class_ro_t*)(dataVMAddr + vmAddrConverter.slide);
3449 switch (field) {
3450 case ObjCClassInfo::ReadOnlyDataField::name:
3451 return convertToVMAddr(classData->nameVMAddr, vmAddrConverter);
3452 case ObjCClassInfo::ReadOnlyDataField::baseMethods:
3453 return convertToVMAddr(classData->baseMethodsVMAddr, vmAddrConverter);
3454 case ObjCClassInfo::ReadOnlyDataField::baseProperties:
3455 return convertToVMAddr(classData->basePropertiesVMAddr, vmAddrConverter);
3456 case ObjCClassInfo::ReadOnlyDataField::flags:
3457 return classData->flags;
3458 }
3459 }
3460 }
3461
3462 const char* MachOAnalyzer::getPrintableString(uint64_t stringVMAddr, MachOAnalyzer::PrintableStringResult& result,
3463 SectionCache* sectionCache,
3464 bool (^sectionHandler)(const SectionInfo& sectionInfo)) const {
3465 if ( sectionCache != nullptr ) {
3466 // Make sure the string is pointing in to one of the supported sections
3467 __block const dyld3::MachOAnalyzer::SectionInfo* nameSectionInfo = nullptr;
3468 for (const dyld3::MachOAnalyzer::SectionInfo& sectionInfo : sectionCache->sectionInfos) {
3469 if ( stringVMAddr < sectionInfo.sectAddr ) {
3470 continue;
3471 }
3472 if ( stringVMAddr >= ( sectionInfo.sectAddr + sectionInfo.sectSize) ) {
3473 continue;
3474 }
3475 nameSectionInfo = &sectionInfo;
3476 break;
3477 }
3478
3479 if ( nameSectionInfo != nullptr ) {
3480 // The section handler may also reject this section
3481 if ( sectionHandler != nullptr ) {
3482 if (!sectionHandler(*nameSectionInfo)) {
3483 result = PrintableStringResult::UnknownSection;
3484 return nullptr;
3485 }
3486 }
3487
3488 result = PrintableStringResult::CanPrint;
3489 return (const char*)(stringVMAddr + getSlide());
3490 }
3491 }
3492
3493 // If the name isn't in the cache then find the section its in
3494
3495 uint32_t fairplayTextOffsetStart;
3496 uint32_t fairplayTextOffsetEnd;
3497 uint32_t fairplaySize;
3498 if ( isFairPlayEncrypted(fairplayTextOffsetStart, fairplaySize) ) {
3499 fairplayTextOffsetEnd = fairplayTextOffsetStart + fairplaySize;
3500 } else {
3501 fairplayTextOffsetEnd = 0;
3502 }
3503
3504 result = PrintableStringResult::UnknownSection;
3505 forEachSection(^(const MachOAnalyzer::SectionInfo &sectInfo, bool malformedSectionRange, bool &stop) {
3506 if ( stringVMAddr < sectInfo.sectAddr ) {
3507 return;
3508 }
3509 if ( stringVMAddr >= ( sectInfo.sectAddr + sectInfo.sectSize) ) {
3510 return;
3511 }
3512
3513 // We can't scan this section if its protected
3514 if ( sectInfo.segInfo.isProtected ) {
3515 result = PrintableStringResult::ProtectedSection;
3516 stop = true;
3517 return;
3518 }
3519
3520 // We can't scan this section if it overlaps with the fairplay range
3521 if ( fairplayTextOffsetEnd < sectInfo.sectFileOffset ) {
3522 // Fairplay range ends before section
3523 } else if ( fairplayTextOffsetStart > (sectInfo.sectFileOffset + sectInfo.sectSize) ) {
3524 // Fairplay range starts after section
3525 } else {
3526 // Must overlap
3527 result = PrintableStringResult::FairPlayEncrypted;
3528 stop = true;
3529 return;
3530 }
3531
3532 // The section handler may also reject this section
3533 if ( sectionHandler != nullptr ) {
3534 if (!sectionHandler(sectInfo)) {
3535 result = PrintableStringResult::UnknownSection;
3536 stop = true;
3537 return;
3538 }
3539 }
3540 // Cache this section for later.
3541 if ( sectionCache != nullptr ) {
3542 sectionCache->sectionInfos.push_back(sectInfo);
3543 }
3544 result = PrintableStringResult::CanPrint;
3545 stop = true;
3546 });
3547
3548 #if BUILDING_SHARED_CACHE_UTIL
3549 // The shared cache coalesces strings in to their own section.
3550 // Assume its a valid pointer
3551 if (result == PrintableStringResult::UnknownSection) {
3552 result = PrintableStringResult::CanPrint;
3553 return (const char*)(stringVMAddr + getSlide());
3554 }
3555 #endif
3556
3557 if (result == PrintableStringResult::CanPrint)
3558 return (const char*)(stringVMAddr + getSlide());
3559 return nullptr;
3560 }
3561
3562 bool MachOAnalyzer::SectionCache::findSectionForVMAddr(uint64_t vmAddr, bool (^sectionHandler)(const SectionInfo& sectionInfo)) {
3563
3564 // Make sure the string is pointing in to one of the supported sections
3565 __block const dyld3::MachOAnalyzer::SectionInfo* foundSectionInfo = nullptr;
3566 for (const dyld3::MachOAnalyzer::SectionInfo& sectionInfo : sectionInfos) {
3567 if ( vmAddr < sectionInfo.sectAddr ) {
3568 continue;
3569 }
3570 if ( vmAddr >= ( sectionInfo.sectAddr + sectionInfo.sectSize) ) {
3571 continue;
3572 }
3573 foundSectionInfo = &sectionInfo;
3574 break;
3575 }
3576
3577 if ( foundSectionInfo != nullptr ) {
3578 // The section handler may also reject this section
3579 if ( sectionHandler != nullptr ) {
3580 if (!sectionHandler(*foundSectionInfo)) {
3581 return nullptr;
3582 }
3583 }
3584
3585 // Found a section, so return true
3586 return true;
3587 }
3588
3589 // If the name isn't in the cache then find the section its in
3590
3591 uint32_t fairplayTextOffsetStart;
3592 uint32_t fairplayTextOffsetEnd;
3593 uint32_t fairplaySize;
3594 if ( ma->isFairPlayEncrypted(fairplayTextOffsetStart, fairplaySize) ) {
3595 fairplayTextOffsetEnd = fairplayTextOffsetStart + fairplaySize;
3596 } else {
3597 fairplayTextOffsetEnd = 0;
3598 }
3599
3600 __block bool foundValidSection = false;
3601 ma->forEachSection(^(const MachOAnalyzer::SectionInfo &sectInfo, bool malformedSectionRange, bool &stop) {
3602 if ( vmAddr < sectInfo.sectAddr ) {
3603 return;
3604 }
3605 if ( vmAddr >= ( sectInfo.sectAddr + sectInfo.sectSize) ) {
3606 return;
3607 }
3608
3609 // We can't scan this section if it overlaps with the fairplay range
3610 if ( fairplayTextOffsetEnd < sectInfo.sectFileOffset ) {
3611 // Fairplay range ends before section
3612 } else if ( fairplayTextOffsetStart > (sectInfo.sectFileOffset + sectInfo.sectSize) ) {
3613 // Fairplay range starts after section
3614 } else {
3615 // Must overlap
3616 stop = true;
3617 return;
3618 }
3619
3620 // The section handler may also reject this section
3621 if ( sectionHandler != nullptr ) {
3622 if (!sectionHandler(sectInfo)) {
3623 stop = true;
3624 return;
3625 }
3626 }
3627 // Cache this section for later.
3628 sectionInfos.push_back(sectInfo);
3629 foundValidSection = true;
3630 stop = true;
3631 });
3632
3633 return foundValidSection;
3634 }
3635
// Walks the __objc_classlist section, calling `handler` twice per list entry:
// first for the class itself (isMetaClass=false), then for its metaclass
// (isMetaClass=true), reached through the class's isa pointer.
// `contentRebased` tells the VMAddrConverter whether pointer values in memory
// have already been slid (and possibly signed), which controls how
// convertToVMAddr() decodes them.  Iteration stops as soon as `diag` has an
// error.  The 64-bit and 32-bit struct layouts are handled in separate,
// parallel branches.
void MachOAnalyzer::forEachObjCClass(Diagnostics& diag, bool contentRebased,
                                     void (^handler)(Diagnostics& diag, uint64_t classVMAddr,
                                                     uint64_t classSuperclassVMAddr, uint64_t classDataVMAddr,
                                                     const ObjCClassInfo& objcClass, bool isMetaClass)) const {
    const uint64_t ptrSize = pointerSize();
    intptr_t slide = getSlide();

    MachOAnalyzer::VMAddrConverter vmAddrConverter;
    vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
    vmAddrConverter.slide = slide;
    vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
    vmAddrConverter.contentRebased = contentRebased;

    forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
        // Prefix match so any __DATA* segment qualifies; section name is exact.
        if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
            return;
        if ( strcmp(sectInfo.sectName, "__objc_classlist") != 0 )
            return;
        const uint8_t* classList = (uint8_t*)(sectInfo.sectAddr + slide);
        uint64_t classListSize = sectInfo.sectSize;

        // The section must be a whole number of pointers.
        if ( (classListSize % ptrSize) != 0 ) {
            diag.error("Invalid objc class section size");
            return;
        }

        if ( ptrSize == 8 ) {
            typedef uint64_t PtrTy;
            struct objc_class_t {
                uint64_t isaVMAddr;
                uint64_t superclassVMAddr;
                uint64_t methodCacheBuckets;
                uint64_t methodCacheProperties;
                uint64_t dataVMAddrAndFastFlags;
            };
            // This matches "struct TargetClassMetadata" from Metadata.h in Swift
            struct swift_class_metadata_t : objc_class_t {
                uint32_t swiftClassFlags;
            };
            enum : uint64_t {
                // Mask applied to dataVMAddrAndFastFlags to recover the data
                // pointer without the low/high flag bits.
                FAST_DATA_MASK = 0x00007ffffffffff8ULL
            };
            for (uint64_t i = 0; i != classListSize; i += sizeof(PtrTy)) {
                uint64_t classVMAddr = convertToVMAddr(*(PtrTy*)(classList + i), vmAddrConverter);
                uint64_t classSuperclassVMAddr = classVMAddr + offsetof(objc_class_t, superclassVMAddr);
                uint64_t classDataVMAddr = classVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);

                // First call the handler on the class
                const objc_class_t* classPtr = (const objc_class_t*)(classVMAddr + slide);
                const swift_class_metadata_t* swiftClassPtr = (const swift_class_metadata_t*)classPtr;
                ObjCClassInfo objcClass;
                objcClass.isaVMAddr = convertToVMAddr(classPtr->isaVMAddr, vmAddrConverter);
                objcClass.superclassVMAddr = convertToVMAddr(classPtr->superclassVMAddr, vmAddrConverter);
                objcClass.dataVMAddr = convertToVMAddr(classPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
                objcClass.vmAddrConverter = vmAddrConverter;
                objcClass.isSwiftLegacy = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
                objcClass.isSwiftStable = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
                // The Swift class flags are only present if the class is swift
                objcClass.swiftClassFlags = (objcClass.isSwiftLegacy || objcClass.isSwiftStable) ? swiftClassPtr->swiftClassFlags : 0;
                handler(diag, classVMAddr, classSuperclassVMAddr, classDataVMAddr, objcClass, false);
                if (diag.hasError())
                    return;

                // Then call it on the metaclass
                const objc_class_t* metaClassPtr = (const objc_class_t*)(objcClass.isaVMAddr + slide);
                const swift_class_metadata_t* swiftMetaClassPtr = (const swift_class_metadata_t*)metaClassPtr;
                ObjCClassInfo objcMetaClass;
                objcMetaClass.isaVMAddr = convertToVMAddr(metaClassPtr->isaVMAddr, vmAddrConverter);
                objcMetaClass.superclassVMAddr = convertToVMAddr(metaClassPtr->superclassVMAddr, vmAddrConverter);
                objcMetaClass.dataVMAddr = convertToVMAddr(metaClassPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
                objcMetaClass.vmAddrConverter = vmAddrConverter;
                objcMetaClass.isSwiftLegacy = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
                objcMetaClass.isSwiftStable = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
                // The Swift class flags are only present if the class is swift
                objcMetaClass.swiftClassFlags = (objcMetaClass.isSwiftLegacy || objcMetaClass.isSwiftStable) ? swiftMetaClassPtr->swiftClassFlags : 0;
                // The metaclass's superclass/data locations are inside the
                // metaclass object (the class's isa), not the class itself.
                classSuperclassVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, superclassVMAddr);
                classDataVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);
                handler(diag, objcClass.isaVMAddr, classSuperclassVMAddr, classDataVMAddr, objcMetaClass, true);
                if (diag.hasError())
                    return;
            }
        } else {
            // 32-bit layout: identical walk with 32-bit pointers and mask.
            typedef uint32_t PtrTy;
            struct objc_class_t {
                uint32_t isaVMAddr;
                uint32_t superclassVMAddr;
                uint32_t methodCacheBuckets;
                uint32_t methodCacheProperties;
                uint32_t dataVMAddrAndFastFlags;
            };
            // This matches "struct TargetClassMetadata" from Metadata.h in Swift
            struct swift_class_metadata_t : objc_class_t {
                uint32_t swiftClassFlags;
            };
            enum : uint32_t {
                FAST_DATA_MASK = 0xfffffffcUL
            };
            for (uint64_t i = 0; i != classListSize; i += sizeof(PtrTy)) {
                uint64_t classVMAddr = convertToVMAddr(*(PtrTy*)(classList + i), vmAddrConverter);
                uint64_t classSuperclassVMAddr = classVMAddr + offsetof(objc_class_t, superclassVMAddr);
                uint64_t classDataVMAddr = classVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);

                // First call the handler on the class
                const objc_class_t* classPtr = (const objc_class_t*)(classVMAddr + slide);
                const swift_class_metadata_t* swiftClassPtr = (const swift_class_metadata_t*)classPtr;
                ObjCClassInfo objcClass;
                objcClass.isaVMAddr = convertToVMAddr(classPtr->isaVMAddr, vmAddrConverter);
                objcClass.superclassVMAddr = convertToVMAddr(classPtr->superclassVMAddr, vmAddrConverter);
                objcClass.dataVMAddr = convertToVMAddr(classPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
                objcClass.vmAddrConverter = vmAddrConverter;
                objcClass.isSwiftLegacy = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
                objcClass.isSwiftStable = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
                // The Swift class flags are only present if the class is swift
                objcClass.swiftClassFlags = (objcClass.isSwiftLegacy || objcClass.isSwiftStable) ? swiftClassPtr->swiftClassFlags : 0;
                handler(diag, classVMAddr, classSuperclassVMAddr, classDataVMAddr, objcClass, false);
                if (diag.hasError())
                    return;

                // Then call it on the metaclass
                const objc_class_t* metaClassPtr = (const objc_class_t*)(objcClass.isaVMAddr + slide);
                const swift_class_metadata_t* swiftMetaClassPtr = (const swift_class_metadata_t*)metaClassPtr;
                ObjCClassInfo objcMetaClass;
                objcMetaClass.isaVMAddr = convertToVMAddr(metaClassPtr->isaVMAddr, vmAddrConverter);
                objcMetaClass.superclassVMAddr = convertToVMAddr(metaClassPtr->superclassVMAddr, vmAddrConverter);
                objcMetaClass.dataVMAddr = convertToVMAddr(metaClassPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
                objcMetaClass.vmAddrConverter = vmAddrConverter;
                objcMetaClass.isSwiftLegacy = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
                objcMetaClass.isSwiftStable = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
                // The Swift class flags are only present if the class is swift
                objcMetaClass.swiftClassFlags = (objcMetaClass.isSwiftLegacy || objcMetaClass.isSwiftStable) ? swiftMetaClassPtr->swiftClassFlags : 0;
                classSuperclassVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, superclassVMAddr);
                classDataVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);
                handler(diag, objcClass.isaVMAddr, classSuperclassVMAddr, classDataVMAddr, objcMetaClass, true);
                if (diag.hasError())
                    return;
            }
        }
    });
}
3775
// Walks the __objc_catlist section and calls `handler` once per category,
// with all of the category's pointer fields decoded back to vmAddrs.
// `contentRebased` tells the VMAddrConverter whether pointers in memory have
// already been slid/signed.  Iteration stops when `diag` has an error.
void MachOAnalyzer::forEachObjCCategory(Diagnostics& diag, bool contentRebased,
                                        void (^handler)(Diagnostics& diag, uint64_t categoryVMAddr,
                                                        const dyld3::MachOAnalyzer::ObjCCategory& objcCategory)) const {
    const uint64_t ptrSize = pointerSize();
    intptr_t slide = getSlide();

    MachOAnalyzer::VMAddrConverter vmAddrConverter;
    vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
    vmAddrConverter.slide = slide;
    vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
    vmAddrConverter.contentRebased = contentRebased;

    forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
        // Prefix match so any __DATA* segment qualifies; section name is exact.
        if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
            return;
        if ( strcmp(sectInfo.sectName, "__objc_catlist") != 0 )
            return;
        const uint8_t* categoryList = (uint8_t*)(sectInfo.sectAddr + slide);
        uint64_t categoryListSize = sectInfo.sectSize;

        // The section must be a whole number of pointers.
        if ( (categoryListSize % ptrSize) != 0 ) {
            diag.error("Invalid objc category section size");
            return;
        }

        if ( ptrSize == 8 ) {
            typedef uint64_t PtrTy;
            struct objc_category_t {
                PtrTy nameVMAddr;
                PtrTy clsVMAddr;
                PtrTy instanceMethodsVMAddr;
                PtrTy classMethodsVMAddr;
                PtrTy protocolsVMAddr;
                PtrTy instancePropertiesVMAddr;
            };
            for (uint64_t i = 0; i != categoryListSize; i += sizeof(PtrTy)) {
                uint64_t categoryVMAddr = convertToVMAddr(*(PtrTy*)(categoryList + i), vmAddrConverter);

                const objc_category_t* categoryPtr = (const objc_category_t*)(categoryVMAddr + slide);
                ObjCCategory objCCategory;
                objCCategory.nameVMAddr = convertToVMAddr(categoryPtr->nameVMAddr, vmAddrConverter);
                objCCategory.clsVMAddr = convertToVMAddr(categoryPtr->clsVMAddr, vmAddrConverter);
                objCCategory.instanceMethodsVMAddr = convertToVMAddr(categoryPtr->instanceMethodsVMAddr, vmAddrConverter);
                objCCategory.classMethodsVMAddr = convertToVMAddr(categoryPtr->classMethodsVMAddr, vmAddrConverter);
                objCCategory.protocolsVMAddr = convertToVMAddr(categoryPtr->protocolsVMAddr, vmAddrConverter);
                objCCategory.instancePropertiesVMAddr = convertToVMAddr(categoryPtr->instancePropertiesVMAddr, vmAddrConverter);
                handler(diag, categoryVMAddr, objCCategory);
                if (diag.hasError())
                    return;
            }
        } else {
            // 32-bit layout: same walk with 32-bit pointers.
            typedef uint32_t PtrTy;
            struct objc_category_t {
                PtrTy nameVMAddr;
                PtrTy clsVMAddr;
                PtrTy instanceMethodsVMAddr;
                PtrTy classMethodsVMAddr;
                PtrTy protocolsVMAddr;
                PtrTy instancePropertiesVMAddr;
            };
            for (uint64_t i = 0; i != categoryListSize; i += sizeof(PtrTy)) {
                uint64_t categoryVMAddr = convertToVMAddr(*(PtrTy*)(categoryList + i), vmAddrConverter);

                const objc_category_t* categoryPtr = (const objc_category_t*)(categoryVMAddr + slide);
                ObjCCategory objCCategory;
                objCCategory.nameVMAddr = convertToVMAddr(categoryPtr->nameVMAddr, vmAddrConverter);
                objCCategory.clsVMAddr = convertToVMAddr(categoryPtr->clsVMAddr, vmAddrConverter);
                objCCategory.instanceMethodsVMAddr = convertToVMAddr(categoryPtr->instanceMethodsVMAddr, vmAddrConverter);
                objCCategory.classMethodsVMAddr = convertToVMAddr(categoryPtr->classMethodsVMAddr, vmAddrConverter);
                objCCategory.protocolsVMAddr = convertToVMAddr(categoryPtr->protocolsVMAddr, vmAddrConverter);
                objCCategory.instancePropertiesVMAddr = convertToVMAddr(categoryPtr->instancePropertiesVMAddr, vmAddrConverter);
                handler(diag, categoryVMAddr, objCCategory);
                if (diag.hasError())
                    return;
            }
        }
    });
}
3854
// Walks the __objc_protolist section and calls `handler` once per protocol.
// `requiresObjCReallocation` is set when the on-disk protocol struct is
// smaller than the full protocol_t layout (its `size` field is compared to
// sizeof(protocol_t), whose tail fields are not always present on disk).
// `contentRebased` tells the VMAddrConverter whether pointers in memory have
// already been slid/signed.  Iteration stops when `diag` has an error.
void MachOAnalyzer::forEachObjCProtocol(Diagnostics& diag, bool contentRebased,
                                        void (^handler)(Diagnostics& diag, uint64_t categoryVMAddr,
                                                        const dyld3::MachOAnalyzer::ObjCProtocol& objCProtocol)) const {
    const uint64_t ptrSize = pointerSize();
    intptr_t slide = getSlide();

    MachOAnalyzer::VMAddrConverter vmAddrConverter;
    vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
    vmAddrConverter.slide = slide;
    vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
    vmAddrConverter.contentRebased = contentRebased;

    forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
        // Prefix match so any __DATA* segment qualifies; section name is exact.
        if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
            return;
        if ( strcmp(sectInfo.sectName, "__objc_protolist") != 0 )
            return;
        const uint8_t* protocolList = (uint8_t*)(sectInfo.sectAddr + slide);
        uint64_t protocolListSize = sectInfo.sectSize;

        // The section must be a whole number of pointers.
        if ( (protocolListSize % ptrSize) != 0 ) {
            diag.error("Invalid objc protocol section size");
            return;
        }

        if ( ptrSize == 8 ) {
            typedef uint64_t PtrTy;
            struct protocol_t {
                PtrTy isaVMAddr;
                PtrTy nameVMAddr;
                PtrTy protocolsVMAddr;
                PtrTy instanceMethodsVMAddr;
                PtrTy classMethodsVMAddr;
                PtrTy optionalInstanceMethodsVMAddr;
                PtrTy optionalClassMethodsVMAddr;
                PtrTy instancePropertiesVMAddr;
                uint32_t size;
                uint32_t flags;
                // Fields below this point are not always present on disk.
                PtrTy extendedMethodTypesVMAddr;
                PtrTy demangledNameVMAddr;
                PtrTy classPropertiesVMAddr;
            };
            for (uint64_t i = 0; i != protocolListSize; i += sizeof(PtrTy)) {
                uint64_t protocolVMAddr = convertToVMAddr(*(PtrTy*)(protocolList + i), vmAddrConverter);

                const protocol_t* protocolPtr = (const protocol_t*)(protocolVMAddr + slide);
                ObjCProtocol objCProtocol;
                objCProtocol.isaVMAddr = convertToVMAddr(protocolPtr->isaVMAddr, vmAddrConverter);
                objCProtocol.nameVMAddr = convertToVMAddr(protocolPtr->nameVMAddr, vmAddrConverter);
                objCProtocol.instanceMethodsVMAddr = convertToVMAddr(protocolPtr->instanceMethodsVMAddr, vmAddrConverter);
                objCProtocol.classMethodsVMAddr = convertToVMAddr(protocolPtr->classMethodsVMAddr, vmAddrConverter);
                objCProtocol.optionalInstanceMethodsVMAddr = convertToVMAddr(protocolPtr->optionalInstanceMethodsVMAddr, vmAddrConverter);
                objCProtocol.optionalClassMethodsVMAddr = convertToVMAddr(protocolPtr->optionalClassMethodsVMAddr, vmAddrConverter);

                // Track if this protocol needs a reallocation in objc
                objCProtocol.requiresObjCReallocation = protocolPtr->size < sizeof(protocol_t);

                handler(diag, protocolVMAddr, objCProtocol);
                if (diag.hasError())
                    return;
            }
        } else {
            // 32-bit layout: same walk with 32-bit pointers.
            typedef uint32_t PtrTy;
            struct protocol_t {
                PtrTy isaVMAddr;
                PtrTy nameVMAddr;
                PtrTy protocolsVMAddr;
                PtrTy instanceMethodsVMAddr;
                PtrTy classMethodsVMAddr;
                PtrTy optionalInstanceMethodsVMAddr;
                PtrTy optionalClassMethodsVMAddr;
                PtrTy instancePropertiesVMAddr;
                uint32_t size;
                uint32_t flags;
                // Fields below this point are not always present on disk.
                PtrTy extendedMethodTypesVMAddr;
                PtrTy demangledNameVMAddr;
                PtrTy classPropertiesVMAddr;
            };
            for (uint64_t i = 0; i != protocolListSize; i += sizeof(PtrTy)) {
                uint64_t protocolVMAddr = convertToVMAddr(*(PtrTy*)(protocolList + i), vmAddrConverter);

                const protocol_t* protocolPtr = (const protocol_t*)(protocolVMAddr + slide);
                ObjCProtocol objCProtocol;
                objCProtocol.isaVMAddr = convertToVMAddr(protocolPtr->isaVMAddr, vmAddrConverter);
                objCProtocol.nameVMAddr = convertToVMAddr(protocolPtr->nameVMAddr, vmAddrConverter);
                objCProtocol.instanceMethodsVMAddr = convertToVMAddr(protocolPtr->instanceMethodsVMAddr, vmAddrConverter);
                objCProtocol.classMethodsVMAddr = convertToVMAddr(protocolPtr->classMethodsVMAddr, vmAddrConverter);
                objCProtocol.optionalInstanceMethodsVMAddr = convertToVMAddr(protocolPtr->optionalInstanceMethodsVMAddr, vmAddrConverter);
                objCProtocol.optionalClassMethodsVMAddr = convertToVMAddr(protocolPtr->optionalClassMethodsVMAddr, vmAddrConverter);

                // Track if this protocol needs a reallocation in objc
                objCProtocol.requiresObjCReallocation = protocolPtr->size < sizeof(protocol_t);

                handler(diag, protocolVMAddr, objCProtocol);
                if (diag.hasError())
                    return;
            }
        }
    });
}
3957
// Walks the method_list_t at `methodListVMAddr`, calling `handler` once per
// method with its name/types/imp pointers decoded back to vmAddrs.  A zero
// methodListVMAddr (no methods) is a no-op.  `contentRebased` tells the
// VMAddrConverter whether pointers in memory have already been slid/signed.
void MachOAnalyzer::forEachObjCMethod(uint64_t methodListVMAddr, bool contentRebased,
                                      void (^handler)(uint64_t methodVMAddr, const ObjCMethod& method)) const {
    if ( methodListVMAddr == 0 )
        return;

    const uint64_t ptrSize = pointerSize();
    intptr_t slide = getSlide();

    MachOAnalyzer::VMAddrConverter vmAddrConverter;
    vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
    vmAddrConverter.slide = slide;
    vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
    vmAddrConverter.contentRebased = contentRebased;

    if ( ptrSize == 8 ) {
        typedef uint64_t PtrTy;
        struct method_list_t {
            uint32_t entsize;
            uint32_t count;
            PtrTy methodArrayBase; // Note this is the start the array method_t[0]

            uint32_t getEntsize() const {
                // Low two bits of entsize are masked off before use as a stride.
                return (entsize) & ~(uint32_t)3;
            }
        };

        struct method_t {
            PtrTy nameVMAddr;   // SEL
            PtrTy typesVMAddr;  // const char *
            PtrTy impVMAddr;    // IMP
        };

        const method_list_t* methodList = (const method_list_t*)(methodListVMAddr + slide);
        uint64_t methodListArrayBaseVMAddr = methodListVMAddr + offsetof(method_list_t, methodArrayBase);
        for (unsigned i = 0; i != methodList->count; ++i) {
            // Entries are strided by entsize, not sizeof(method_t).
            uint64_t methodEntryOffset = i * methodList->getEntsize();
            uint64_t methodVMAddr = methodListArrayBaseVMAddr + methodEntryOffset;
            const method_t* methodPtr = (const method_t*)(methodVMAddr + slide);
            ObjCMethod method;
            method.nameVMAddr   = convertToVMAddr(methodPtr->nameVMAddr, vmAddrConverter);
            method.typesVMAddr  = convertToVMAddr(methodPtr->typesVMAddr, vmAddrConverter);
            method.impVMAddr    = convertToVMAddr(methodPtr->impVMAddr, vmAddrConverter);
            // Where the selector pointer itself lives, for fixup purposes.
            method.nameLocationVMAddr = methodVMAddr + offsetof(method_t, nameVMAddr);
            handler(methodVMAddr, method);
        }
    } else {
        // 32-bit layout: same walk with 32-bit pointers.
        typedef uint32_t PtrTy;
        struct method_list_t {
            uint32_t entsize;
            uint32_t count;
            PtrTy methodArrayBase; // Note this is the start the array method_t[0]

            uint32_t getEntsize() const {
                return (entsize) & ~(uint32_t)3;
            }
        };

        struct method_t {
            PtrTy nameVMAddr;   // SEL
            PtrTy typesVMAddr;  // const char *
            PtrTy impVMAddr;    // IMP
        };

        const method_list_t* methodList = (const method_list_t*)(methodListVMAddr + slide);
        uint64_t methodListArrayBaseVMAddr = methodListVMAddr + offsetof(method_list_t, methodArrayBase);
        for (unsigned i = 0; i != methodList->count; ++i) {
            uint64_t methodEntryOffset = i * methodList->getEntsize();
            uint64_t methodVMAddr = methodListArrayBaseVMAddr + methodEntryOffset;
            const method_t* methodPtr = (const method_t*)(methodVMAddr + slide);
            ObjCMethod method;
            method.nameVMAddr   = convertToVMAddr(methodPtr->nameVMAddr, vmAddrConverter);
            method.typesVMAddr  = convertToVMAddr(methodPtr->typesVMAddr, vmAddrConverter);
            method.impVMAddr    = convertToVMAddr(methodPtr->impVMAddr, vmAddrConverter);
            method.nameLocationVMAddr = methodVMAddr + offsetof(method_t, nameVMAddr);
            handler(methodVMAddr, method);
        }
    }
}
4036
// Walks the property_list_t at `propertyListVMAddr`, calling `handler` once
// per property with its name/attributes pointers decoded back to vmAddrs.
// A zero propertyListVMAddr (no properties) is a no-op.  `contentRebased`
// tells the VMAddrConverter whether pointers in memory have already been
// slid/signed.
void MachOAnalyzer::forEachObjCProperty(uint64_t propertyListVMAddr, bool contentRebased,
                                        void (^handler)(uint64_t propertyVMAddr, const ObjCProperty& property)) const {
    if ( propertyListVMAddr == 0 )
        return;

    const uint64_t ptrSize = pointerSize();
    intptr_t slide = getSlide();

    MachOAnalyzer::VMAddrConverter vmAddrConverter;
    vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
    vmAddrConverter.slide = slide;
    vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
    vmAddrConverter.contentRebased = contentRebased;

    if ( ptrSize == 8 ) {
        typedef uint64_t PtrTy;
        struct property_list_t {
            uint32_t entsize;
            uint32_t count;
            PtrTy propertyArrayBase; // Note this is the start the array property_t[0]

            uint32_t getEntsize() const {
                // Low two bits of entsize are masked off before use as a stride.
                return (entsize) & ~(uint32_t)3;
            }
        };

        struct property_t {
            PtrTy nameVMAddr;       // SEL
            PtrTy attributesVMAddr; // const char *
        };

        const property_list_t* propertyList = (const property_list_t*)(propertyListVMAddr + slide);
        uint64_t propertyListArrayBaseVMAddr = propertyListVMAddr + offsetof(property_list_t, propertyArrayBase);
        for (unsigned i = 0; i != propertyList->count; ++i) {
            // Entries are strided by entsize, not sizeof(property_t).
            uint64_t propertyEntryOffset = i * propertyList->getEntsize();
            uint64_t propertyVMAddr = propertyListArrayBaseVMAddr + propertyEntryOffset;
            const property_t* propertyPtr = (const property_t*)(propertyVMAddr + slide);
            ObjCProperty property;
            property.nameVMAddr       = convertToVMAddr(propertyPtr->nameVMAddr, vmAddrConverter);
            property.attributesVMAddr = convertToVMAddr(propertyPtr->attributesVMAddr, vmAddrConverter);
            handler(propertyVMAddr, property);
        }
    } else {
        // 32-bit layout: same walk with 32-bit pointers.
        typedef uint32_t PtrTy;
        struct property_list_t {
            uint32_t entsize;
            uint32_t count;
            PtrTy propertyArrayBase; // Note this is the start the array property_t[0]

            uint32_t getEntsize() const {
                return (entsize) & ~(uint32_t)3;
            }
        };

        struct property_t {
            PtrTy nameVMAddr;       // SEL
            PtrTy attributesVMAddr; // const char *
        };

        const property_list_t* propertyList = (const property_list_t*)(propertyListVMAddr + slide);
        uint64_t propertyListArrayBaseVMAddr = propertyListVMAddr + offsetof(property_list_t, propertyArrayBase);
        for (unsigned i = 0; i != propertyList->count; ++i) {
            uint64_t propertyEntryOffset = i * propertyList->getEntsize();
            uint64_t propertyVMAddr = propertyListArrayBaseVMAddr + propertyEntryOffset;
            const property_t* propertyPtr = (const property_t*)(propertyVMAddr + slide);
            ObjCProperty property;
            property.nameVMAddr       = convertToVMAddr(propertyPtr->nameVMAddr, vmAddrConverter);
            property.attributesVMAddr = convertToVMAddr(propertyPtr->attributesVMAddr, vmAddrConverter);
            handler(propertyVMAddr, property);
        }
    }
}
4109
4110
4111 void MachOAnalyzer::forEachObjCSelectorReference(Diagnostics& diag, bool contentRebased,
4112 void (^handler)(uint64_t selRefVMAddr, uint64_t selRefTargetVMAddr)) const {
4113 const uint64_t ptrSize = pointerSize();
4114 intptr_t slide = getSlide();
4115
4116 MachOAnalyzer::VMAddrConverter vmAddrConverter;
4117 vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
4118 vmAddrConverter.slide = slide;
4119 vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
4120 vmAddrConverter.contentRebased = contentRebased;
4121
4122 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
4123 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
4124 return;
4125 if ( strcmp(sectInfo.sectName, "__objc_selrefs") != 0 )
4126 return;
4127 uint64_t selRefSectionVMAddr = sectInfo.sectAddr;
4128 const uint8_t* selRefs = (uint8_t*)(selRefSectionVMAddr + slide);
4129 uint64_t selRefsSize = sectInfo.sectSize;
4130
4131 if ( (selRefsSize % ptrSize) != 0 ) {
4132 diag.error("Invalid sel ref section size");
4133 return;
4134 }
4135
4136 if ( ptrSize == 8 ) {
4137 typedef uint64_t PtrTy;
4138 for (uint64_t i = 0; i != selRefsSize; i += sizeof(PtrTy)) {
4139 uint64_t selRefVMAddr = selRefSectionVMAddr + i;
4140 uint64_t selRefTargetVMAddr = convertToVMAddr(*(PtrTy*)(selRefs + i), vmAddrConverter);
4141 handler(selRefVMAddr, selRefTargetVMAddr);
4142 if (diag.hasError()) {
4143 stop = true;
4144 return;
4145 }
4146 }
4147 } else {
4148 typedef uint32_t PtrTy;
4149 for (uint64_t i = 0; i != selRefsSize; i += sizeof(PtrTy)) {
4150 uint64_t selRefVMAddr = selRefSectionVMAddr + i;
4151 uint64_t selRefTargetVMAddr = convertToVMAddr(*(PtrTy*)(selRefs + i), vmAddrConverter);
4152 handler(selRefVMAddr, selRefTargetVMAddr);
4153 if (diag.hasError()) {
4154 stop = true;
4155 return;
4156 }
4157 }
4158 }
4159 });
4160 }
4161
4162 void MachOAnalyzer::forEachObjCMethodName(void (^handler)(const char* methodName)) const {
4163 intptr_t slide = getSlide();
4164 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
4165 if ( strcmp(sectInfo.segInfo.segName, "__TEXT") != 0 )
4166 return;
4167 if ( strcmp(sectInfo.sectName, "__objc_methname") != 0 )
4168 return;
4169 if ( sectInfo.segInfo.isProtected || ( (sectInfo.sectFlags & SECTION_TYPE) != S_CSTRING_LITERALS ) ) {
4170 stop = true;
4171 return;
4172 }
4173 if ( malformedSectionRange ) {
4174 stop = true;
4175 return;
4176 }
4177
4178 const char* content = (const char*)(sectInfo.sectAddr + slide);
4179 uint64_t sectionSize = sectInfo.sectSize;
4180
4181 const char* s = (const char*)content;
4182 const char* end = s + sectionSize;
4183 while ( s < end ) {
4184 handler(s);
4185 s += strlen(s) + 1;
4186 }
4187 });
4188 }
4189
4190
4191 bool MachOAnalyzer::hasObjCMessageReferences() const {
4192
4193 __block bool foundSection = false;
4194 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
4195 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
4196 return;
4197 if ( strcmp(sectInfo.sectName, "__objc_msgrefs") != 0 )
4198 return;
4199 foundSection = true;
4200 stop = true;
4201 });
4202 return foundSection;
4203 }
4204
4205 const MachOAnalyzer::ObjCImageInfo* MachOAnalyzer::objcImageInfo() const {
4206 int64_t slide = getSlide();
4207
4208 __block bool foundInvalidObjCImageInfo = false;
4209 __block const ObjCImageInfo* imageInfo = nullptr;
4210 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectionInfo, bool malformedSectionRange, bool& stop) {
4211 if ( strncmp(sectionInfo.segInfo.segName, "__DATA", 6) != 0 )
4212 return;
4213 if (strcmp(sectionInfo.sectName, "__objc_imageinfo") != 0)
4214 return;
4215 if ( malformedSectionRange ) {
4216 stop = true;
4217 return;
4218 }
4219 if ( sectionInfo.sectSize != 8 ) {
4220 stop = true;
4221 return;
4222 }
4223 imageInfo = (const ObjCImageInfo*)(sectionInfo.sectAddr + slide);
4224 if ( (imageInfo->flags & ObjCImageInfo::dyldPreoptimized) != 0 ) {
4225 foundInvalidObjCImageInfo = true;
4226 stop = true;
4227 return;
4228 }
4229 stop = true;
4230 });
4231 if ( foundInvalidObjCImageInfo )
4232 return nullptr;
4233 return imageInfo;
4234 }
4235
4236 uint32_t MachOAnalyzer::loadCommandsFreeSpace() const
4237 {
4238 __block uint32_t firstSectionFileOffset = 0;
4239 __block uint32_t firstSegmentFileOffset = 0;
4240 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
4241 firstSectionFileOffset = sectInfo.sectFileOffset;
4242 firstSegmentFileOffset = (uint32_t)sectInfo.segInfo.fileOffset;
4243 stop = true;
4244 });
4245
4246 uint32_t headerSize = (this->magic == MH_MAGIC_64) ? sizeof(mach_header_64) : sizeof(mach_header);
4247 uint32_t existSpaceUsed = this->sizeofcmds + headerSize;
4248 return firstSectionFileOffset - firstSegmentFileOffset - existSpaceUsed;
4249 }
4250
4251 void MachOAnalyzer::forEachWeakDef(Diagnostics& diag,
4252 void (^handler)(const char* symbolName, uintptr_t imageOffset, bool isFromExportTrie)) const {
4253 uint64_t baseAddress = preferredLoadAddress();
4254 forEachGlobalSymbol(diag, ^(const char *symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool &stop) {
4255 if ( (n_desc & N_WEAK_DEF) != 0 ) {
4256 handler(symbolName, n_value - baseAddress, false);
4257 }
4258 });
4259 forEachExportedSymbol(diag, ^(const char *symbolName, uint64_t imageOffset, uint64_t flags, uint64_t other, const char *importName, bool &stop) {
4260 if ( (flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION ) == 0 )
4261 return;
4262 // Skip resolvers and re-exports
4263 if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) != 0 )
4264 return;
4265 if ( (flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) != 0 )
4266 return;
4267 handler(symbolName, imageOffset, true);
4268 });
4269 }
4270
4271 } // dyld3
4272
4273