]> git.saurik.com Git - apple/dyld.git/blob - dyld3/MachOAnalyzer.cpp
ec3b8d7ab2f3aed6ba87855081a8ce6c20a97a5f
[apple/dyld.git] / dyld3 / MachOAnalyzer.cpp
1 /*
2 * Copyright (c) 2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 #include <sys/types.h>
25 #include <mach/mach.h>
26 #include <assert.h>
27 #include <limits.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <mach-o/reloc.h>
32 #include <mach-o/nlist.h>
33 #include <TargetConditionals.h>
34
35 #include "MachOAnalyzer.h"
36 #include "CodeSigningTypes.h"
37 #include "Array.h"
38
39
40 namespace dyld3 {
41
42
43 const MachOAnalyzer* MachOAnalyzer::validMainExecutable(Diagnostics& diag, const mach_header* mh, const char* path, uint64_t sliceLength,
44 const GradedArchs& archs, Platform platform)
45 {
46 const MachOAnalyzer* result = (const MachOAnalyzer*)mh;
47 if ( !result->validMachOForArchAndPlatform(diag, (size_t)sliceLength, path, archs, platform) )
48 return nullptr;
49 if ( !result->isDynamicExecutable() )
50 return nullptr;
51
52 return result;
53 }
54
55
56 closure::LoadedFileInfo MachOAnalyzer::load(Diagnostics& diag, const closure::FileSystem& fileSystem,
57 const char* path, const GradedArchs& archs, Platform platform, char realerPath[MAXPATHLEN])
58 {
59 // FIXME: This should probably be an assert, but if we happen to have a diagnostic here then something is wrong
60 // above us and we should quickly return instead of doing unnecessary work.
61 if (diag.hasError())
62 return closure::LoadedFileInfo();
63
64 closure::LoadedFileInfo info;
65 if (!fileSystem.loadFile(path, info, realerPath, ^(const char *format, ...) {
66 va_list list;
67 va_start(list, format);
68 diag.error(format, list);
69 va_end(list);
70 })) {
71 return closure::LoadedFileInfo();
72 }
73
74 // If we now have an error, but succeeded, then we must have tried multiple paths, one of which errored, but
75 // then succeeded on a later path. So clear the error.
76 if (diag.hasError())
77 diag.clearError();
78
79 // if fat, remap just slice needed
80 bool fatButMissingSlice;
81 const FatFile* fh = (FatFile*)info.fileContent;
82 uint64_t sliceOffset = info.sliceOffset;
83 uint64_t sliceLen = info.sliceLen;
84 if ( fh->isFatFileWithSlice(diag, info.fileContentLen, archs, sliceOffset, sliceLen, fatButMissingSlice) ) {
85 // unmap anything before slice
86 fileSystem.unloadPartialFile(info, sliceOffset, sliceLen);
87 // Update the info to keep track of the new slice offset.
88 info.sliceOffset = sliceOffset;
89 info.sliceLen = sliceLen;
90 }
91 else if ( diag.hasError() ) {
92 // We must have generated an error in the fat file parsing so use that error
93 fileSystem.unloadFile(info);
94 return closure::LoadedFileInfo();
95 }
96 else if ( fatButMissingSlice ) {
97 diag.error("missing compatible arch in %s", path);
98 fileSystem.unloadFile(info);
99 return closure::LoadedFileInfo();
100 }
101
102 const MachOAnalyzer* mh = (MachOAnalyzer*)info.fileContent;
103
104 // validate is mach-o of requested arch and platform
105 if ( !mh->validMachOForArchAndPlatform(diag, (size_t)info.sliceLen, path, archs, platform) ) {
106 fileSystem.unloadFile(info);
107 return closure::LoadedFileInfo();
108 }
109
110 // if has zero-fill expansion, re-map
111 mh = mh->remapIfZeroFill(diag, fileSystem, info);
112
113 // on error, remove mappings and return nullptr
114 if ( diag.hasError() ) {
115 fileSystem.unloadFile(info);
116 return closure::LoadedFileInfo();
117 }
118
119 // now that LINKEDIT is at expected offset, finish validation
120 mh->validLinkedit(diag, path);
121
122 // on error, remove mappings and return nullptr
123 if ( diag.hasError() ) {
124 fileSystem.unloadFile(info);
125 return closure::LoadedFileInfo();
126 }
127
128 return info;
129 }
130
131 #if DEBUG
132 // only used in debug builds of cache builder to verify segment moves are valid
133 void MachOAnalyzer::validateDyldCacheDylib(Diagnostics& diag, const char* path) const
134 {
135 validLinkedit(diag, path);
136 validSegments(diag, path, 0xffffffff);
137 }
138 #endif
139
140 uint64_t MachOAnalyzer::mappedSize() const
141 {
142 uint64_t vmSpace;
143 bool hasZeroFill;
144 analyzeSegmentsLayout(vmSpace, hasZeroFill);
145 return vmSpace;
146 }
147
148 bool MachOAnalyzer::validMachOForArchAndPlatform(Diagnostics& diag, size_t sliceLength, const char* path, const GradedArchs& archs, Platform platform) const
149 {
150 // must start with mach-o magic value
151 if ( (this->magic != MH_MAGIC) && (this->magic != MH_MAGIC_64) ) {
152 diag.error("could not use '%s' because it is not a mach-o file: 0x%08X 0x%08X", path, this->magic, this->cputype);
153 return false;
154 }
155
156 if ( !archs.grade(this->cputype, this->cpusubtype) ) {
157 diag.error("could not use '%s' because it is not a compatible arch", path);
158 return false;
159 }
160
161 // must be a filetype dyld can load
162 switch ( this->filetype ) {
163 case MH_EXECUTE:
164 case MH_DYLIB:
165 case MH_BUNDLE:
166 case MH_DYLINKER:
167 break;
168 default:
169 diag.error("could not use '%s' because it is not a dylib, bundle, or executable, filetype=0x%08X", path, this->filetype);
170 return false;
171 }
172
173 // validate load commands structure
174 if ( !this->validLoadCommands(diag, path, sliceLength) ) {
175 return false;
176 }
177
178 // filter out static executables
179 if ( (this->filetype == MH_EXECUTE) && !isDynamicExecutable() ) {
180 diag.error("could not use '%s' because it is a static executable", path);
181 return false;
182 }
183
184 // must match requested platform (do this after load commands are validated)
185 if ( !this->supportsPlatform(platform) ) {
186 diag.error("could not use '%s' because it was built for a different platform", path);
187 return false;
188 }
189
190 // validate dylib loads
191 if ( !validEmbeddedPaths(diag, platform, path) )
192 return false;
193
194 // validate segments
195 if ( !validSegments(diag, path, sliceLength) )
196 return false;
197
198 // validate entry
199 if ( this->filetype == MH_EXECUTE ) {
200 if ( !validMain(diag, path) )
201 return false;
202 }
203
204 // further validations done in validLinkedit()
205
206 return true;
207 }
208
209 bool MachOAnalyzer::validLinkedit(Diagnostics& diag, const char* path) const
210 {
211 // validate LINKEDIT layout
212 if ( !validLinkeditLayout(diag, path) )
213 return false;
214
215 if ( hasChainedFixups() ) {
216 if ( !validChainedFixupsInfo(diag, path) )
217 return false;
218 }
219 else {
220 // validate rebasing info
221 if ( !validRebaseInfo(diag, path) )
222 return false;
223
224 // validate binding info
225 if ( !validBindInfo(diag, path) )
226 return false;
227 }
228
229 return true;
230 }
231
232 bool MachOAnalyzer::validLoadCommands(Diagnostics& diag, const char* path, size_t fileLen) const
233 {
234 // check load command don't exceed file length
235 if ( this->sizeofcmds + machHeaderSize() > fileLen ) {
236 diag.error("in '%s' load commands exceed length of file", path);
237 return false;
238 }
239
240 // walk all load commands and sanity check them
241 Diagnostics walkDiag;
242 forEachLoadCommand(walkDiag, ^(const load_command* cmd, bool& stop) {});
243 if ( walkDiag.hasError() ) {
244 #if BUILDING_CACHE_BUILDER
245 diag.error("in '%s' %s", path, walkDiag.errorMessage().c_str());
246 #else
247 diag.error("in '%s' %s", path, walkDiag.errorMessage());
248 #endif
249 return false;
250 }
251
252 // check load commands fit in TEXT segment
253 __block bool foundTEXT = false;
254 forEachSegment(^(const SegmentInfo& info, bool& stop) {
255 if ( strcmp(info.segName, "__TEXT") == 0 ) {
256 foundTEXT = true;
257 if ( this->sizeofcmds + machHeaderSize() > info.fileSize ) {
258 diag.error("in '%s' load commands exceed length of __TEXT segment", path);
259 }
260 if ( info.fileOffset != 0 ) {
261 diag.error("in '%s' __TEXT segment not start of mach-o", path);
262 }
263 stop = true;
264 }
265 });
266 if ( !diag.noError() && !foundTEXT ) {
267 diag.error("in '%s' __TEXT segment not found", path);
268 return false;
269 }
270
271 return true;
272 }
273
274 const MachOAnalyzer* MachOAnalyzer::remapIfZeroFill(Diagnostics& diag, const closure::FileSystem& fileSystem, closure::LoadedFileInfo& info) const
275 {
276 uint64_t vmSpaceRequired;
277 bool hasZeroFill;
278 analyzeSegmentsLayout(vmSpaceRequired, hasZeroFill);
279
280 if ( hasZeroFill ) {
281 vm_address_t newMappedAddr;
282 if ( ::vm_allocate(mach_task_self(), &newMappedAddr, (size_t)vmSpaceRequired, VM_FLAGS_ANYWHERE) != 0 ) {
283 diag.error("vm_allocate failure");
284 return nullptr;
285 }
286 // re-map each segment read-only, with runtime layout
287 uint64_t textSegVmAddr = preferredLoadAddress();
288 forEachSegment(^(const SegmentInfo& segmentInfo, bool& stop) {
289 if ( segmentInfo.fileSize != 0 ) {
290 kern_return_t r = vm_copy(mach_task_self(), (vm_address_t)((long)info.fileContent+segmentInfo.fileOffset), (vm_size_t)segmentInfo.fileSize, (vm_address_t)(newMappedAddr+segmentInfo.vmAddr-textSegVmAddr));
291 if ( r != KERN_SUCCESS ) {
292 diag.error("vm_copy() failure");
293 stop = true;
294 }
295 }
296 });
297 if ( diag.noError() ) {
298 // remove original mapping and return new mapping
299 fileSystem.unloadFile(info);
300
301 // make the new mapping read-only
302 ::vm_protect(mach_task_self(), newMappedAddr, (vm_size_t)vmSpaceRequired, false, VM_PROT_READ);
303
304 // Set vm_deallocate as the unload method.
305 info.unload = [](const closure::LoadedFileInfo& info) {
306 ::vm_deallocate(mach_task_self(), (vm_address_t)info.fileContent, (size_t)info.fileContentLen);
307 };
308
309 // And update the file content to the new location
310 info.fileContent = (const void*)newMappedAddr;
311 info.fileContentLen = vmSpaceRequired;
312 return (const MachOAnalyzer*)info.fileContent;
313 }
314 else {
315 // new mapping failed, return old mapping with an error in diag
316 ::vm_deallocate(mach_task_self(), newMappedAddr, (size_t)vmSpaceRequired);
317 return nullptr;
318 }
319 }
320
321 return this;
322 }
323
324 void MachOAnalyzer::analyzeSegmentsLayout(uint64_t& vmSpace, bool& hasZeroFill) const
325 {
326 __block bool writeExpansion = false;
327 __block uint64_t lowestVmAddr = 0xFFFFFFFFFFFFFFFFULL;
328 __block uint64_t highestVmAddr = 0;
329 __block uint64_t sumVmSizes = 0;
330 forEachSegment(^(const SegmentInfo& segmentInfo, bool& stop) {
331 if ( strcmp(segmentInfo.segName, "__PAGEZERO") == 0 )
332 return;
333 if ( segmentInfo.writable() && (segmentInfo.fileSize != segmentInfo.vmSize) )
334 writeExpansion = true; // zerofill at end of __DATA
335 if ( segmentInfo.vmAddr < lowestVmAddr )
336 lowestVmAddr = segmentInfo.vmAddr;
337 if ( segmentInfo.vmAddr+segmentInfo.vmSize > highestVmAddr )
338 highestVmAddr = segmentInfo.vmAddr+segmentInfo.vmSize;
339 sumVmSizes += segmentInfo.vmSize;
340 });
341 uint64_t totalVmSpace = (highestVmAddr - lowestVmAddr);
342 // LINKEDIT vmSize is not required to be a multiple of page size. Round up if that is the case
343 const uint64_t pageSize = uses16KPages() ? 0x4000 : 0x1000;
344 totalVmSpace = (totalVmSpace + (pageSize - 1)) & ~(pageSize - 1);
345 bool hasHole = (totalVmSpace != sumVmSizes); // segments not contiguous
346
347 vmSpace = totalVmSpace;
348 hasZeroFill = writeExpansion || hasHole;
349 }
350
351 bool MachOAnalyzer::enforceFormat(Malformed kind) const
352 {
353 __block bool result = false;
354 forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
355 switch (platform) {
356 case Platform::macOS:
357 switch (kind) {
358 case Malformed::linkeditOrder:
359 case Malformed::linkeditAlignment:
360 case Malformed::dyldInfoAndlocalRelocs:
361 // enforce these checks on new binaries only
362 if (sdk >= 0x000A0E00) // macOS 10.14
363 result = true;
364 break;
365 case Malformed::segmentOrder:
366 case Malformed::linkeditPermissions:
367 case Malformed::textPermissions:
368 case Malformed::executableData:
369 case Malformed::codeSigAlignment:
370 // enforce these checks on new binaries only
371 if (sdk >= 0x000A0F00) // macOS 10.15
372 result = true;
373 break;
374 }
375 break;
376 case Platform::iOS:
377 switch (kind) {
378 case Malformed::linkeditOrder:
379 case Malformed::dyldInfoAndlocalRelocs:
380 case Malformed::textPermissions:
381 case Malformed::executableData:
382 result = true;
383 break;
384 case Malformed::linkeditAlignment:
385 case Malformed::segmentOrder:
386 case Malformed::linkeditPermissions:
387 case Malformed::codeSigAlignment:
388 // enforce these checks on new binaries only
389 if (sdk >= 0x000D0000) // iOS 13
390 result = true;
391 break;
392 }
393 break;
394 default:
395 result = true;
396 break;
397 }
398 });
399 // if binary is so old, there is no platform info, don't enforce malformed errors
400 return result;
401 }
402
403 bool MachOAnalyzer::validEmbeddedPaths(Diagnostics& diag, Platform platform, const char* path) const
404 {
405 __block int index = 1;
406 __block bool allGood = true;
407 __block bool foundInstallName = false;
408 __block int dependentsCount = 0;
409 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
410 const dylib_command* dylibCmd;
411 const rpath_command* rpathCmd;
412 switch ( cmd->cmd ) {
413 case LC_ID_DYLIB:
414 foundInstallName = true;
415 // fall through
416 [[clang::fallthrough]];
417 case LC_LOAD_DYLIB:
418 case LC_LOAD_WEAK_DYLIB:
419 case LC_REEXPORT_DYLIB:
420 case LC_LOAD_UPWARD_DYLIB:
421 dylibCmd = (dylib_command*)cmd;
422 if ( dylibCmd->dylib.name.offset > cmd->cmdsize ) {
423 diag.error("in '%s' load command #%d name offset (%u) outside its size (%u)", path, index, dylibCmd->dylib.name.offset, cmd->cmdsize);
424 stop = true;
425 allGood = false;
426 }
427 else {
428 bool foundEnd = false;
429 const char* start = (char*)dylibCmd + dylibCmd->dylib.name.offset;
430 const char* end = (char*)dylibCmd + cmd->cmdsize;
431 for (const char* s=start; s < end; ++s) {
432 if ( *s == '\0' ) {
433 foundEnd = true;
434 break;
435 }
436 }
437 if ( !foundEnd ) {
438 diag.error("in '%s' load command #%d string extends beyond end of load command", path, index);
439 stop = true;
440 allGood = false;
441 }
442 }
443 if ( cmd->cmd != LC_ID_DYLIB )
444 ++dependentsCount;
445 break;
446 case LC_RPATH:
447 rpathCmd = (rpath_command*)cmd;
448 if ( rpathCmd->path.offset > cmd->cmdsize ) {
449 diag.error("in '%s' load command #%d path offset (%u) outside its size (%u)", path, index, rpathCmd->path.offset, cmd->cmdsize);
450 stop = true;
451 allGood = false;
452 }
453 else {
454 bool foundEnd = false;
455 const char* start = (char*)rpathCmd + rpathCmd->path.offset;
456 const char* end = (char*)rpathCmd + cmd->cmdsize;
457 for (const char* s=start; s < end; ++s) {
458 if ( *s == '\0' ) {
459 foundEnd = true;
460 break;
461 }
462 }
463 if ( !foundEnd ) {
464 diag.error("in '%s' load command #%d string extends beyond end of load command", path, index);
465 stop = true;
466 allGood = false;
467 }
468 }
469 break;
470 }
471 ++index;
472 });
473 if ( !allGood )
474 return false;
475
476 if ( this->filetype == MH_DYLIB ) {
477 if ( !foundInstallName ) {
478 diag.error("in '%s' MH_DYLIB is missing LC_ID_DYLIB", path);
479 return false;
480 }
481 }
482 else {
483 if ( foundInstallName ) {
484 diag.error("in '%s' LC_ID_DYLIB found in non-MH_DYLIB", path);
485 return false;
486 }
487 }
488
489 if ( (dependentsCount == 0) && (this->filetype == MH_EXECUTE) ) {
490 diag.error("in '%s' missing LC_LOAD_DYLIB (must link with at least libSystem.dylib)", path);
491 return false;
492 }
493
494 return true;
495 }
496
497 bool MachOAnalyzer::validSegments(Diagnostics& diag, const char* path, size_t fileLen) const
498 {
499 // check segment load command size
500 __block bool badSegmentLoadCommand = false;
501 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
502 if ( cmd->cmd == LC_SEGMENT_64 ) {
503 const segment_command_64* seg = (segment_command_64*)cmd;
504 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command_64);
505 if ( sectionsSpace < 0 ) {
506 diag.error("in '%s' load command size too small for LC_SEGMENT_64", path);
507 badSegmentLoadCommand = true;
508 stop = true;
509 }
510 else if ( (sectionsSpace % sizeof(section_64)) != 0 ) {
511 diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize);
512 badSegmentLoadCommand = true;
513 stop = true;
514 }
515 else if ( sectionsSpace != (seg->nsects * sizeof(section_64)) ) {
516 diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects);
517 badSegmentLoadCommand = true;
518 stop = true;
519 }
520 else if ( greaterThanAddOrOverflow(seg->fileoff, seg->filesize, fileLen) ) {
521 diag.error("in '%s' segment load command content extends beyond end of file", path);
522 badSegmentLoadCommand = true;
523 stop = true;
524 }
525 else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
526 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
527 diag.error("in '%s' segment filesize exceeds vmsize", path);
528 badSegmentLoadCommand = true;
529 stop = true;
530 }
531 }
532 else if ( cmd->cmd == LC_SEGMENT ) {
533 const segment_command* seg = (segment_command*)cmd;
534 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command);
535 if ( sectionsSpace < 0 ) {
536 diag.error("in '%s' load command size too small for LC_SEGMENT", path);
537 badSegmentLoadCommand = true;
538 stop = true;
539 }
540 else if ( (sectionsSpace % sizeof(section)) != 0 ) {
541 diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize);
542 badSegmentLoadCommand = true;
543 stop = true;
544 }
545 else if ( sectionsSpace != (seg->nsects * sizeof(section)) ) {
546 diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects);
547 badSegmentLoadCommand = true;
548 stop = true;
549 }
550 else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
551 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
552 diag.error("in '%s' segment filesize exceeds vmsize", path);
553 badSegmentLoadCommand = true;
554 stop = true;
555 }
556 }
557 });
558 if ( badSegmentLoadCommand )
559 return false;
560
561 // check mapping permissions of segments
562 __block bool badPermissions = false;
563 __block bool badSize = false;
564 __block bool hasTEXT = false;
565 __block bool hasLINKEDIT = false;
566 forEachSegment(^(const SegmentInfo& info, bool& stop) {
567 if ( strcmp(info.segName, "__TEXT") == 0 ) {
568 if ( (info.protections != (VM_PROT_READ|VM_PROT_EXECUTE)) && enforceFormat(Malformed::textPermissions) ) {
569 diag.error("in '%s' __TEXT segment permissions is not 'r-x'", path);
570 badPermissions = true;
571 stop = true;
572 }
573 hasTEXT = true;
574 }
575 else if ( strcmp(info.segName, "__LINKEDIT") == 0 ) {
576 if ( (info.protections != VM_PROT_READ) && enforceFormat(Malformed::linkeditPermissions) ) {
577 diag.error("in '%s' __LINKEDIT segment permissions is not 'r--'", path);
578 badPermissions = true;
579 stop = true;
580 }
581 hasLINKEDIT = true;
582 }
583 else if ( (info.protections & 0xFFFFFFF8) != 0 ) {
584 diag.error("in '%s' %s segment permissions has invalid bits set", path, info.segName);
585 badPermissions = true;
586 stop = true;
587 }
588 if ( greaterThanAddOrOverflow(info.fileOffset, info.fileSize, fileLen) ) {
589 diag.error("in '%s' %s segment content extends beyond end of file", path, info.segName);
590 badSize = true;
591 stop = true;
592 }
593 if ( is64() ) {
594 if ( info.vmAddr+info.vmSize < info.vmAddr ) {
595 diag.error("in '%s' %s segment vm range wraps", path, info.segName);
596 badSize = true;
597 stop = true;
598 }
599 }
600 else {
601 if ( (uint32_t)(info.vmAddr+info.vmSize) < (uint32_t)(info.vmAddr) ) {
602 diag.error("in '%s' %s segment vm range wraps", path, info.segName);
603 badSize = true;
604 stop = true;
605 }
606 }
607 });
608 if ( badPermissions || badSize )
609 return false;
610 if ( !hasTEXT ) {
611 diag.error("in '%s' missing __TEXT segment", path);
612 return false;
613 }
614 if ( !hasLINKEDIT ) {
615 diag.error("in '%s' missing __LINKEDIT segment", path);
616 return false;
617 }
618
619 // check for overlapping segments
620 __block bool badSegments = false;
621 forEachSegment(^(const SegmentInfo& info1, bool& stop1) {
622 uint64_t seg1vmEnd = info1.vmAddr + info1.vmSize;
623 uint64_t seg1FileEnd = info1.fileOffset + info1.fileSize;
624 forEachSegment(^(const SegmentInfo& info2, bool& stop2) {
625 if ( info1.segIndex == info2.segIndex )
626 return;
627 uint64_t seg2vmEnd = info2.vmAddr + info2.vmSize;
628 uint64_t seg2FileEnd = info2.fileOffset + info2.fileSize;
629 if ( ((info2.vmAddr <= info1.vmAddr) && (seg2vmEnd > info1.vmAddr) && (seg1vmEnd > info1.vmAddr )) || ((info2.vmAddr >= info1.vmAddr ) && (info2.vmAddr < seg1vmEnd) && (seg2vmEnd > info2.vmAddr)) ) {
630 diag.error("in '%s' segment %s vm range overlaps segment %s", path, info1.segName, info2.segName);
631 badSegments = true;
632 stop1 = true;
633 stop2 = true;
634 }
635 if ( ((info2.fileOffset <= info1.fileOffset) && (seg2FileEnd > info1.fileOffset) && (seg1FileEnd > info1.fileOffset)) || ((info2.fileOffset >= info1.fileOffset) && (info2.fileOffset < seg1FileEnd) && (seg2FileEnd > info2.fileOffset )) ) {
636 diag.error("in '%s' segment %s file content overlaps segment %s", path, info1.segName, info2.segName);
637 badSegments = true;
638 stop1 = true;
639 stop2 = true;
640 }
641 if ( (info1.segIndex < info2.segIndex) && !stop1 ) {
642 if ( (info1.vmAddr > info2.vmAddr) || ((info1.fileOffset > info2.fileOffset ) && (info1.fileOffset != 0) && (info2.fileOffset != 0)) ){
643 if ( !inDyldCache() && enforceFormat(Malformed::segmentOrder) ) {
644 // dyld cache __DATA_* segments are moved around
645 diag.error("in '%s' segment load commands out of order with respect to layout for %s and %s", path, info1.segName, info2.segName);
646 badSegments = true;
647 stop1 = true;
648 stop2 = true;
649 }
650 }
651 }
652 });
653 });
654 if ( badSegments )
655 return false;
656
657 // check sections are within segment
658 __block bool badSections = false;
659 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
660 if ( cmd->cmd == LC_SEGMENT_64 ) {
661 const segment_command_64* seg = (segment_command_64*)cmd;
662 const section_64* const sectionsStart = (section_64*)((char*)seg + sizeof(struct segment_command_64));
663 const section_64* const sectionsEnd = &sectionsStart[seg->nsects];
664 for (const section_64* sect=sectionsStart; (sect < sectionsEnd); ++sect) {
665 if ( (int64_t)(sect->size) < 0 ) {
666 diag.error("in '%s' section %s size too large 0x%llX", path, sect->sectname, sect->size);
667 badSections = true;
668 }
669 else if ( sect->addr < seg->vmaddr ) {
670 diag.error("in '%s' section %s start address 0x%llX is before containing segment's address 0x%0llX", path, sect->sectname, sect->addr, seg->vmaddr);
671 badSections = true;
672 }
673 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
674 diag.error("in '%s' section %s end address 0x%llX is beyond containing segment's end address 0x%0llX", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
675 badSections = true;
676 }
677 }
678 }
679 else if ( cmd->cmd == LC_SEGMENT ) {
680 const segment_command* seg = (segment_command*)cmd;
681 const section* const sectionsStart = (section*)((char*)seg + sizeof(struct segment_command));
682 const section* const sectionsEnd = &sectionsStart[seg->nsects];
683 for (const section* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
684 if ( (int64_t)(sect->size) < 0 ) {
685 diag.error("in '%s' section %s size too large 0x%X", path, sect->sectname, sect->size);
686 badSections = true;
687 }
688 else if ( sect->addr < seg->vmaddr ) {
689 diag.error("in '%s' section %s start address 0x%X is before containing segment's address 0x%0X", path, sect->sectname, sect->addr, seg->vmaddr);
690 badSections = true;
691 }
692 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
693 diag.error("in '%s' section %s end address 0x%X is beyond containing segment's end address 0x%0X", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
694 badSections = true;
695 }
696 }
697 }
698 });
699
700 return !badSections;
701 }
702
703
704 bool MachOAnalyzer::validMain(Diagnostics& diag, const char* path) const
705 {
706 __block uint64_t textSegStartAddr = 0;
707 __block uint64_t textSegStartSize = 0;
708 forEachSegment(^(const SegmentInfo& info, bool& stop) {
709 if ( strcmp(info.segName, "__TEXT") == 0 ) {
710 textSegStartAddr = info.vmAddr;
711 textSegStartSize = info.vmSize;
712 stop = true;
713 }
714 });
715
716 __block int mainCount = 0;
717 __block int threadCount = 0;
718 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
719 entry_point_command* mainCmd;
720 uint64_t startAddress;
721 switch (cmd->cmd) {
722 case LC_MAIN:
723 ++mainCount;
724 mainCmd = (entry_point_command*)cmd;
725 if ( mainCmd->entryoff >= textSegStartSize ) {
726 startAddress = preferredLoadAddress() + mainCmd->entryoff;
727 __block bool foundSegment = false;
728 forEachSegment(^(const SegmentInfo& info, bool& stopSegment) {
729 // Skip segments which don't contain this address
730 if ( (startAddress < info.vmAddr) || (startAddress >= info.vmAddr+info.vmSize) )
731 return;
732 foundSegment = true;
733 if ( (info.protections & VM_PROT_EXECUTE) == 0 )
734 diag.error("LC_MAIN points to non-executable segment");
735 stopSegment = true;
736 });
737 if (!foundSegment)
738 diag.error("LC_MAIN entryoff is out of range");
739 stop = true;
740 }
741 break;
742 case LC_UNIXTHREAD:
743 ++threadCount;
744 startAddress = entryAddrFromThreadCmd((thread_command*)cmd);
745 if ( startAddress == 0 ) {
746 diag.error("LC_UNIXTHREAD not valid for arch %s", archName());
747 stop = true;
748 }
749 else if ( (startAddress < textSegStartAddr) || (startAddress >= textSegStartAddr+textSegStartSize) ) {
750 diag.error("LC_UNIXTHREAD entry not in __TEXT segment");
751 stop = true;
752 }
753 break;
754 }
755 });
756 if ( diag.hasError() )
757 return false;
758 if ( diag.noError() && (mainCount+threadCount == 1) )
759 return true;
760
761 if ( mainCount + threadCount == 0 )
762 diag.error("missing LC_MAIN or LC_UNIXTHREAD");
763 else
764 diag.error("only one LC_MAIN or LC_UNIXTHREAD is allowed");
765 return false;
766 }
767
768
namespace {
    // One contiguous piece of content described by a load command, recorded so
    // that all pieces can be sorted by file offset and checked against each other.
    struct LinkEditContentChunk
    {
        const char* name;               // human readable name used in error messages
        uint32_t    alignment;          // required alignment of fileOffsetStart
        uint32_t    fileOffsetStart;    // file offset where the content begins
        uint32_t    size;               // size of the content in bytes

        // qsort() comparator ordering chunks by ascending fileOffsetStart.
        // Returns <0, 0 or >0; equal offsets compare equal so that the ordering
        // is consistent, as qsort() requires of its comparison function.
        static int compareByFileOffset(const void* l, const void* r) {
            const uint32_t leftOffset  = static_cast<const LinkEditContentChunk*>(l)->fileOffsetStart;
            const uint32_t rightOffset = static_cast<const LinkEditContentChunk*>(r)->fileOffsetStart;
            if ( leftOffset < rightOffset )
                return -1;
            if ( leftOffset > rightOffset )
                return 1;
            return 0;
        }
    };
} // anonymous namespace
785
786
787
788 bool MachOAnalyzer::validLinkeditLayout(Diagnostics& diag, const char* path) const
789 {
790 LinkEditInfo leInfo;
791 getLinkEditPointers(diag, leInfo);
792 if ( diag.hasError() )
793 return false;
794 const uint32_t ptrSize = pointerSize();
795
796 // build vector of all blobs in LINKEDIT
797 LinkEditContentChunk blobs[32];
798 LinkEditContentChunk* bp = blobs;
799 if ( leInfo.dyldInfo != nullptr ) {
800 if ( leInfo.dyldInfo->rebase_size != 0 )
801 *bp++ = {"rebase opcodes", ptrSize, leInfo.dyldInfo->rebase_off, leInfo.dyldInfo->rebase_size};
802 if ( leInfo.dyldInfo->bind_size != 0 )
803 *bp++ = {"bind opcodes", ptrSize, leInfo.dyldInfo->bind_off, leInfo.dyldInfo->bind_size};
804 if ( leInfo.dyldInfo->weak_bind_size != 0 )
805 *bp++ = {"weak bind opcodes", ptrSize, leInfo.dyldInfo->weak_bind_off, leInfo.dyldInfo->weak_bind_size};
806 if ( leInfo.dyldInfo->lazy_bind_size != 0 )
807 *bp++ = {"lazy bind opcodes", ptrSize, leInfo.dyldInfo->lazy_bind_off, leInfo.dyldInfo->lazy_bind_size};
808 if ( leInfo.dyldInfo->export_size!= 0 )
809 *bp++ = {"exports trie", ptrSize, leInfo.dyldInfo->export_off, leInfo.dyldInfo->export_size};
810 }
811 if ( leInfo.exportsTrie != nullptr ) {
812 if ( leInfo.exportsTrie->datasize != 0 )
813 *bp++ = {"exports trie", ptrSize, leInfo.exportsTrie->dataoff, leInfo.exportsTrie->datasize};
814 }
815
816 if ( leInfo.dynSymTab != nullptr ) {
817 if ( leInfo.dynSymTab->nlocrel != 0 )
818 *bp++ = {"local relocations", ptrSize, leInfo.dynSymTab->locreloff, static_cast<uint32_t>(leInfo.dynSymTab->nlocrel*sizeof(relocation_info))};
819 if ( leInfo.dynSymTab->nextrel != 0 )
820 *bp++ = {"external relocations", ptrSize, leInfo.dynSymTab->extreloff, static_cast<uint32_t>(leInfo.dynSymTab->nextrel*sizeof(relocation_info))};
821 if ( leInfo.dynSymTab->nindirectsyms != 0 )
822 *bp++ = {"indirect symbol table", 4, leInfo.dynSymTab->indirectsymoff, leInfo.dynSymTab->nindirectsyms*4};
823 }
824 if ( leInfo.splitSegInfo != nullptr ) {
825 if ( leInfo.splitSegInfo->datasize != 0 )
826 *bp++ = {"shared cache info", ptrSize, leInfo.splitSegInfo->dataoff, leInfo.splitSegInfo->datasize};
827 }
828 if ( leInfo.functionStarts != nullptr ) {
829 if ( leInfo.functionStarts->datasize != 0 )
830 *bp++ = {"function starts", ptrSize, leInfo.functionStarts->dataoff, leInfo.functionStarts->datasize};
831 }
832 if ( leInfo.dataInCode != nullptr ) {
833 if ( leInfo.dataInCode->datasize != 0 )
834 *bp++ = {"data in code", ptrSize, leInfo.dataInCode->dataoff, leInfo.dataInCode->datasize};
835 }
836 if ( leInfo.symTab != nullptr ) {
837 if ( leInfo.symTab->nsyms != 0 )
838 *bp++ = {"symbol table", ptrSize, leInfo.symTab->symoff, static_cast<uint32_t>(leInfo.symTab->nsyms*(ptrSize == 8 ? sizeof(nlist_64) : sizeof(struct nlist)))};
839 if ( leInfo.symTab->strsize != 0 )
840 *bp++ = {"symbol table strings", 1, leInfo.symTab->stroff, leInfo.symTab->strsize};
841 }
842 if ( leInfo.codeSig != nullptr ) {
843 if ( leInfo.codeSig->datasize != 0 )
844 *bp++ = {"code signature", ptrSize, leInfo.codeSig->dataoff, leInfo.codeSig->datasize};
845 }
846
847 // check for bad combinations
848 if ( (leInfo.dyldInfo != nullptr) && (leInfo.dyldInfo->cmd == LC_DYLD_INFO_ONLY) && (leInfo.dynSymTab != nullptr) ) {
849 if ( (leInfo.dynSymTab->nlocrel != 0) && enforceFormat(Malformed::dyldInfoAndlocalRelocs) ) {
850 diag.error("in '%s' malformed mach-o contains LC_DYLD_INFO_ONLY and local relocations", path);
851 return false;
852 }
853 if ( leInfo.dynSymTab->nextrel != 0 ) {
854 diag.error("in '%s' malformed mach-o contains LC_DYLD_INFO_ONLY and external relocations", path);
855 return false;
856 }
857 }
858 if ( (leInfo.dyldInfo == nullptr) && (leInfo.dynSymTab == nullptr) ) {
859 diag.error("in '%s' malformed mach-o misssing LC_DYLD_INFO and LC_DYSYMTAB", path);
860 return false;
861 }
862 const unsigned long blobCount = bp - blobs;
863 if ( blobCount == 0 ) {
864 diag.error("in '%s' malformed mach-o misssing LINKEDIT", path);
865 return false;
866 }
867
868 uint32_t linkeditFileEnd = leInfo.layout.linkeditFileOffset + leInfo.layout.linkeditFileSize;
869
870
871 // sort blobs by file-offset and error on overlaps
872 ::qsort(blobs, blobCount, sizeof(LinkEditContentChunk), &LinkEditContentChunk::compareByFileOffset);
873 uint32_t prevEnd = leInfo.layout.linkeditFileOffset;
874 const char* prevName = "start of LINKEDIT";
875 for (unsigned long i=0; i < blobCount; ++i) {
876 const LinkEditContentChunk& blob = blobs[i];
877 if ( blob.fileOffsetStart < prevEnd ) {
878 diag.error("in '%s' LINKEDIT overlap of %s and %s", path, prevName, blob.name);
879 return false;
880 }
881 if (greaterThanAddOrOverflow(blob.fileOffsetStart, blob.size, linkeditFileEnd)) {
882 diag.error("in '%s' LINKEDIT content '%s' extends beyond end of segment", path, blob.name);
883 return false;
884 }
885 if ( (blob.fileOffsetStart & (blob.alignment-1)) != 0 ) {
886 // <rdar://problem/51115705> relax code sig alignment for pre iOS13
887 Malformed kind = (strcmp(blob.name, "code signature") == 0) ? Malformed::codeSigAlignment : Malformed::linkeditAlignment;
888 if ( enforceFormat(kind) )
889 diag.error("in '%s' mis-aligned LINKEDIT content '%s'", path, blob.name);
890 }
891 prevEnd = blob.fileOffsetStart + blob.size;
892 prevName = blob.name;
893 }
894
895 // Check for invalid symbol table sizes
896 if ( leInfo.symTab != nullptr ) {
897 if ( leInfo.symTab->nsyms > 0x10000000 ) {
898 diag.error("in '%s' malformed mach-o image: symbol table too large", path);
899 return false;
900 }
901 if ( leInfo.dynSymTab != nullptr ) {
902 // validate indirect symbol table
903 if ( leInfo.dynSymTab->nindirectsyms != 0 ) {
904 if ( leInfo.dynSymTab->nindirectsyms > 0x10000000 ) {
905 diag.error("in '%s' malformed mach-o image: indirect symbol table too large", path);
906 return false;
907 }
908 }
909 if ( (leInfo.dynSymTab->nlocalsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->ilocalsym > leInfo.symTab->nsyms) ) {
910 diag.error("in '%s' malformed mach-o image: indirect symbol table local symbol count exceeds total symbols", path);
911 return false;
912 }
913 if ( leInfo.dynSymTab->ilocalsym + leInfo.dynSymTab->nlocalsym < leInfo.dynSymTab->ilocalsym ) {
914 diag.error("in '%s' malformed mach-o image: indirect symbol table local symbol count wraps", path);
915 return false;
916 }
917 if ( (leInfo.dynSymTab->nextdefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iextdefsym > leInfo.symTab->nsyms) ) {
918 diag.error("in '%s' malformed mach-o image: indirect symbol table extern symbol count exceeds total symbols", path);
919 return false;
920 }
921 if ( leInfo.dynSymTab->iextdefsym + leInfo.dynSymTab->nextdefsym < leInfo.dynSymTab->iextdefsym ) {
922 diag.error("in '%s' malformed mach-o image: indirect symbol table extern symbol count wraps", path);
923 return false;
924 }
925 if ( (leInfo.dynSymTab->nundefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iundefsym > leInfo.symTab->nsyms) ) {
926 diag.error("in '%s' malformed mach-o image: indirect symbol table undefined symbol count exceeds total symbols", path);
927 return false;
928 }
929 if ( leInfo.dynSymTab->iundefsym + leInfo.dynSymTab->nundefsym < leInfo.dynSymTab->iundefsym ) {
930 diag.error("in '%s' malformed mach-o image: indirect symbol table undefined symbol count wraps", path);
931 return false;
932 }
933 }
934 }
935
936 return true;
937 }
938
939
940
941 bool MachOAnalyzer::invalidRebaseState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
942 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type) const
943 {
944 if ( !segIndexSet ) {
945 diag.error("in '%s' %s missing preceding REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path, opcodeName);
946 return true;
947 }
948 if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
949 diag.error("in '%s' %s segment index %d too large", path, opcodeName, segmentIndex);
950 return true;
951 }
952 if ( segmentOffset > (segments[segmentIndex].vmSize-ptrSize) ) {
953 diag.error("in '%s' %s current segment offset 0x%08llX beyond segment size (0x%08llX)", path, opcodeName, segmentOffset, segments[segmentIndex].vmSize);
954 return true;
955 }
956 switch ( type ) {
957 case REBASE_TYPE_POINTER:
958 if ( !segments[segmentIndex].writable() ) {
959 diag.error("in '%s' %s pointer rebase is in non-writable segment", path, opcodeName);
960 return true;
961 }
962 if ( segments[segmentIndex].executable() && enforceFormat(Malformed::executableData) ) {
963 diag.error("in '%s' %s pointer rebase is in executable segment", path, opcodeName);
964 return true;
965 }
966 break;
967 case REBASE_TYPE_TEXT_ABSOLUTE32:
968 case REBASE_TYPE_TEXT_PCREL32:
969 if ( !segments[segmentIndex].textRelocs ) {
970 diag.error("in '%s' %s text rebase is in segment that does not support text relocations", path, opcodeName);
971 return true;
972 }
973 if ( segments[segmentIndex].writable() ) {
974 diag.error("in '%s' %s text rebase is in writable segment", path, opcodeName);
975 return true;
976 }
977 if ( !segments[segmentIndex].executable() ) {
978 diag.error("in '%s' %s pointer rebase is in non-executable segment", path, opcodeName);
979 return true;
980 }
981 break;
982 default:
983 diag.error("in '%s' %s unknown rebase type %d", path, opcodeName, type);
984 return true;
985 }
986 return false;
987 }
988
989
990 void MachOAnalyzer::getAllSegmentsInfos(Diagnostics& diag, SegmentInfo segments[]) const
991 {
992 forEachSegment(^(const SegmentInfo& info, bool& stop) {
993 segments[info.segIndex] = info;
994 });
995 }
996
997
998 bool MachOAnalyzer::validRebaseInfo(Diagnostics& diag, const char* path) const
999 {
1000 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1001 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1002 if ( invalidRebaseState(diag, opcodeName, path, leInfo, segments, segIndexSet, ptrSize, segmentIndex, segmentOffset, type) )
1003 stop = true;
1004 });
1005 return diag.noError();
1006 }
1007
1008
1009 void MachOAnalyzer::forEachTextRebase(Diagnostics& diag, void (^handler)(uint64_t runtimeOffset, bool& stop)) const
1010 {
1011 __block bool startVmAddrSet = false;
1012 __block uint64_t startVmAddr = 0;
1013 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1014 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1015 if ( type != REBASE_TYPE_TEXT_ABSOLUTE32 )
1016 return;
1017 if ( !startVmAddrSet ) {
1018 for (int i=0; i <= segmentIndex; ++i) {
1019 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1020 startVmAddr = segments[i].vmAddr;
1021 startVmAddrSet = true;
1022 break;
1023 }
1024 }
1025 }
1026 uint64_t rebaseVmAddr = segments[segmentIndex].vmAddr + segmentOffset;
1027 uint64_t runtimeOffset = rebaseVmAddr - startVmAddr;
1028 handler(runtimeOffset, stop);
1029 });
1030 }
1031
1032
1033 void MachOAnalyzer::forEachRebase(Diagnostics& diag, bool ignoreLazyPointers, void (^handler)(uint64_t runtimeOffset, bool& stop)) const
1034 {
1035 __block bool startVmAddrSet = false;
1036 __block uint64_t startVmAddr = 0;
1037 __block uint64_t lpVmAddr = 0;
1038 __block uint64_t lpEndVmAddr = 0;
1039 __block uint64_t shVmAddr = 0;
1040 __block uint64_t shEndVmAddr = 0;
1041 if ( ignoreLazyPointers ) {
1042 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
1043 if ( (info.sectFlags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
1044 lpVmAddr = info.sectAddr;
1045 lpEndVmAddr = info.sectAddr + info.sectSize;
1046 }
1047 else if ( (info.sectFlags & S_ATTR_PURE_INSTRUCTIONS) && (strcmp(info.sectName, "__stub_helper") == 0) ) {
1048 shVmAddr = info.sectAddr;
1049 shEndVmAddr = info.sectAddr + info.sectSize;
1050 }
1051 });
1052 }
1053 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1054 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1055 if ( type != REBASE_TYPE_POINTER )
1056 return;
1057 if ( !startVmAddrSet ) {
1058 for (int i=0; i < segmentIndex; ++i) {
1059 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1060 startVmAddr = segments[i].vmAddr;
1061 startVmAddrSet = true;
1062 break;
1063 }
1064 }
1065 }
1066 uint64_t rebaseVmAddr = segments[segmentIndex].vmAddr + segmentOffset;
1067 bool skipRebase = false;
1068 if ( (rebaseVmAddr >= lpVmAddr) && (rebaseVmAddr < lpEndVmAddr) ) {
1069 // rebase is in lazy pointer section
1070 uint64_t lpValue = 0;
1071 if ( ptrSize == 8 )
1072 lpValue = *((uint64_t*)(rebaseVmAddr-startVmAddr+(uint8_t*)this));
1073 else
1074 lpValue = *((uint32_t*)(rebaseVmAddr-startVmAddr+(uint8_t*)this));
1075 if ( (lpValue >= shVmAddr) && (lpValue < shEndVmAddr) ) {
1076 // content is into stub_helper section
1077 uint64_t lpTargetImageOffset = lpValue - startVmAddr;
1078 const uint8_t* helperContent = (uint8_t*)this + lpTargetImageOffset;
1079 bool isLazyStub = contentIsRegularStub(helperContent);
1080 // ignore rebases for normal lazy pointers, but leave rebase for resolver helper stub
1081 if ( isLazyStub )
1082 skipRebase = true;
1083 }
1084 else {
1085 // if lazy pointer does not point into stub_helper, then it points to weak-def symbol and we need rebase
1086 }
1087 }
1088 if ( !skipRebase ) {
1089 uint64_t runtimeOffset = rebaseVmAddr - startVmAddr;
1090 handler(runtimeOffset, stop);
1091 }
1092 });
1093 }
1094
1095
1096 bool MachOAnalyzer::contentIsRegularStub(const uint8_t* helperContent) const
1097 {
1098 switch (this->cputype) {
1099 case CPU_TYPE_X86_64:
1100 return ( (helperContent[0] == 0x68) && (helperContent[5] == 0xE9) ); // push $xxx / JMP pcRel
1101 case CPU_TYPE_I386:
1102 return ( (helperContent[0] == 0x68) && (helperContent[5] == 0xFF) && (helperContent[2] == 0x26) ); // push $xxx / JMP *pcRel
1103 case CPU_TYPE_ARM:
1104 return ( (helperContent[0] == 0x00) && (helperContent[1] == 0xC0) && (helperContent[2] == 0x9F) && (helperContent[3] == 0xE5) ); // ldr ip, [pc, #0]
1105 case CPU_TYPE_ARM64:
1106 return ( (helperContent[0] == 0x50) && (helperContent[1] == 0x00) && (helperContent[2] == 0x00) && (helperContent[3] == 0x18) ); // ldr w16, L0
1107
1108 }
1109 return false;
1110 }
1111
// qsort() comparator ordering uint32_t values ascending.
// Returns a negative value, zero, or a positive value when *l is less than,
// equal to, or greater than *r.  Equal elements must compare as 0: answering
// "greater" for both (a,b) and (b,a) gives qsort an inconsistent ordering.
static int uint32Sorter(const void* l, const void* r) {
    const uint32_t lhs = *((const uint32_t*)l);
    const uint32_t rhs = *((const uint32_t*)r);
    if ( lhs < rhs )
        return -1;
    if ( lhs > rhs )
        return 1;
    return 0;
}
1118
1119
1120 void MachOAnalyzer::forEachRebase(Diagnostics& diag,
1121 void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1122 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1123 uint8_t type, bool& stop)) const
1124 {
1125 LinkEditInfo leInfo;
1126 getLinkEditPointers(diag, leInfo);
1127 if ( diag.hasError() )
1128 return;
1129
1130 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1131 getAllSegmentsInfos(diag, segmentsInfo);
1132 if ( diag.hasError() )
1133 return;
1134
1135 if ( leInfo.dyldInfo != nullptr ) {
1136 const uint8_t* const start = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
1137 const uint8_t* const end = start + leInfo.dyldInfo->rebase_size;
1138 const uint8_t* p = start;
1139 const uint32_t ptrSize = pointerSize();
1140 uint8_t type = 0;
1141 int segIndex = 0;
1142 uint64_t segOffset = 0;
1143 uint64_t count;
1144 uint64_t skip;
1145 bool segIndexSet = false;
1146 bool stop = false;
1147 while ( !stop && diag.noError() && (p < end) ) {
1148 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
1149 uint8_t opcode = *p & REBASE_OPCODE_MASK;
1150 ++p;
1151 switch (opcode) {
1152 case REBASE_OPCODE_DONE:
1153 if ( (end - p) > 8 )
1154 diag.error("rebase opcodes terminated early at offset %d of %d", (int)(p-start), (int)(end-start));
1155 stop = true;
1156 break;
1157 case REBASE_OPCODE_SET_TYPE_IMM:
1158 type = immediate;
1159 break;
1160 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1161 segIndex = immediate;
1162 segOffset = read_uleb128(diag, p, end);
1163 segIndexSet = true;
1164 break;
1165 case REBASE_OPCODE_ADD_ADDR_ULEB:
1166 segOffset += read_uleb128(diag, p, end);
1167 break;
1168 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
1169 segOffset += immediate*ptrSize;
1170 break;
1171 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
1172 for (int i=0; i < immediate; ++i) {
1173 handler("REBASE_OPCODE_DO_REBASE_IMM_TIMES", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1174 segOffset += ptrSize;
1175 if ( stop )
1176 break;
1177 }
1178 break;
1179 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
1180 count = read_uleb128(diag, p, end);
1181 for (uint32_t i=0; i < count; ++i) {
1182 handler("REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1183 segOffset += ptrSize;
1184 if ( stop )
1185 break;
1186 }
1187 break;
1188 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
1189 handler("REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1190 segOffset += read_uleb128(diag, p, end) + ptrSize;
1191 break;
1192 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
1193 count = read_uleb128(diag, p, end);
1194 if ( diag.hasError() )
1195 break;
1196 skip = read_uleb128(diag, p, end);
1197 for (uint32_t i=0; i < count; ++i) {
1198 handler("REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1199 segOffset += skip + ptrSize;
1200 if ( stop )
1201 break;
1202 }
1203 break;
1204 default:
1205 diag.error("unknown rebase opcode 0x%02X", opcode);
1206 }
1207 }
1208 }
1209 else {
1210 // old binary, walk relocations
1211 const uint64_t relocsStartAddress = relocBaseAddress(segmentsInfo, leInfo.layout.linkeditSegIndex);
1212 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->locreloff);
1213 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nlocrel];
1214 bool stop = false;
1215 const uint8_t relocSize = (is64() ? 3 : 2);
1216 const uint8_t ptrSize = pointerSize();
1217 STACK_ALLOC_OVERFLOW_SAFE_ARRAY(uint32_t, relocAddrs, 2048);
1218 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
1219 if ( reloc->r_length != relocSize ) {
1220 diag.error("local relocation has wrong r_length");
1221 break;
1222 }
1223 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
1224 diag.error("local relocation has wrong r_type");
1225 break;
1226 }
1227 relocAddrs.push_back(reloc->r_address);
1228 }
1229 if ( !relocAddrs.empty() ) {
1230 ::qsort(&relocAddrs[0], relocAddrs.count(), sizeof(uint32_t), &uint32Sorter);
1231 for (uint32_t addrOff : relocAddrs) {
1232 uint32_t segIndex = 0;
1233 uint64_t segOffset = 0;
1234 if ( segIndexAndOffsetForAddress(relocsStartAddress+addrOff, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1235 uint8_t type = REBASE_TYPE_POINTER;
1236 if ( this->cputype == CPU_TYPE_I386 ) {
1237 if ( segmentsInfo[segIndex].executable() )
1238 type = REBASE_TYPE_TEXT_ABSOLUTE32;
1239 }
1240 handler("local relocation", leInfo, segmentsInfo, true, ptrSize, segIndex, segOffset, type , stop);
1241 }
1242 else {
1243 diag.error("local relocation has out of range r_address");
1244 break;
1245 }
1246 }
1247 }
1248 // then process indirect symbols
1249 forEachIndirectPointer(diag, ^(uint64_t address, bool bind, int bindLibOrdinal,
1250 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
1251 if ( bind )
1252 return;
1253 uint32_t segIndex = 0;
1254 uint64_t segOffset = 0;
1255 if ( segIndexAndOffsetForAddress(address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1256 handler("local relocation", leInfo, segmentsInfo, true, ptrSize, segIndex, segOffset, REBASE_TYPE_POINTER, indStop);
1257 }
1258 else {
1259 diag.error("local relocation has out of range r_address");
1260 indStop = true;
1261 }
1262 });
1263 }
1264 }
1265
1266 bool MachOAnalyzer::segIndexAndOffsetForAddress(uint64_t addr, const SegmentInfo segmentsInfos[], uint32_t segCount, uint32_t& segIndex, uint64_t& segOffset) const
1267 {
1268 for (uint32_t i=0; i < segCount; ++i) {
1269 if ( (segmentsInfos[i].vmAddr <= addr) && (addr < segmentsInfos[i].vmAddr+segmentsInfos[i].vmSize) ) {
1270 segIndex = i;
1271 segOffset = addr - segmentsInfos[i].vmAddr;
1272 return true;
1273 }
1274 }
1275 return false;
1276 }
1277
1278 uint64_t MachOAnalyzer::relocBaseAddress(const SegmentInfo segmentsInfos[], uint32_t segCount) const
1279 {
1280 if ( is64() ) {
1281 // x86_64 reloc base address is first writable segment
1282 for (uint32_t i=0; i < segCount; ++i) {
1283 if ( segmentsInfos[i].writable() )
1284 return segmentsInfos[i].vmAddr;
1285 }
1286 }
1287 return segmentsInfos[0].vmAddr;
1288 }
1289
1290
1291
1292 void MachOAnalyzer::forEachIndirectPointer(Diagnostics& diag, void (^handler)(uint64_t pointerAddress, bool bind, int bindLibOrdinal, const char* bindSymbolName,
1293 bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& stop)) const
1294 {
1295 LinkEditInfo leInfo;
1296 getLinkEditPointers(diag, leInfo);
1297 if ( diag.hasError() )
1298 return;
1299
1300 // find lazy and non-lazy pointer sections
1301 const bool is64Bit = is64();
1302 const uint32_t* const indirectSymbolTable = (uint32_t*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->indirectsymoff);
1303 const uint32_t indirectSymbolTableCount = leInfo.dynSymTab->nindirectsyms;
1304 const uint32_t ptrSize = pointerSize();
1305 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
1306 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
1307 const struct nlist* symbols32 = (struct nlist*)symbolTable;
1308 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1309 uint32_t symCount = leInfo.symTab->nsyms;
1310 uint32_t poolSize = leInfo.symTab->strsize;
1311 __block bool stop = false;
1312 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectInfo, bool malformedSectionRange, bool& sectionStop) {
1313 uint8_t sectionType = (sectInfo.sectFlags & SECTION_TYPE);
1314 bool selfModifyingStub = (sectionType == S_SYMBOL_STUBS) && (sectInfo.sectFlags & S_ATTR_SELF_MODIFYING_CODE) && (sectInfo.reserved2 == 5) && (this->cputype == CPU_TYPE_I386);
1315 if ( (sectionType != S_LAZY_SYMBOL_POINTERS) && (sectionType != S_NON_LAZY_SYMBOL_POINTERS) && !selfModifyingStub )
1316 return;
1317 if ( (flags & S_ATTR_SELF_MODIFYING_CODE) && !selfModifyingStub ) {
1318 diag.error("S_ATTR_SELF_MODIFYING_CODE section type only valid in old i386 binaries");
1319 sectionStop = true;
1320 return;
1321 }
1322 uint32_t elementSize = selfModifyingStub ? sectInfo.reserved2 : ptrSize;
1323 uint32_t elementCount = (uint32_t)(sectInfo.sectSize/elementSize);
1324 if ( greaterThanAddOrOverflow(sectInfo.reserved1, elementCount, indirectSymbolTableCount) ) {
1325 diag.error("section %s overflows indirect symbol table", sectInfo.sectName);
1326 sectionStop = true;
1327 return;
1328 }
1329
1330 for (uint32_t i=0; (i < elementCount) && !stop; ++i) {
1331 uint32_t symNum = indirectSymbolTable[sectInfo.reserved1 + i];
1332 if ( symNum == INDIRECT_SYMBOL_ABS )
1333 continue;
1334 if ( symNum == INDIRECT_SYMBOL_LOCAL ) {
1335 handler(sectInfo.sectAddr+i*elementSize, false, 0, "", false, false, false, stop);
1336 continue;
1337 }
1338 if ( symNum > symCount ) {
1339 diag.error("indirect symbol[%d] = %d which is invalid symbol index", sectInfo.reserved1 + i, symNum);
1340 sectionStop = true;
1341 return;
1342 }
1343 uint16_t n_desc = is64Bit ? symbols64[symNum].n_desc : symbols32[symNum].n_desc;
1344 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
1345 uint32_t strOffset = is64Bit ? symbols64[symNum].n_un.n_strx : symbols32[symNum].n_un.n_strx;
1346 if ( strOffset > poolSize ) {
1347 diag.error("symbol[%d] string offset out of range", sectInfo.reserved1 + i);
1348 sectionStop = true;
1349 return;
1350 }
1351 const char* symbolName = stringPool + strOffset;
1352 bool weakImport = (n_desc & N_WEAK_REF);
1353 bool lazy = (sectionType == S_LAZY_SYMBOL_POINTERS);
1354 handler(sectInfo.sectAddr+i*elementSize, true, libOrdinal, symbolName, weakImport, lazy, selfModifyingStub, stop);
1355 }
1356 sectionStop = stop;
1357 });
1358 }
1359
1360 int MachOAnalyzer::libOrdinalFromDesc(uint16_t n_desc) const
1361 {
1362 // -flat_namespace is always flat lookup
1363 if ( (this->flags & MH_TWOLEVEL) == 0 )
1364 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1365
1366 // extract byte from undefined symbol entry
1367 int libIndex = GET_LIBRARY_ORDINAL(n_desc);
1368 switch ( libIndex ) {
1369 case SELF_LIBRARY_ORDINAL:
1370 return BIND_SPECIAL_DYLIB_SELF;
1371
1372 case DYNAMIC_LOOKUP_ORDINAL:
1373 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1374
1375 case EXECUTABLE_ORDINAL:
1376 return BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
1377 }
1378
1379 return libIndex;
1380 }
1381
1382 bool MachOAnalyzer::validBindInfo(Diagnostics& diag, const char* path) const
1383 {
1384 forEachBind(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1385 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1386 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1387 uint8_t type, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
1388 if ( invalidBindState(diag, opcodeName, path, leInfo, segments, segIndexSet, libraryOrdinalSet, dylibCount,
1389 libOrdinal, ptrSize, segmentIndex, segmentOffset, type, symbolName) ) {
1390 stop = true;
1391 }
1392 }, ^(const char* symbolName) {
1393 }, ^() { });
1394 return diag.noError();
1395 }
1396
1397 bool MachOAnalyzer::invalidBindState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1398 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint32_t ptrSize,
1399 uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, const char* symbolName) const
1400 {
1401 if ( !segIndexSet ) {
1402 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path, opcodeName);
1403 return true;
1404 }
1405 if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
1406 diag.error("in '%s' %s segment index %d too large", path, opcodeName, segmentIndex);
1407 return true;
1408 }
1409 if ( segmentOffset > (segments[segmentIndex].vmSize-ptrSize) ) {
1410 diag.error("in '%s' %s current segment offset 0x%08llX beyond segment size (0x%08llX)", path, opcodeName, segmentOffset, segments[segmentIndex].vmSize);
1411 return true;
1412 }
1413 if ( symbolName == NULL ) {
1414 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", path, opcodeName);
1415 return true;
1416 }
1417 if ( !libraryOrdinalSet ) {
1418 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL", path, opcodeName);
1419 return true;
1420 }
1421 if ( libOrdinal > (int)dylibCount ) {
1422 diag.error("in '%s' %s has library ordinal too large (%d) max (%d)", path, opcodeName, libOrdinal, dylibCount);
1423 return true;
1424 }
1425 if ( libOrdinal < BIND_SPECIAL_DYLIB_WEAK_LOOKUP ) {
1426 diag.error("in '%s' %s has unknown library special ordinal (%d)", path, opcodeName, libOrdinal);
1427 return true;
1428 }
1429 switch ( type ) {
1430 case BIND_TYPE_POINTER:
1431 if ( !segments[segmentIndex].writable() ) {
1432 diag.error("in '%s' %s pointer bind is in non-writable segment", path, opcodeName);
1433 return true;
1434 }
1435 if ( segments[segmentIndex].executable() && enforceFormat(Malformed::executableData) ) {
1436 diag.error("in '%s' %s pointer bind is in executable segment", path, opcodeName);
1437 return true;
1438 }
1439 break;
1440 case BIND_TYPE_TEXT_ABSOLUTE32:
1441 case BIND_TYPE_TEXT_PCREL32:
1442 if ( !segments[segmentIndex].textRelocs ) {
1443 diag.error("in '%s' %s text bind is in segment that does not support text relocations", path, opcodeName);
1444 return true;
1445 }
1446 if ( segments[segmentIndex].writable() ) {
1447 diag.error("in '%s' %s text bind is in writable segment", path, opcodeName);
1448 return true;
1449 }
1450 if ( !segments[segmentIndex].executable() ) {
1451 diag.error("in '%s' %s pointer bind is in non-executable segment", path, opcodeName);
1452 return true;
1453 }
1454 break;
1455 default:
1456 diag.error("in '%s' %s unknown bind type %d", path, opcodeName, type);
1457 return true;
1458 }
1459 return false;
1460 }
1461
1462 void MachOAnalyzer::forEachBind(Diagnostics& diag, void (^handler)(uint64_t runtimeOffset, int libOrdinal, const char* symbolName,
1463 bool weakImport, bool lazyBind, uint64_t addend, bool& stop),
1464 void (^strongHandler)(const char* symbolName),
1465 void (^missingLazyBindHandler)()) const
1466 {
1467 __block bool startVmAddrSet = false;
1468 __block uint64_t startVmAddr = 0;
1469 forEachBind(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1470 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1471 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1472 uint8_t type, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
1473 if ( !startVmAddrSet ) {
1474 for (int i=0; i <= segmentIndex; ++i) {
1475 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1476 startVmAddr = segments[i].vmAddr;
1477 startVmAddrSet = true;
1478 break;
1479 }
1480 }
1481 }
1482 uint64_t bindVmOffset = segments[segmentIndex].vmAddr + segmentOffset;
1483 uint64_t runtimeOffset = bindVmOffset - startVmAddr;
1484 handler(runtimeOffset, libOrdinal, symbolName, weakImport, lazyBind, addend, stop);
1485 }, ^(const char* symbolName) {
1486 strongHandler(symbolName);
1487 }, ^() {
1488 missingLazyBindHandler();
1489 });
1490 }
1491
1492 void MachOAnalyzer::forEachBind(Diagnostics& diag,
1493 void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1494 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1495 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type,
1496 const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop),
1497 void (^strongHandler)(const char* symbolName),
1498 void (^missingLazyBindHandler)()) const
1499 {
1500 const uint32_t ptrSize = this->pointerSize();
1501 bool stop = false;
1502
1503 LinkEditInfo leInfo;
1504 getLinkEditPointers(diag, leInfo);
1505 if ( diag.hasError() )
1506 return;
1507
1508 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1509 getAllSegmentsInfos(diag, segmentsInfo);
1510 if ( diag.hasError() )
1511 return;
1512
1513
1514
1515 const uint32_t dylibCount = dependentDylibCount();
1516
1517 if ( leInfo.dyldInfo != nullptr ) {
1518 // process bind opcodes
1519 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
1520 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
1521 uint8_t type = 0;
1522 uint64_t segmentOffset = 0;
1523 uint8_t segmentIndex = 0;
1524 const char* symbolName = NULL;
1525 int libraryOrdinal = 0;
1526 bool segIndexSet = false;
1527 bool libraryOrdinalSet = false;
1528
1529 int64_t addend = 0;
1530 uint64_t count;
1531 uint64_t skip;
1532 bool weakImport = false;
1533 while ( !stop && diag.noError() && (p < end) ) {
1534 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1535 uint8_t opcode = *p & BIND_OPCODE_MASK;
1536 ++p;
1537 switch (opcode) {
1538 case BIND_OPCODE_DONE:
1539 stop = true;
1540 break;
1541 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1542 libraryOrdinal = immediate;
1543 libraryOrdinalSet = true;
1544 break;
1545 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1546 libraryOrdinal = (int)read_uleb128(diag, p, end);
1547 libraryOrdinalSet = true;
1548 break;
1549 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1550 // the special ordinals are negative numbers
1551 if ( immediate == 0 )
1552 libraryOrdinal = 0;
1553 else {
1554 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1555 libraryOrdinal = signExtended;
1556 }
1557 libraryOrdinalSet = true;
1558 break;
1559 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1560 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1561 symbolName = (char*)p;
1562 while (*p != '\0')
1563 ++p;
1564 ++p;
1565 break;
1566 case BIND_OPCODE_SET_TYPE_IMM:
1567 type = immediate;
1568 break;
1569 case BIND_OPCODE_SET_ADDEND_SLEB:
1570 addend = read_sleb128(diag, p, end);
1571 break;
1572 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1573 segmentIndex = immediate;
1574 segmentOffset = read_uleb128(diag, p, end);
1575 segIndexSet = true;
1576 break;
1577 case BIND_OPCODE_ADD_ADDR_ULEB:
1578 segmentOffset += read_uleb128(diag, p, end);
1579 break;
1580 case BIND_OPCODE_DO_BIND:
1581 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1582 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1583 segmentOffset += ptrSize;
1584 break;
1585 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1586 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1587 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1588 segmentOffset += read_uleb128(diag, p, end) + ptrSize;
1589 break;
1590 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1591 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1592 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1593 segmentOffset += immediate*ptrSize + ptrSize;
1594 break;
1595 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1596 count = read_uleb128(diag, p, end);
1597 skip = read_uleb128(diag, p, end);
1598 for (uint32_t i=0; i < count; ++i) {
1599 handler("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1600 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1601 segmentOffset += skip + ptrSize;
1602 if ( stop )
1603 break;
1604 }
1605 break;
1606 default:
1607 diag.error("bad bind opcode 0x%02X", *p);
1608 }
1609 }
1610 if ( diag.hasError() )
1611 return;
1612
1613 // process lazy bind opcodes
1614 uint32_t lazyDoneCount = 0;
1615 uint32_t lazyBindCount = 0;
1616 if ( leInfo.dyldInfo->lazy_bind_size != 0 ) {
1617 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
1618 end = p + leInfo.dyldInfo->lazy_bind_size;
1619 type = BIND_TYPE_POINTER;
1620 segmentOffset = 0;
1621 segmentIndex = 0;
1622 symbolName = NULL;
1623 libraryOrdinal = 0;
1624 segIndexSet = false;
1625 libraryOrdinalSet= false;
1626 addend = 0;
1627 weakImport = false;
1628 stop = false;
1629 while ( !stop && diag.noError() && (p < end) ) {
1630 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1631 uint8_t opcode = *p & BIND_OPCODE_MASK;
1632 ++p;
1633 switch (opcode) {
1634 case BIND_OPCODE_DONE:
1635 // this opcode marks the end of each lazy pointer binding
1636 ++lazyDoneCount;
1637 break;
1638 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1639 libraryOrdinal = immediate;
1640 libraryOrdinalSet = true;
1641 break;
1642 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1643 libraryOrdinal = (int)read_uleb128(diag, p, end);
1644 libraryOrdinalSet = true;
1645 break;
1646 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1647 // the special ordinals are negative numbers
1648 if ( immediate == 0 )
1649 libraryOrdinal = 0;
1650 else {
1651 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1652 libraryOrdinal = signExtended;
1653 }
1654 libraryOrdinalSet = true;
1655 break;
1656 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1657 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1658 symbolName = (char*)p;
1659 while (*p != '\0')
1660 ++p;
1661 ++p;
1662 break;
1663 case BIND_OPCODE_SET_ADDEND_SLEB:
1664 addend = read_sleb128(diag, p, end);
1665 break;
1666 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1667 segmentIndex = immediate;
1668 segmentOffset = read_uleb128(diag, p, end);
1669 segIndexSet = true;
1670 break;
1671 case BIND_OPCODE_DO_BIND:
1672 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1673 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, true, addend, stop);
1674 segmentOffset += ptrSize;
1675 ++lazyBindCount;
1676 break;
1677 case BIND_OPCODE_SET_TYPE_IMM:
1678 case BIND_OPCODE_ADD_ADDR_ULEB:
1679 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1680 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1681 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1682 default:
1683 diag.error("bad lazy bind opcode 0x%02X", opcode);
1684 break;
1685 }
1686 }
1687 if ( lazyDoneCount > lazyBindCount+7 )
1688 missingLazyBindHandler();
1689 // diag.error("lazy bind opcodes missing binds");
1690 }
1691 if ( diag.hasError() )
1692 return;
1693
1694 // process weak bind info
1695 if ( leInfo.dyldInfo->weak_bind_size != 0 ) {
1696 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->weak_bind_off);
1697 end = p + leInfo.dyldInfo->weak_bind_size;
1698 type = BIND_TYPE_POINTER;
1699 segmentOffset = 0;
1700 segmentIndex = 0;
1701 symbolName = NULL;
1702 libraryOrdinal = BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
1703 segIndexSet = false;
1704 libraryOrdinalSet= true;
1705 addend = 0;
1706 weakImport = false;
1707 stop = false;
1708 while ( !stop && diag.noError() && (p < end) ) {
1709 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1710 uint8_t opcode = *p & BIND_OPCODE_MASK;
1711 ++p;
1712 switch (opcode) {
1713 case BIND_OPCODE_DONE:
1714 stop = true;
1715 break;
1716 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1717 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1718 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1719 diag.error("unexpected dylib ordinal in weak_bind");
1720 break;
1721 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1722 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1723 symbolName = (char*)p;
1724 while (*p != '\0')
1725 ++p;
1726 ++p;
1727 if ( immediate & BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION ) {
1728 strongHandler(symbolName);
1729 }
1730 break;
1731 case BIND_OPCODE_SET_TYPE_IMM:
1732 type = immediate;
1733 break;
1734 case BIND_OPCODE_SET_ADDEND_SLEB:
1735 addend = read_sleb128(diag, p, end);
1736 break;
1737 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1738 segmentIndex = immediate;
1739 segmentOffset = read_uleb128(diag, p, end);
1740 segIndexSet = true;
1741 break;
1742 case BIND_OPCODE_ADD_ADDR_ULEB:
1743 segmentOffset += read_uleb128(diag, p, end);
1744 break;
1745 case BIND_OPCODE_DO_BIND:
1746 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1747 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1748 segmentOffset += ptrSize;
1749 break;
1750 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1751 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1752 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1753 segmentOffset += read_uleb128(diag, p, end) + ptrSize;
1754 break;
1755 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1756 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1757 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1758 segmentOffset += immediate*ptrSize + ptrSize;
1759 break;
1760 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1761 count = read_uleb128(diag, p, end);
1762 skip = read_uleb128(diag, p, end);
1763 for (uint32_t i=0; i < count; ++i) {
1764 handler("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1765 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1766 segmentOffset += skip + ptrSize;
1767 if ( stop )
1768 break;
1769 }
1770 break;
1771 default:
1772 diag.error("bad bind opcode 0x%02X", *p);
1773 }
1774 }
1775 }
1776 }
1777 else {
1778 // old binary, process external relocations
1779 const uint64_t relocsStartAddress = relocBaseAddress(segmentsInfo, leInfo.layout.linkeditSegIndex);
1780 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->extreloff);
1781 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nextrel];
1782 bool is64Bit = is64() ;
1783 const uint8_t relocSize = (is64Bit ? 3 : 2);
1784 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
1785 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
1786 const struct nlist* symbols32 = (struct nlist*)symbolTable;
1787 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1788 uint32_t symCount = leInfo.symTab->nsyms;
1789 uint32_t poolSize = leInfo.symTab->strsize;
1790 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
1791 if ( reloc->r_length != relocSize ) {
1792 diag.error("external relocation has wrong r_length");
1793 break;
1794 }
1795 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
1796 diag.error("external relocation has wrong r_type");
1797 break;
1798 }
1799 uint32_t segIndex = 0;
1800 uint64_t segOffset = 0;
1801 if ( segIndexAndOffsetForAddress(relocsStartAddress+reloc->r_address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1802 uint32_t symbolIndex = reloc->r_symbolnum;
1803 if ( symbolIndex > symCount ) {
1804 diag.error("external relocation has out of range r_symbolnum");
1805 break;
1806 }
1807 else {
1808 uint32_t strOffset = is64Bit ? symbols64[symbolIndex].n_un.n_strx : symbols32[symbolIndex].n_un.n_strx;
1809 uint16_t n_desc = is64Bit ? symbols64[symbolIndex].n_desc : symbols32[symbolIndex].n_desc;
1810 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
1811 if ( strOffset >= poolSize ) {
1812 diag.error("external relocation has r_symbolnum=%d which has out of range n_strx", symbolIndex);
1813 break;
1814 }
1815 else {
1816 const char* symbolName = stringPool + strOffset;
1817 bool weakImport = (n_desc & N_WEAK_REF);
1818 const uint8_t* content = (uint8_t*)this + segmentsInfo[segIndex].vmAddr - leInfo.layout.textUnslidVMAddr + segOffset;
1819 uint64_t addend = is64Bit ? *((uint64_t*)content) : *((uint32_t*)content);
1820 handler("external relocation", leInfo, segmentsInfo, true, true, dylibCount, libOrdinal,
1821 ptrSize, segIndex, segOffset, BIND_TYPE_POINTER, symbolName, weakImport, false, addend, stop);
1822 }
1823 }
1824 }
1825 else {
1826 diag.error("local relocation has out of range r_address");
1827 break;
1828 }
1829 }
1830 // then process indirect symbols
1831 forEachIndirectPointer(diag, ^(uint64_t address, bool bind, int bindLibOrdinal,
1832 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
1833 if ( !bind )
1834 return;
1835 uint32_t segIndex = 0;
1836 uint64_t segOffset = 0;
1837 if ( segIndexAndOffsetForAddress(address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1838 handler("indirect symbol", leInfo, segmentsInfo, true, true, dylibCount, bindLibOrdinal,
1839 ptrSize, segIndex, segOffset, BIND_TYPE_POINTER, bindSymbolName, bindWeakImport, bindLazy, 0, indStop);
1840 }
1841 else {
1842 diag.error("indirect symbol has out of range address");
1843 indStop = true;
1844 }
1845 });
1846 }
1847
1848 }
1849
1850
1851 bool MachOAnalyzer::validChainedFixupsInfo(Diagnostics& diag, const char* path) const
1852 {
1853 __block uint32_t maxTargetCount = 0;
1854 __block uint32_t currentTargetCount = 0;
1855 parseOrgArm64eChainedFixups(diag,
1856 ^(uint32_t totalTargets, bool& stop) {
1857 maxTargetCount = totalTargets;
1858 },
1859 ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop) {
1860 if ( symbolName == NULL ) {
1861 diag.error("in '%s' missing BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", path);
1862 }
1863 else if ( !libraryOrdinalSet ) {
1864 diag.error("in '%s' missing BIND_OPCODE_SET_DYLIB_ORDINAL", path);
1865 }
1866 else if ( libOrdinal > (int)dylibCount ) {
1867 diag.error("in '%s' has library ordinal too large (%d) max (%d)", path, libOrdinal, dylibCount);
1868 }
1869 else if ( libOrdinal < BIND_SPECIAL_DYLIB_WEAK_LOOKUP ) {
1870 diag.error("in '%s' has unknown library special ordinal (%d)", path, libOrdinal);
1871 }
1872 else if ( type != BIND_TYPE_POINTER ) {
1873 diag.error("in '%s' unknown bind type %d", path, type);
1874 }
1875 else if ( currentTargetCount > maxTargetCount ) {
1876 diag.error("in '%s' chained target counts exceeds BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB", path);
1877 }
1878 ++currentTargetCount;
1879 if ( diag.hasError() )
1880 stop = true;
1881 },
1882 ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, uint16_t format, bool& stop) {
1883 if ( !segIndexSet ) {
1884 diag.error("in '%s' missing BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path);
1885 }
1886 else if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
1887 diag.error("in '%s' segment index %d too large", path, segmentIndex);
1888 }
1889 else if ( segmentOffset > (segments[segmentIndex].vmSize-8) ) {
1890 diag.error("in '%s' current segment offset 0x%08llX beyond segment size (0x%08llX)", path, segmentOffset, segments[segmentIndex].vmSize);
1891 }
1892 else if ( !segments[segmentIndex].writable() ) {
1893 diag.error("in '%s' pointer bind is in non-writable segment", path);
1894 }
1895 else if ( segments[segmentIndex].executable() ) {
1896 diag.error("in '%s' pointer bind is in executable segment", path);
1897 }
1898 if ( diag.hasError() )
1899 stop = true;
1900 }
1901 );
1902
1903 return diag.noError();
1904 }
1905
1906
1907
1908 void MachOAnalyzer::parseOrgArm64eChainedFixups(Diagnostics& diag, void (^targetCount)(uint32_t totalTargets, bool& stop),
1909 void (^addTarget)(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop),
1910 void (^addChainStart)(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, uint16_t format, bool& stop)) const
1911 {
1912 bool stop = false;
1913
1914 LinkEditInfo leInfo;
1915 getLinkEditPointers(diag, leInfo);
1916 if ( diag.hasError() )
1917 return;
1918
1919 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1920 getAllSegmentsInfos(diag, segmentsInfo);
1921 if ( diag.hasError() )
1922 return;
1923
1924 const uint32_t dylibCount = dependentDylibCount();
1925
1926 if ( leInfo.dyldInfo != nullptr ) {
1927 // process bind opcodes
1928 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
1929 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
1930 uint8_t type = 0;
1931 uint64_t segmentOffset = 0;
1932 uint8_t segmentIndex = 0;
1933 const char* symbolName = NULL;
1934 int libraryOrdinal = 0;
1935 bool segIndexSet = false;
1936 bool libraryOrdinalSet = false;
1937 uint64_t targetTableCount;
1938 uint64_t addend = 0;
1939 bool weakImport = false;
1940 while ( !stop && diag.noError() && (p < end) ) {
1941 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1942 uint8_t opcode = *p & BIND_OPCODE_MASK;
1943 ++p;
1944 switch (opcode) {
1945 case BIND_OPCODE_DONE:
1946 stop = true;
1947 break;
1948 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1949 libraryOrdinal = immediate;
1950 libraryOrdinalSet = true;
1951 break;
1952 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1953 libraryOrdinal = (int)read_uleb128(diag, p, end);
1954 libraryOrdinalSet = true;
1955 break;
1956 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1957 // the special ordinals are negative numbers
1958 if ( immediate == 0 )
1959 libraryOrdinal = 0;
1960 else {
1961 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1962 libraryOrdinal = signExtended;
1963 }
1964 libraryOrdinalSet = true;
1965 break;
1966 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1967 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1968 symbolName = (char*)p;
1969 while (*p != '\0')
1970 ++p;
1971 ++p;
1972 break;
1973 case BIND_OPCODE_SET_TYPE_IMM:
1974 type = immediate;
1975 break;
1976 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1977 segmentIndex = immediate;
1978 segmentOffset = read_uleb128(diag, p, end);
1979 segIndexSet = true;
1980 break;
1981 case BIND_OPCODE_SET_ADDEND_SLEB:
1982 addend = read_sleb128(diag, p, end);
1983 break;
1984 case BIND_OPCODE_DO_BIND:
1985 if ( addTarget )
1986 addTarget(leInfo, segmentsInfo, libraryOrdinalSet, dylibCount, libraryOrdinal, type, symbolName, addend, weakImport, stop);
1987 break;
1988 case BIND_OPCODE_THREADED:
1989 switch (immediate) {
1990 case BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB:
1991 targetTableCount = read_uleb128(diag, p, end);
1992 if ( targetTableCount > 65535 ) {
1993 diag.error("BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB size too large");
1994 stop = true;
1995 }
1996 else {
1997 if ( targetCount )
1998 targetCount((uint32_t)targetTableCount, stop);
1999 }
2000 break;
2001 case BIND_SUBOPCODE_THREADED_APPLY:
2002 if ( addChainStart )
2003 addChainStart(leInfo, segmentsInfo, segmentIndex, segIndexSet, segmentOffset, DYLD_CHAINED_PTR_ARM64E, stop);
2004 break;
2005 default:
2006 diag.error("bad BIND_OPCODE_THREADED sub-opcode 0x%02X", immediate);
2007 }
2008 break;
2009 default:
2010 diag.error("bad bind opcode 0x%02X", immediate);
2011 }
2012 }
2013 if ( diag.hasError() )
2014 return;
2015 }
2016 }
2017
2018 void MachOAnalyzer::forEachChainedFixupTarget(Diagnostics& diag, void (^callback)(int libOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop)) const
2019 {
2020 LinkEditInfo leInfo;
2021 getLinkEditPointers(diag, leInfo);
2022 if ( diag.hasError() )
2023 return;
2024
2025 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
2026 getAllSegmentsInfos(diag, segmentsInfo);
2027 if ( diag.hasError() )
2028 return;
2029
2030 bool stop = false;
2031 if ( leInfo.dyldInfo != nullptr ) {
2032 parseOrgArm64eChainedFixups(diag, nullptr, ^(const LinkEditInfo& leInfo2, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount,
2033 int libOrdinal, uint8_t type, const char* symbolName, uint64_t fixAddend, bool weakImport, bool& stopChain) {
2034 callback(libOrdinal, symbolName, fixAddend, weakImport, stopChain);
2035 }, nullptr);
2036 }
2037 else if ( leInfo.chainedFixups != nullptr ) {
2038 const dyld_chained_fixups_header* header = (dyld_chained_fixups_header*)getLinkEditContent(leInfo.layout, leInfo.chainedFixups->dataoff);
2039 if ( (header->imports_offset > leInfo.chainedFixups->datasize) || (header->symbols_offset > leInfo.chainedFixups->datasize) ) {
2040 diag.error("malformed import table");
2041 return;
2042 }
2043 const dyld_chained_import* imports;
2044 const dyld_chained_import_addend* importsA32;
2045 const dyld_chained_import_addend64* importsA64;
2046 const char* symbolsPool = (char*)header + header->symbols_offset;
2047 uint32_t maxSymbolOffset = leInfo.chainedFixups->datasize - header->symbols_offset;
2048 int libOrdinal;
2049 switch (header->imports_format) {
2050 case DYLD_CHAINED_IMPORT:
2051 imports = (dyld_chained_import*)((uint8_t*)header + header->imports_offset);
2052 for (uint32_t i=0; i < header->imports_count; ++i) {
2053 const char* symbolName = &symbolsPool[imports[i].name_offset];
2054 if ( imports[i].name_offset > maxSymbolOffset ) {
2055 diag.error("malformed import table, string overflow");
2056 return;
2057 }
2058 uint8_t libVal = imports[i].lib_ordinal;
2059 if ( libVal > 0xF0 )
2060 libOrdinal = (int8_t)libVal;
2061 else
2062 libOrdinal = libVal;
2063 callback(libOrdinal, symbolName, 0, imports[i].weak_import, stop);
2064 }
2065 break;
2066 case DYLD_CHAINED_IMPORT_ADDEND:
2067 importsA32 = (dyld_chained_import_addend*)((uint8_t*)header + header->imports_offset);
2068 for (uint32_t i=0; i < header->imports_count; ++i) {
2069 const char* symbolName = &symbolsPool[importsA32[i].name_offset];
2070 if ( importsA32[i].name_offset > maxSymbolOffset ) {
2071 diag.error("malformed import table, string overflow");
2072 return;
2073 }
2074 uint8_t libVal = importsA32[i].lib_ordinal;
2075 if ( libVal > 0xF0 )
2076 libOrdinal = (int8_t)libVal;
2077 else
2078 libOrdinal = libVal;
2079 callback(libOrdinal, symbolName, importsA32[i].addend, importsA32[i].weak_import, stop);
2080 }
2081 break;
2082 case DYLD_CHAINED_IMPORT_ADDEND64:
2083 importsA64 = (dyld_chained_import_addend64*)((uint8_t*)header + header->imports_offset);
2084 for (uint32_t i=0; i < header->imports_count; ++i) {
2085 const char* symbolName = &symbolsPool[importsA64[i].name_offset];
2086 if ( importsA64[i].name_offset > maxSymbolOffset ) {
2087 diag.error("malformed import table, string overflow");
2088 return;
2089 }
2090 uint16_t libVal = importsA64[i].lib_ordinal;
2091 if ( libVal > 0xFFF0 )
2092 libOrdinal = (int16_t)libVal;
2093 else
2094 libOrdinal = libVal;
2095 callback(libOrdinal, symbolName, importsA64[i].addend, importsA64[i].weak_import, stop);
2096 }
2097 break;
2098 default:
2099 diag.error("unknown imports format");
2100 return;
2101 }
2102 }
2103 }
2104
2105 uint32_t MachOAnalyzer::segmentCount() const
2106 {
2107 __block uint32_t count = 0;
2108 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2109 ++count;
2110 });
2111 return count;
2112 }
2113
2114 bool MachOAnalyzer::hasCodeSignature(uint32_t& fileOffset, uint32_t& size) const
2115 {
2116 fileOffset = 0;
2117 size = 0;
2118
2119 Diagnostics diag;
2120 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2121 if ( cmd->cmd == LC_CODE_SIGNATURE ) {
2122 const linkedit_data_command* sigCmd = (linkedit_data_command*)cmd;
2123 fileOffset = sigCmd->dataoff;
2124 size = sigCmd->datasize;
2125 stop = true;
2126 }
2127 });
2128 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2129
2130 // early exist if no LC_CODE_SIGNATURE
2131 if ( fileOffset == 0 )
2132 return false;
2133
2134 // <rdar://problem/13622786> ignore code signatures in macOS binaries built with pre-10.9 tools
2135 if ( (this->cputype == CPU_TYPE_X86_64) || (this->cputype == CPU_TYPE_I386) ) {
2136 __block bool foundPlatform = false;
2137 __block bool badSignature = false;
2138 forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
2139 foundPlatform = true;
2140 if ( (platform == Platform::macOS) && (sdk < 0x000A0900) )
2141 badSignature = true;
2142 });
2143 return foundPlatform && !badSignature;
2144 }
2145
2146 return true;
2147 }
2148
2149 bool MachOAnalyzer::hasInitializer(Diagnostics& diag, bool contentRebased, const void* dyldCache) const
2150 {
2151 __block bool result = false;
2152 forEachInitializer(diag, contentRebased, ^(uint32_t offset) {
2153 result = true;
2154 }, dyldCache);
2155 return result;
2156 }
2157
2158 void MachOAnalyzer::forEachInitializerPointerSection(Diagnostics& diag, void (^callback)(uint32_t sectionOffset, uint32_t sectionSize, const uint8_t* content, bool& stop)) const
2159 {
2160 const unsigned ptrSize = pointerSize();
2161 const uint64_t baseAddress = preferredLoadAddress();
2162 const uint64_t slide = (uint64_t)this - baseAddress;
2163 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& sectStop) {
2164 if ( (info.sectFlags & SECTION_TYPE) == S_MOD_INIT_FUNC_POINTERS ) {
2165 if ( (info.sectSize % ptrSize) != 0 ) {
2166 diag.error("initializer section %s/%s has bad size", info.segInfo.segName, info.sectName);
2167 sectStop = true;
2168 return;
2169 }
2170 if ( malformedSectionRange ) {
2171 diag.error("initializer section %s/%s extends beyond its segment", info.segInfo.segName, info.sectName);
2172 sectStop = true;
2173 return;
2174 }
2175 const uint8_t* content = (uint8_t*)(info.sectAddr + slide);
2176 if ( ((long)content % ptrSize) != 0 ) {
2177 diag.error("initializer section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2178 sectStop = true;
2179 return;
2180 }
2181 callback((uint32_t)(info.sectAddr - baseAddress), (uint32_t)info.sectSize, content, sectStop);
2182 }
2183 });
2184 }
2185
2186 struct VIS_HIDDEN SegmentRanges
2187 {
2188 struct SegmentRange {
2189 uint64_t vmAddrStart;
2190 uint64_t vmAddrEnd;
2191 uint32_t fileSize;
2192 };
2193
2194 bool contains(uint64_t vmAddr) const {
2195 for (const SegmentRange& range : segments) {
2196 if ( (range.vmAddrStart <= vmAddr) && (vmAddr < range.vmAddrEnd) )
2197 return true;
2198 }
2199 return false;
2200 }
2201
2202 private:
2203 SegmentRange localAlloc[1];
2204
2205 public:
2206 dyld3::OverflowSafeArray<SegmentRange> segments { localAlloc, sizeof(localAlloc) / sizeof(localAlloc[0]) };
2207 };
2208
2209 void MachOAnalyzer::forEachInitializer(Diagnostics& diag, bool contentRebased, void (^callback)(uint32_t offset), const void* dyldCache) const
2210 {
2211 __block SegmentRanges executableSegments;
2212 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2213 if ( (info.protections & VM_PROT_EXECUTE) != 0 ) {
2214 executableSegments.segments.push_back({ info.vmAddr, info.vmAddr + info.vmSize, (uint32_t)info.fileSize });
2215 }
2216 });
2217
2218 if (executableSegments.segments.empty()) {
2219 diag.error("no exeutable segments");
2220 return;
2221 }
2222
2223 uint64_t loadAddress = preferredLoadAddress();
2224 intptr_t slide = getSlide();
2225
2226 // if dylib linked with -init linker option, that initializer is first
2227 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2228 if ( cmd->cmd == LC_ROUTINES ) {
2229 const routines_command* routines = (routines_command*)cmd;
2230 uint64_t dashInit = routines->init_address;
2231 if ( executableSegments.contains(dashInit) )
2232 callback((uint32_t)(dashInit - loadAddress));
2233 else
2234 diag.error("-init does not point within __TEXT segment");
2235 }
2236 else if ( cmd->cmd == LC_ROUTINES_64 ) {
2237 const routines_command_64* routines = (routines_command_64*)cmd;
2238 uint64_t dashInit = routines->init_address;
2239 if ( executableSegments.contains(dashInit) )
2240 callback((uint32_t)(dashInit - loadAddress));
2241 else
2242 diag.error("-init does not point within __TEXT segment");
2243 }
2244 });
2245
2246 // next any function pointers in mod-init section
2247 const unsigned ptrSize = pointerSize();
2248 const bool useChainedFixups = hasChainedFixups();
2249 const uint16_t pointerFormat = useChainedFixups ? this->chainedPointerFormat() : 0;
2250 forEachInitializerPointerSection(diag, ^(uint32_t sectionOffset, uint32_t sectionSize, const uint8_t* content, bool& stop) {
2251 if ( ptrSize == 8 ) {
2252 const uint64_t* initsStart = (uint64_t*)content;
2253 const uint64_t* initsEnd = (uint64_t*)((uint8_t*)content + sectionSize);
2254 for (const uint64_t* p=initsStart; p < initsEnd; ++p) {
2255 uint64_t anInit = *p;
2256 if ( contentRebased ) {
2257 // The function pointer may have been signed. Strip the signature if that is the case
2258 #if __has_feature(ptrauth_calls)
2259 anInit = (uint64_t)__builtin_ptrauth_strip((void*)anInit, ptrauth_key_asia);
2260 #endif
2261 anInit -= slide;
2262 }
2263 else if ( useChainedFixups ) {
2264 uint64_t initFuncRuntimeOffset;
2265 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2266 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2267 anInit = loadAddress+initFuncRuntimeOffset;
2268 }
2269 else {
2270 diag.error("initializer is not rebased");
2271 stop = true;
2272 break;
2273 }
2274 }
2275 if ( !executableSegments.contains(anInit) ) {
2276 diag.error("initializer 0x%0llX does not point within executable segment", anInit);
2277 stop = true;
2278 break;
2279 }
2280 callback((uint32_t)(anInit - loadAddress));
2281 }
2282 }
2283 else {
2284 const uint32_t* initsStart = (uint32_t*)content;
2285 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + sectionSize);
2286 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2287 uint32_t anInit = *p;
2288 if ( contentRebased ) {
2289 anInit -= slide;
2290 }
2291 else if ( useChainedFixups ) {
2292 uint64_t initFuncRuntimeOffset;
2293 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2294 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2295 anInit = (uint32_t)(loadAddress+initFuncRuntimeOffset);
2296 }
2297 else {
2298 diag.error("initializer is not rebased");
2299 stop = true;
2300 break;
2301 }
2302 }
2303 if ( !executableSegments.contains(anInit) ) {
2304 diag.error("initializer 0x%0X does not point within executable segment", anInit);
2305 stop = true;
2306 break;
2307 }
2308 callback(anInit - (uint32_t)loadAddress);
2309 }
2310 }
2311 });
2312
2313 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2314 if ( (info.sectFlags & SECTION_TYPE) != S_INIT_FUNC_OFFSETS )
2315 return;
2316 const uint8_t* content = (uint8_t*)(info.sectAddr + slide);
2317 if ( info.segInfo.writable() ) {
2318 diag.error("initializer offsets section %s/%s must be in read-only segment", info.segInfo.segName, info.sectName);
2319 stop = true;
2320 return;
2321 }
2322 if ( (info.sectSize % 4) != 0 ) {
2323 diag.error("initializer offsets section %s/%s has bad size", info.segInfo.segName, info.sectName);
2324 stop = true;
2325 return;
2326 }
2327 if ( malformedSectionRange ) {
2328 diag.error("initializer offsets section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2329 stop = true;
2330 return;
2331 }
2332 if ( (info.sectAddr % 4) != 0 ) {
2333 diag.error("initializer offsets section %s/%s is not 4-byte aligned", info.segInfo.segName, info.sectName);
2334 stop = true;
2335 return;
2336 }
2337 const uint32_t* initsStart = (uint32_t*)content;
2338 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + info.sectSize);
2339 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2340 uint32_t anInitOffset = *p;
2341 if ( anInitOffset > executableSegments.segments[0].fileSize ) {
2342 diag.error("initializer 0x%0X is not an offset within __TEXT segment", anInitOffset);
2343 stop = true;
2344 break;
2345 }
2346 callback(anInitOffset);
2347 }
2348 });
2349 }
2350
2351 bool MachOAnalyzer::hasTerminators(Diagnostics& diag, bool contentRebased) const
2352 {
2353 __block bool result = false;
2354 forEachTerminator(diag, contentRebased, ^(uint32_t offset) {
2355 result = true;
2356 });
2357 return result;
2358 }
2359
2360 void MachOAnalyzer::forEachTerminator(Diagnostics& diag, bool contentRebased, void (^callback)(uint32_t offset)) const
2361 {
2362 __block SegmentRanges executableSegments;
2363 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2364 if ( (info.protections & VM_PROT_EXECUTE) != 0 ) {
2365 executableSegments.segments.push_back({ info.vmAddr, info.vmAddr + info.vmSize, (uint32_t)info.fileSize });
2366 }
2367 });
2368
2369 if (executableSegments.segments.empty()) {
2370 diag.error("no exeutable segments");
2371 return;
2372 }
2373
2374 uint64_t loadAddress = preferredLoadAddress();
2375 intptr_t slide = getSlide();
2376
2377 // next any function pointers in mod-term section
2378 const unsigned ptrSize = pointerSize();
2379 const bool useChainedFixups = hasChainedFixups();
2380 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2381 if ( (info.sectFlags & SECTION_TYPE) == S_MOD_TERM_FUNC_POINTERS ) {
2382 uint64_t initFuncRuntimeOffset;
2383 const uint16_t pointerFormat = useChainedFixups ? this->chainedPointerFormat() : 0;
2384 const uint8_t* content;
2385 content = (uint8_t*)(info.sectAddr + slide);
2386 if ( (info.sectSize % ptrSize) != 0 ) {
2387 diag.error("terminator section %s/%s has bad size", info.segInfo.segName, info.sectName);
2388 stop = true;
2389 return;
2390 }
2391 if ( malformedSectionRange ) {
2392 diag.error("terminator section %s/%s extends beyond its segment", info.segInfo.segName, info.sectName);
2393 stop = true;
2394 return;
2395 }
2396 if ( ((long)content % ptrSize) != 0 ) {
2397 diag.error("terminator section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2398 stop = true;
2399 return;
2400 }
2401 if ( ptrSize == 8 ) {
2402 const uint64_t* initsStart = (uint64_t*)content;
2403 const uint64_t* initsEnd = (uint64_t*)((uint8_t*)content + info.sectSize);
2404 for (const uint64_t* p=initsStart; p < initsEnd; ++p) {
2405 uint64_t anInit = *p;
2406 if ( contentRebased ) {
2407 // The function pointer may have been signed. Strip the signature if that is the case
2408 #if __has_feature(ptrauth_calls)
2409 anInit = (uint64_t)__builtin_ptrauth_strip((void*)anInit, ptrauth_key_asia);
2410 #endif
2411 anInit -= slide;
2412 }
2413 else if ( useChainedFixups ) {
2414 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2415 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2416 anInit = loadAddress+initFuncRuntimeOffset;
2417 }
2418 else {
2419 diag.error("terminator is not rebased");
2420 stop = true;
2421 break;
2422 }
2423 }
2424 if ( !executableSegments.contains(anInit) ) {
2425 diag.error("terminator 0x%0llX does not point within executable segment", anInit);
2426 stop = true;
2427 break;
2428 }
2429 callback((uint32_t)(anInit - loadAddress));
2430 }
2431 }
2432 else {
2433 const uint32_t* initsStart = (uint32_t*)content;
2434 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + info.sectSize);
2435 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2436 uint32_t anInit = *p;
2437 if ( contentRebased ) {
2438 anInit -= slide;
2439 }
2440 else if ( useChainedFixups ) {
2441 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2442 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2443 anInit = (uint32_t)(loadAddress+initFuncRuntimeOffset);
2444 }
2445 else {
2446 diag.error("terminator is not rebased");
2447 stop = true;
2448 break;
2449 }
2450 }
2451 if ( !executableSegments.contains(anInit) ) {
2452 diag.error("terminator 0x%0X does not point within executable segment", anInit);
2453 stop = true;
2454 break;
2455 }
2456 callback(anInit - (uint32_t)loadAddress);
2457 }
2458 }
2459 }
2460 });
2461 }
2462
2463
2464
2465 void MachOAnalyzer::forEachRPath(void (^callback)(const char* rPath, bool& stop)) const
2466 {
2467 Diagnostics diag;
2468 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2469 if ( cmd->cmd == LC_RPATH ) {
2470 const char* rpath = (char*)cmd + ((struct rpath_command*)cmd)->path.offset;
2471 callback(rpath, stop);
2472 }
2473 });
2474 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2475 }
2476
2477
2478 bool MachOAnalyzer::hasObjC() const
2479 {
2480 __block bool result = false;
2481 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2482 if ( (strcmp(info.sectName, "__objc_imageinfo") == 0) && (strncmp(info.segInfo.segName, "__DATA", 6) == 0) ) {
2483 result = true;
2484 stop = true;
2485 }
2486 if ( (this->cputype == CPU_TYPE_I386) && (strcmp(info.sectName, "__image_info") == 0) && (strcmp(info.segInfo.segName, "__OBJC") == 0) ) {
2487 result = true;
2488 stop = true;
2489 }
2490 });
2491 return result;
2492 }
2493
2494 bool MachOAnalyzer::hasPlusLoadMethod(Diagnostics& diag) const
2495 {
2496 __block bool result = false;
2497 if ( (this->cputype == CPU_TYPE_I386) && supportsPlatform(Platform::macOS) ) {
2498 // old objc runtime has no special section for +load methods, scan for string
2499 int64_t slide = getSlide();
2500 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2501 if ( ( (info.sectFlags & SECTION_TYPE) == S_CSTRING_LITERALS ) ) {
2502 if ( malformedSectionRange ) {
2503 diag.error("cstring section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2504 stop = true;
2505 return;
2506 }
2507 const uint8_t* content = (uint8_t*)(info.sectAddr + slide);
2508 const char* s = (char*)content;
2509 const char* end = s + info.sectSize;
2510 while ( s < end ) {
2511 if ( strcmp(s, "load") == 0 ) {
2512 result = true;
2513 stop = true;
2514 return;
2515 }
2516 while (*s != '\0' )
2517 ++s;
2518 ++s;
2519 }
2520 }
2521 });
2522 }
2523 else {
2524 // in new objc runtime compiler puts classes/categories with +load method in specical section
2525 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2526 if ( strncmp(info.segInfo.segName, "__DATA", 6) != 0 )
2527 return;
2528 if ( (strcmp(info.sectName, "__objc_nlclslist") == 0) || (strcmp(info.sectName, "__objc_nlcatlist") == 0)) {
2529 result = true;
2530 stop = true;
2531 }
2532 });
2533 }
2534 return result;
2535 }
2536
2537 const void* MachOAnalyzer::getRebaseOpcodes(uint32_t& size) const
2538 {
2539 Diagnostics diag;
2540 LinkEditInfo leInfo;
2541 getLinkEditPointers(diag, leInfo);
2542 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2543 return nullptr;
2544
2545 size = leInfo.dyldInfo->rebase_size;
2546 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
2547 }
2548
2549 const void* MachOAnalyzer::getBindOpcodes(uint32_t& size) const
2550 {
2551 Diagnostics diag;
2552 LinkEditInfo leInfo;
2553 getLinkEditPointers(diag, leInfo);
2554 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2555 return nullptr;
2556
2557 size = leInfo.dyldInfo->bind_size;
2558 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
2559 }
2560
2561 const void* MachOAnalyzer::getLazyBindOpcodes(uint32_t& size) const
2562 {
2563 Diagnostics diag;
2564 LinkEditInfo leInfo;
2565 getLinkEditPointers(diag, leInfo);
2566 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2567 return nullptr;
2568
2569 size = leInfo.dyldInfo->lazy_bind_size;
2570 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
2571 }
2572
2573 const void* MachOAnalyzer::getSplitSeg(uint32_t& size) const
2574 {
2575 Diagnostics diag;
2576 LinkEditInfo leInfo;
2577 getLinkEditPointers(diag, leInfo);
2578 if ( diag.hasError() || (leInfo.splitSegInfo == nullptr) )
2579 return nullptr;
2580
2581 size = leInfo.splitSegInfo->datasize;
2582 return getLinkEditContent(leInfo.layout, leInfo.splitSegInfo->dataoff);
2583 }
2584
2585
2586 uint64_t MachOAnalyzer::segAndOffsetToRuntimeOffset(uint8_t targetSegIndex, uint64_t targetSegOffset) const
2587 {
2588 __block uint64_t textVmAddr = 0;
2589 __block uint64_t result = 0;
2590 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2591 if ( strcmp(info.segName, "__TEXT") == 0 )
2592 textVmAddr = info.vmAddr;
2593 if ( info.segIndex == targetSegIndex ) {
2594 result = (info.vmAddr - textVmAddr) + targetSegOffset;
2595 }
2596 });
2597 return result;
2598 }
2599
2600 bool MachOAnalyzer::hasLazyPointers(uint32_t& runtimeOffset, uint32_t& size) const
2601 {
2602 size = 0;
2603 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2604 if ( (info.sectFlags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
2605 runtimeOffset = (uint32_t)(info.sectAddr - preferredLoadAddress());
2606 size = (uint32_t)info.sectSize;
2607 stop = true;
2608 }
2609 });
2610 return (size != 0);
2611 }
2612
2613 uint64_t MachOAnalyzer::preferredLoadAddress() const
2614 {
2615 __block uint64_t textVmAddr = 0;
2616 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2617 if ( strcmp(info.segName, "__TEXT") == 0 ) {
2618 textVmAddr = info.vmAddr;
2619 stop = true;
2620 }
2621 });
2622 return textVmAddr;
2623 }
2624
2625
2626 bool MachOAnalyzer::getEntry(uint32_t& offset, bool& usesCRT) const
2627 {
2628 Diagnostics diag;
2629 offset = 0;
2630 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2631 if ( cmd->cmd == LC_MAIN ) {
2632 entry_point_command* mainCmd = (entry_point_command*)cmd;
2633 usesCRT = false;
2634 offset = (uint32_t)mainCmd->entryoff;
2635 stop = true;
2636 }
2637 else if ( cmd->cmd == LC_UNIXTHREAD ) {
2638 stop = true;
2639 usesCRT = true;
2640 uint64_t startAddress = entryAddrFromThreadCmd((thread_command*)cmd);
2641 offset = (uint32_t)(startAddress - preferredLoadAddress());
2642 }
2643 });
2644 return (offset != 0);
2645 }
2646
2647 uint64_t MachOAnalyzer::entryAddrFromThreadCmd(const thread_command* cmd) const
2648 {
2649 assert(cmd->cmd == LC_UNIXTHREAD);
2650 const uint32_t* regs32 = (uint32_t*)(((char*)cmd) + 16);
2651 const uint64_t* regs64 = (uint64_t*)(((char*)cmd) + 16);
2652 uint64_t startAddress = 0;
2653 switch ( this->cputype ) {
2654 case CPU_TYPE_I386:
2655 startAddress = regs32[10]; // i386_thread_state_t.eip
2656 break;
2657 case CPU_TYPE_X86_64:
2658 startAddress = regs64[16]; // x86_thread_state64_t.rip
2659 break;
2660 case CPU_TYPE_ARM:
2661 startAddress = regs32[15]; // arm_thread_state_t.pc
2662 break;
2663 case CPU_TYPE_ARM64:
2664 startAddress = regs64[32]; // arm_thread_state64_t.__pc
2665 break;
2666 }
2667 return startAddress;
2668 }
2669
2670
2671 void MachOAnalyzer::forEachInterposingSection(Diagnostics& diag, void (^handler)(uint64_t vmOffset, uint64_t vmSize, bool& stop)) const
2672 {
2673 const unsigned ptrSize = pointerSize();
2674 const unsigned entrySize = 2 * ptrSize;
2675 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2676 if ( ((info.sectFlags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(info.sectName, "__interpose") == 0) && (strcmp(info.segInfo.segName, "__DATA") == 0)) ) {
2677 if ( info.sectSize % entrySize != 0 ) {
2678 diag.error("interposing section %s/%s has bad size", info.segInfo.segName, info.sectName);
2679 stop = true;
2680 return;
2681 }
2682 if ( malformedSectionRange ) {
2683 diag.error("interposing section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2684 stop = true;
2685 return;
2686 }
2687 if ( (info.sectAddr % ptrSize) != 0 ) {
2688 diag.error("interposing section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2689 stop = true;
2690 return;
2691 }
2692 handler(info.sectAddr - preferredLoadAddress(), info.sectSize, stop);
2693 }
2694 });
2695 }
2696
2697 void MachOAnalyzer::forEachDOFSection(Diagnostics& diag, void (^callback)(uint32_t offset)) const
2698 {
2699 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2700 if ( ( (info.sectFlags & SECTION_TYPE) == S_DTRACE_DOF ) && !malformedSectionRange ) {
2701 callback((uint32_t)(info.sectAddr - info.segInfo.vmAddr));
2702 }
2703 });
2704 }
2705
2706 void MachOAnalyzer::forEachCDHash(void (^handler)(const uint8_t cdHash[20])) const
2707 {
2708 Diagnostics diag;
2709 LinkEditInfo leInfo;
2710 getLinkEditPointers(diag, leInfo);
2711 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2712 return;
2713
2714 forEachCDHashOfCodeSignature(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff),
2715 leInfo.codeSig->datasize,
2716 handler);
2717 }
2718
2719 bool MachOAnalyzer::isRestricted() const
2720 {
2721 __block bool result = false;
2722 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2723 if ( (strcmp(info.segInfo.segName, "__RESTRICT") == 0) && (strcmp(info.sectName, "__restrict") == 0) ) {
2724 result = true;
2725 stop = true;
2726 }
2727 });
2728 return result;
2729 }
2730
2731 bool MachOAnalyzer::usesLibraryValidation() const
2732 {
2733 Diagnostics diag;
2734 LinkEditInfo leInfo;
2735 getLinkEditPointers(diag, leInfo);
2736 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2737 return false;
2738
2739 // check for CS_REQUIRE_LV in CS_CodeDirectory.flags
2740 __block bool requiresLV = false;
2741 forEachCodeDirectoryBlob(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff),
2742 leInfo.codeSig->datasize,
2743 ^(const void *cdBuffer) {
2744 const CS_CodeDirectory* cd = (const CS_CodeDirectory*)cdBuffer;
2745 requiresLV |= (htonl(cd->flags) & CS_REQUIRE_LV);
2746 });
2747
2748 return requiresLV;
2749 }
2750
2751 bool MachOAnalyzer::canHavePrecomputedDlopenClosure(const char* path, void (^failureReason)(const char*)) const
2752 {
2753 __block bool retval = true;
2754
2755 // only dylibs can go in cache
2756 if ( (this->filetype != MH_DYLIB) && (this->filetype != MH_BUNDLE) ) {
2757 retval = false;
2758 failureReason("not MH_DYLIB or MH_BUNDLE");
2759 }
2760
2761 // flat namespace files cannot go in cache
2762 if ( (this->flags & MH_TWOLEVEL) == 0 ) {
2763 retval = false;
2764 failureReason("not built with two level namespaces");
2765 }
2766
2767 // can only depend on other dylibs with absolute paths
2768 __block bool allDepPathsAreGood = true;
2769 forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
2770 if ( loadPath[0] != '/' ) {
2771 allDepPathsAreGood = false;
2772 stop = true;
2773 }
2774 });
2775 if ( !allDepPathsAreGood ) {
2776 retval = false;
2777 failureReason("depends on dylibs that are not absolute paths");
2778 }
2779
2780 // dylibs with interposing info cannot have dlopen closure pre-computed
2781 __block bool hasInterposing = false;
2782 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool &stop) {
2783 if ( ((info.sectFlags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(info.sectName, "__interpose") == 0) && (strcmp(info.segInfo.segName, "__DATA") == 0)) )
2784 hasInterposing = true;
2785 });
2786 if ( hasInterposing ) {
2787 retval = false;
2788 failureReason("has interposing tuples");
2789 }
2790
2791 // images that use dynamic_lookup, bundle_loader, or have weak-defs cannot have dlopen closure pre-computed
2792 Diagnostics diag;
2793 auto checkBind = ^(int libOrdinal, bool& stop) {
2794 switch (libOrdinal) {
2795 case BIND_SPECIAL_DYLIB_WEAK_LOOKUP:
2796 failureReason("has weak externals");
2797 retval = false;
2798 stop = true;
2799 break;
2800 case BIND_SPECIAL_DYLIB_FLAT_LOOKUP:
2801 failureReason("has dynamic_lookup binds");
2802 retval = false;
2803 stop = true;
2804 break;
2805 case BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE:
2806 failureReason("has reference to main executable (bundle loader)");
2807 retval = false;
2808 stop = true;
2809 break;
2810 }
2811 };
2812
2813 if (hasChainedFixups()) {
2814 forEachChainedFixupTarget(diag, ^(int libOrdinal, const char *symbolName, uint64_t addend, bool weakImport, bool &stop) {
2815 checkBind(libOrdinal, stop);
2816 });
2817 } else {
2818 forEachBind(diag, ^(uint64_t runtimeOffset, int libOrdinal, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
2819 checkBind(libOrdinal, stop);
2820 },
2821 ^(const char* symbolName) {
2822 },
2823 ^() {
2824 });
2825 }
2826
2827 // special system dylib overrides cannot have closure pre-computed
2828 if ( strncmp(path, "/usr/lib/system/introspection/", 30) == 0 ) {
2829 retval = false;
2830 failureReason("override of OS dylib");
2831 }
2832
2833 // Don't precompute iOSMac for now until dyld3 support is there.
2834 if ( supportsPlatform(Platform::iOSMac) && !supportsPlatform(Platform::macOS) ) {
2835 retval = false;
2836 failureReason("UIKitForMac binary");
2837 }
2838
2839 return retval;
2840 }
2841
2842
2843 bool MachOAnalyzer::hasUnalignedPointerFixups() const
2844 {
2845 // only look at 64-bit architectures
2846 if ( pointerSize() == 4 )
2847 return false;
2848
2849 __block Diagnostics diag;
2850 __block bool result = false;
2851 if ( hasChainedFixups() ) {
2852 withChainStarts(diag, chainStartsOffset(), ^(const dyld_chained_starts_in_image* startsInfo) {
2853 forEachFixupInAllChains(diag, startsInfo, false, ^(MachOLoaded::ChainedFixupPointerOnDisk* fixupLoc, const dyld_chained_starts_in_segment* segInfo, bool& fixupsStop) {
2854 if ( ((long)(fixupLoc) & 7) != 0 ) {
2855 result = true;
2856 fixupsStop = true;
2857 }
2858 });
2859 });
2860 }
2861 else {
2862 forEachBind(diag, ^(uint64_t runtimeOffset, int libOrdinal, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
2863 if ( (runtimeOffset & 7) != 0 ) {
2864 result = true;
2865 stop = true;
2866 }
2867 },
2868 ^(const char* symbolName) {
2869 },
2870 ^() {
2871 });
2872 forEachRebase(diag, true, ^(uint64_t runtimeOffset, bool& stop) {
2873 if ( (runtimeOffset & 7) != 0 ) {
2874 result = true;
2875 stop = true;
2876 }
2877 });
2878 }
2879
2880 return result;
2881 }
2882
2883 void MachOAnalyzer::recurseTrie(Diagnostics& diag, const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
2884 OverflowSafeArray<char>& cummulativeString, int curStrOffset, bool& stop, ExportsCallback callback) const
2885 {
2886 if ( p >= end ) {
2887 diag.error("malformed trie, node past end");
2888 return;
2889 }
2890 const uint64_t terminalSize = read_uleb128(diag, p, end);
2891 const uint8_t* children = p + terminalSize;
2892 if ( terminalSize != 0 ) {
2893 uint64_t imageOffset = 0;
2894 uint64_t flags = read_uleb128(diag, p, end);
2895 uint64_t other = 0;
2896 const char* importName = nullptr;
2897 if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
2898 other = read_uleb128(diag, p, end); // dylib ordinal
2899 importName = (char*)p;
2900 }
2901 else {
2902 imageOffset = read_uleb128(diag, p, end);
2903 if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
2904 other = read_uleb128(diag, p, end);
2905 else
2906 other = 0;
2907 }
2908 if ( diag.hasError() )
2909 return;
2910 callback(cummulativeString.begin(), imageOffset, flags, other, importName, stop);
2911 if ( stop )
2912 return;
2913 }
2914 if ( children > end ) {
2915 diag.error("malformed trie, terminalSize extends beyond trie data");
2916 return;
2917 }
2918 const uint8_t childrenCount = *children++;
2919 const uint8_t* s = children;
2920 for (uint8_t i=0; i < childrenCount; ++i) {
2921 int edgeStrLen = 0;
2922 while (*s != '\0') {
2923 cummulativeString.resize(curStrOffset+edgeStrLen + 1);
2924 cummulativeString[curStrOffset+edgeStrLen] = *s++;
2925 ++edgeStrLen;
2926 if ( s > end ) {
2927 diag.error("malformed trie node, child node extends past end of trie\n");
2928 return;
2929 }
2930 }
2931 cummulativeString.resize(curStrOffset+edgeStrLen + 1);
2932 cummulativeString[curStrOffset+edgeStrLen] = *s++;
2933 uint64_t childNodeOffset = read_uleb128(diag, s, end);
2934 if (childNodeOffset == 0) {
2935 diag.error("malformed trie, childNodeOffset==0");
2936 return;
2937 }
2938 recurseTrie(diag, start, start+childNodeOffset, end, cummulativeString, curStrOffset+edgeStrLen, stop, callback);
2939 if ( diag.hasError() || stop )
2940 return;
2941 }
2942 }
2943
2944 void MachOAnalyzer::forEachExportedSymbol(Diagnostics& diag, ExportsCallback callback) const
2945 {
2946 LinkEditInfo leInfo;
2947 getLinkEditPointers(diag, leInfo);
2948 if ( diag.hasError() )
2949 return;
2950 uint64_t trieSize;
2951 if ( const uint8_t* trieStart = getExportsTrie(leInfo, trieSize) ) {
2952 const uint8_t* trieEnd = trieStart + trieSize;
2953 bool stop = false;
2954 STACK_ALLOC_OVERFLOW_SAFE_ARRAY(char, cummulativeString, 4096);
2955 recurseTrie(diag, trieStart, trieStart, trieEnd, cummulativeString, 0, stop, callback);
2956 }
2957 }
2958
2959 bool MachOAnalyzer::canBePlacedInDyldCache(const char* path, void (^failureReason)(const char*)) const
2960 {
2961 if (!MachOFile::canBePlacedInDyldCache(path, failureReason))
2962 return false;
2963 if ( !(isArch("x86_64") || isArch("x86_64h")) )
2964 return true;
2965
2966 __block bool rebasesOk = true;
2967 Diagnostics diag;
2968 uint64_t startVMAddr = preferredLoadAddress();
2969 uint64_t endVMAddr = startVMAddr + mappedSize();
2970 forEachRebase(diag, false, ^(uint64_t runtimeOffset, bool &stop) {
2971 // We allow TBI for x86_64 dylibs, but then require that the remainder of the offset
2972 // is a 32-bit offset from the mach-header.
2973 uint64_t value = *(uint64_t*)((uint8_t*)this + runtimeOffset);
2974 value &= 0x00FFFFFFFFFFFFFFULL;
2975 if ( (value < startVMAddr) || (value >= endVMAddr) ) {
2976 failureReason("rebase value out of range of dylib");
2977 rebasesOk = false;
2978 stop = true;
2979 return;
2980 }
2981
2982 // Also error if the rebase location is anything other than 4/8 byte aligned
2983 if ( (runtimeOffset & 0x3) != 0 ) {
2984 failureReason("rebase value is not 4-byte aligned");
2985 rebasesOk = false;
2986 stop = true;
2987 return;
2988 }
2989 });
2990 return rebasesOk;
2991 }
2992
2993 uint64_t MachOAnalyzer::chainStartsOffset() const
2994 {
2995 Diagnostics diag;
2996 LinkEditInfo leInfo;
2997 getLinkEditPointers(diag, leInfo);
2998 if ( diag.hasError() || (leInfo.chainedFixups == nullptr) )
2999 return 0;
3000
3001 const dyld_chained_fixups_header* header = (dyld_chained_fixups_header*)getLinkEditContent(leInfo.layout, leInfo.chainedFixups->dataoff);
3002 return header->starts_offset + ((uint8_t*)header - (uint8_t*)this);
3003 }
3004
3005 uint16_t MachOAnalyzer::chainedPointerFormat() const
3006 {
3007 uint64_t infoOffset = chainStartsOffset();
3008 if ( infoOffset != 0 ) {
3009 // get pointer format from chain info struct in LINKEDIT
3010 const dyld_chained_starts_in_image* startsInfo = (dyld_chained_starts_in_image*)((uint8_t*)this + infoOffset);
3011 for (uint32_t i=0; i < startsInfo->seg_count; ++i) {
3012 uint32_t segInfoOffset = startsInfo->seg_info_offset[i];
3013 // 0 offset means this segment has no fixups
3014 if ( segInfoOffset == 0 )
3015 continue;
3016 const dyld_chained_starts_in_segment* segInfo = (dyld_chained_starts_in_segment*)((uint8_t*)startsInfo + segInfoOffset);
3017 if ( segInfo->page_count != 0 )
3018 return segInfo->pointer_format;
3019 }
3020 }
3021 assert(this->cputype == CPU_TYPE_ARM64 && this->cpusubtype == CPU_SUBTYPE_ARM64E && "chainedPointerFormat() called on non-chained binary");
3022 return DYLD_CHAINED_PTR_ARM64E;
3023 }
3024
3025 #if (BUILDING_DYLD || BUILDING_LIBDYLD) && !__arm64e__
3026 #define SUPPORT_OLD_ARM64E_FORMAT 0
3027 #else
3028 #define SUPPORT_OLD_ARM64E_FORMAT 1
3029 #endif
3030
3031 // find dyld_chained_starts_in_image* in image
3032 // if old arm64e binary, synthesize dyld_chained_starts_in_image*
3033 void MachOAnalyzer::withChainStarts(Diagnostics& diag, uint64_t startsStructOffsetHint, void (^callback)(const dyld_chained_starts_in_image*)) const
3034 {
3035 if ( startsStructOffsetHint != 0 ) {
3036 // we have a pre-computed offset into LINKEDIT for dyld_chained_starts_in_image
3037 callback((dyld_chained_starts_in_image*)((uint8_t*)this + startsStructOffsetHint));
3038 return;
3039 }
3040
3041 LinkEditInfo leInfo;
3042 getLinkEditPointers(diag, leInfo);
3043 if ( diag.hasError() )
3044 return;
3045
3046 if ( leInfo.chainedFixups != nullptr ) {
3047 // find dyld_chained_starts_in_image from dyld_chained_fixups_header
3048 const dyld_chained_fixups_header* header = (dyld_chained_fixups_header*)getLinkEditContent(leInfo.layout, leInfo.chainedFixups->dataoff);
3049 callback((dyld_chained_starts_in_image*)((uint8_t*)header + header->starts_offset));
3050 }
3051 #if SUPPORT_OLD_ARM64E_FORMAT
3052 // don't want this code in non-arm64e dyld because it causes a stack protector which dereferences a GOT pointer before GOT is set up
3053 else if ( (leInfo.dyldInfo != nullptr) && (this->cputype == CPU_TYPE_ARM64) && (this->cpusubtype == CPU_SUBTYPE_ARM64E) ) {
3054 // old arm64e binary, create a dyld_chained_starts_in_image for caller
3055 uint64_t baseAddress = preferredLoadAddress();
3056 BLOCK_ACCCESSIBLE_ARRAY(uint8_t, buffer, leInfo.dyldInfo->bind_size + 512);
3057 dyld_chained_starts_in_image* header = (dyld_chained_starts_in_image*)buffer;
3058 header->seg_count = leInfo.layout.linkeditSegIndex;
3059 for (uint32_t i=0; i < header->seg_count; ++i)
3060 header->seg_info_offset[i] = 0;
3061 __block uint8_t curSegIndex = 0;
3062 __block dyld_chained_starts_in_segment* curSeg = (dyld_chained_starts_in_segment*)(&(header->seg_info_offset[header->seg_count]));
3063 parseOrgArm64eChainedFixups(diag, nullptr, nullptr, ^(const LinkEditInfo& leInfo2, const SegmentInfo segments[], uint8_t segmentIndex,
3064 bool segIndexSet, uint64_t segmentOffset, uint16_t format, bool& stop) {
3065 uint32_t pageIndex = (uint32_t)(segmentOffset/0x1000);
3066 if ( segmentIndex != curSegIndex ) {
3067 if ( curSegIndex == 0 ) {
3068 header->seg_info_offset[segmentIndex] = (uint32_t)((uint8_t*)curSeg - buffer);
3069 }
3070 else {
3071 header->seg_info_offset[segmentIndex] = (uint32_t)((uint8_t*)(&curSeg->page_start[curSeg->page_count]) - buffer);
3072 curSeg = (dyld_chained_starts_in_segment*)((uint8_t*)header+header->seg_info_offset[segmentIndex]);
3073 }
3074 curSeg->page_count = 0;
3075 curSegIndex = segmentIndex;
3076 }
3077 while ( curSeg->page_count != pageIndex ) {
3078 curSeg->page_start[curSeg->page_count] = 0xFFFF;
3079 curSeg->page_count++;
3080 }
3081 curSeg->size = (uint32_t)((uint8_t*)(&curSeg->page_start[pageIndex]) - (uint8_t*)curSeg);
3082 curSeg->page_size = 0x1000; // old arm64e encoding used 4KB pages
3083 curSeg->pointer_format = DYLD_CHAINED_PTR_ARM64E;
3084 curSeg->segment_offset = segments[segmentIndex].vmAddr - baseAddress;
3085 curSeg->max_valid_pointer = 0;
3086 curSeg->page_count = pageIndex+1;
3087 curSeg->page_start[pageIndex] = segmentOffset & 0xFFF;
3088 //fprintf(stderr, "segment_offset=0x%llX, vmAddr=0x%llX\n", curSeg->segment_offset, segments[segmentIndex].vmAddr );
3089 //printf("segIndex=%d, segOffset=0x%08llX, page_start[%d]=0x%04X, page_start[%d]=0x%04X\n",
3090 // segmentIndex, segmentOffset, pageIndex, curSeg->page_start[pageIndex], pageIndex-1, pageIndex ? curSeg->page_start[pageIndex-1] : 0);
3091 });
3092 callback(header);
3093 }
3094 #endif
3095 else {
3096 diag.error("image does not use chained fixups");
3097 }
3098 }
3099
3100 MachOAnalyzer::ObjCInfo MachOAnalyzer::getObjCInfo() const
3101 {
3102 __block ObjCInfo result;
3103 result.selRefCount = 0;
3104 result.classDefCount = 0;
3105 result.protocolDefCount = 0;
3106
3107 const uint32_t ptrSize = pointerSize();
3108 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3109 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) == 0 ) {
3110 if ( strcmp(sectInfo.sectName, "__objc_selrefs") == 0 )
3111 result.selRefCount += (sectInfo.sectSize/ptrSize);
3112 else if ( strcmp(sectInfo.sectName, "__objc_classlist") == 0 )
3113 result.classDefCount += (sectInfo.sectSize/ptrSize);
3114 else if ( strcmp(sectInfo.sectName, "__objc_protolist") == 0 )
3115 result.protocolDefCount += (sectInfo.sectSize/ptrSize);
3116 }
3117 else if ( (this->cputype == CPU_TYPE_I386) && (strcmp(sectInfo.segInfo.segName, "__OBJC") == 0) ) {
3118 if ( strcmp(sectInfo.sectName, "__message_refs") == 0 )
3119 result.selRefCount += (sectInfo.sectSize/4);
3120 else if ( strcmp(sectInfo.sectName, "__class") == 0 )
3121 result.classDefCount += (sectInfo.sectSize/48);
3122 else if ( strcmp(sectInfo.sectName, "__protocol") == 0 )
3123 result.protocolDefCount += (sectInfo.sectSize/20);
3124 }
3125 });
3126
3127 return result;
3128 }
3129
3130 // Convert from a (possibly) live pointer to a vmAddr
3131 static uint64_t convertToVMAddr(uint64_t value, MachOAnalyzer::VMAddrConverter vmAddrConverter) {
3132 if ( vmAddrConverter.contentRebased ) {
3133 // The value may have been signed. Strip the signature if that is the case
3134 #if __has_feature(ptrauth_calls)
3135 value = (uint64_t)__builtin_ptrauth_strip((void*)value, ptrauth_key_asia);
3136 #endif
3137 value -= vmAddrConverter.slide;
3138 }
3139 else if ( vmAddrConverter.chainedPointerFormat != 0 ) {
3140 auto* chainedValue = (MachOAnalyzer::ChainedFixupPointerOnDisk*)&value;
3141 uint64_t targetRuntimeOffset;
3142 if ( chainedValue->isRebase(vmAddrConverter.chainedPointerFormat, vmAddrConverter.preferredLoadAddress,
3143 targetRuntimeOffset) ) {
3144 value = vmAddrConverter.preferredLoadAddress + targetRuntimeOffset;
3145 }
3146 }
3147
3148 return value;
3149 }
3150
3151 uint64_t MachOAnalyzer::ObjCClassInfo::getReadOnlyDataField(ObjCClassInfo::ReadOnlyDataField field, uint32_t pointerSize) const {
3152 if (pointerSize == 8) {
3153 typedef uint64_t PtrTy;
3154 struct class_ro_t {
3155 uint32_t flags;
3156 uint32_t instanceStart;
3157 // Note there is 4-bytes of alignment padding between instanceSize and ivarLayout
3158 // on 64-bit archs, but no padding on 32-bit archs.
3159 // This union is a way to model that.
3160 union {
3161 uint32_t instanceSize;
3162 PtrTy pad;
3163 } instanceSize;
3164 PtrTy ivarLayoutVMAddr;
3165 PtrTy nameVMAddr;
3166 PtrTy baseMethodsVMAddr;
3167 PtrTy baseProtocolsVMAddr;
3168 PtrTy ivarsVMAddr;
3169 PtrTy weakIvarLayoutVMAddr;
3170 PtrTy basePropertiesVMAddr;
3171 };
3172 const class_ro_t* classData = (const class_ro_t*)(dataVMAddr + vmAddrConverter.slide);
3173 switch (field) {
3174 case ObjCClassInfo::ReadOnlyDataField::name:
3175 return convertToVMAddr(classData->nameVMAddr, vmAddrConverter);
3176 case ObjCClassInfo::ReadOnlyDataField::baseMethods:
3177 return convertToVMAddr(classData->baseMethodsVMAddr, vmAddrConverter);
3178 }
3179 } else {
3180 typedef uint32_t PtrTy;
3181 struct class_ro_t {
3182 uint32_t flags;
3183 uint32_t instanceStart;
3184 // Note there is 4-bytes of alignment padding between instanceSize and ivarLayout
3185 // on 64-bit archs, but no padding on 32-bit archs.
3186 // This union is a way to model that.
3187 union {
3188 uint32_t instanceSize;
3189 PtrTy pad;
3190 } instanceSize;
3191 PtrTy ivarLayoutVMAddr;
3192 PtrTy nameVMAddr;
3193 PtrTy baseMethodsVMAddr;
3194 PtrTy baseProtocolsVMAddr;
3195 PtrTy ivarsVMAddr;
3196 PtrTy weakIvarLayoutVMAddr;
3197 PtrTy basePropertiesVMAddr;
3198 };
3199 const class_ro_t* classData = (const class_ro_t*)(dataVMAddr + vmAddrConverter.slide);
3200 switch (field) {
3201 case ObjCClassInfo::ReadOnlyDataField::name:
3202 return convertToVMAddr(classData->nameVMAddr, vmAddrConverter);
3203 case ObjCClassInfo::ReadOnlyDataField::baseMethods:
3204 return convertToVMAddr(classData->baseMethodsVMAddr, vmAddrConverter);
3205 }
3206 }
3207 }
3208
3209 const char* MachOAnalyzer::getPrintableString(uint64_t stringVMAddr, MachOAnalyzer::PrintableStringResult& result,
3210 SectionCache* sectionCache,
3211 bool (^sectionHandler)(const SectionInfo& sectionInfo)) const {
3212 if ( sectionCache != nullptr ) {
3213 // Make sure the string is pointing in to one of the supported sections
3214 __block const dyld3::MachOAnalyzer::SectionInfo* nameSectionInfo = nullptr;
3215 for (const dyld3::MachOAnalyzer::SectionInfo& sectionInfo : sectionCache->sectionInfos) {
3216 if ( stringVMAddr < sectionInfo.sectAddr ) {
3217 continue;
3218 }
3219 if ( stringVMAddr >= ( sectionInfo.sectAddr + sectionInfo.sectSize) ) {
3220 continue;
3221 }
3222 nameSectionInfo = &sectionInfo;
3223 break;
3224 }
3225
3226 if ( nameSectionInfo != nullptr ) {
3227 // The section handler may also reject this section
3228 if ( sectionHandler != nullptr ) {
3229 if (!sectionHandler(*nameSectionInfo)) {
3230 result = PrintableStringResult::UnknownSection;
3231 return nullptr;
3232 }
3233 }
3234
3235 result = PrintableStringResult::CanPrint;
3236 return (const char*)(stringVMAddr + getSlide());
3237 }
3238 }
3239
3240 // If the name isn't in the cache then find the section its in
3241
3242 uint32_t fairplayTextOffsetStart;
3243 uint32_t fairplayTextOffsetEnd;
3244 uint32_t fairplaySize;
3245 if ( isFairPlayEncrypted(fairplayTextOffsetStart, fairplaySize) ) {
3246 fairplayTextOffsetEnd = fairplayTextOffsetStart + fairplaySize;
3247 } else {
3248 fairplayTextOffsetEnd = 0;
3249 }
3250
3251 result = PrintableStringResult::UnknownSection;
3252 forEachSection(^(const MachOAnalyzer::SectionInfo &sectInfo, bool malformedSectionRange, bool &stop) {
3253 if ( stringVMAddr < sectInfo.sectAddr ) {
3254 return;
3255 }
3256 if ( stringVMAddr >= ( sectInfo.sectAddr + sectInfo.sectSize) ) {
3257 return;
3258 }
3259
3260 // We can't scan this section if its protected or not cstrings.
3261 if ( sectInfo.segInfo.isProtected || ( (sectInfo.sectFlags & SECTION_TYPE) != S_CSTRING_LITERALS ) ) {
3262 result = PrintableStringResult::ProtectedSection;
3263 stop = true;
3264 return;
3265 }
3266
3267 // We can't scan this section if it overlaps with the fairplay range
3268 if ( fairplayTextOffsetEnd < sectInfo.sectFileOffset ) {
3269 // Fairplay range ends before section
3270 } else if ( fairplayTextOffsetStart > (sectInfo.sectFileOffset + sectInfo.sectSize) ) {
3271 // Fairplay range starts after section
3272 } else {
3273 // Must overlap
3274 result = PrintableStringResult::FairPlayEncrypted;
3275 stop = true;
3276 return;
3277 }
3278
3279 // The section handler may also reject this section
3280 if ( sectionHandler != nullptr ) {
3281 if (!sectionHandler(sectInfo)) {
3282 result = PrintableStringResult::UnknownSection;
3283 stop = true;
3284 return;
3285 }
3286 }
3287 // Cache this section for later.
3288 if ( sectionCache != nullptr ) {
3289 sectionCache->sectionInfos.push_back(sectInfo);
3290 }
3291 result = PrintableStringResult::CanPrint;
3292 stop = true;
3293 });
3294
3295 if (result == PrintableStringResult::CanPrint)
3296 return (const char*)(stringVMAddr + getSlide());
3297 return nullptr;
3298 }
3299
3300 bool MachOAnalyzer::SectionCache::findSectionForVMAddr(uint64_t vmAddr, bool (^sectionHandler)(const SectionInfo& sectionInfo)) {
3301
3302 // Make sure the string is pointing in to one of the supported sections
3303 __block const dyld3::MachOAnalyzer::SectionInfo* foundSectionInfo = nullptr;
3304 for (const dyld3::MachOAnalyzer::SectionInfo& sectionInfo : sectionInfos) {
3305 if ( vmAddr < sectionInfo.sectAddr ) {
3306 continue;
3307 }
3308 if ( vmAddr >= ( sectionInfo.sectAddr + sectionInfo.sectSize) ) {
3309 continue;
3310 }
3311 foundSectionInfo = &sectionInfo;
3312 break;
3313 }
3314
3315 if ( foundSectionInfo != nullptr ) {
3316 // The section handler may also reject this section
3317 if ( sectionHandler != nullptr ) {
3318 if (!sectionHandler(*foundSectionInfo)) {
3319 return nullptr;
3320 }
3321 }
3322
3323 // Found a section, so return true
3324 return true;
3325 }
3326
3327 // If the name isn't in the cache then find the section its in
3328
3329 uint32_t fairplayTextOffsetStart;
3330 uint32_t fairplayTextOffsetEnd;
3331 uint32_t fairplaySize;
3332 if ( ma->isFairPlayEncrypted(fairplayTextOffsetStart, fairplaySize) ) {
3333 fairplayTextOffsetEnd = fairplayTextOffsetStart + fairplaySize;
3334 } else {
3335 fairplayTextOffsetEnd = 0;
3336 }
3337
3338 __block bool foundValidSection = false;
3339 ma->forEachSection(^(const MachOAnalyzer::SectionInfo &sectInfo, bool malformedSectionRange, bool &stop) {
3340 if ( vmAddr < sectInfo.sectAddr ) {
3341 return;
3342 }
3343 if ( vmAddr >= ( sectInfo.sectAddr + sectInfo.sectSize) ) {
3344 return;
3345 }
3346
3347 // We can't scan this section if it overlaps with the fairplay range
3348 if ( fairplayTextOffsetEnd < sectInfo.sectFileOffset ) {
3349 // Fairplay range ends before section
3350 } else if ( fairplayTextOffsetStart > (sectInfo.sectFileOffset + sectInfo.sectSize) ) {
3351 // Fairplay range starts after section
3352 } else {
3353 // Must overlap
3354 stop = true;
3355 return;
3356 }
3357
3358 // The section handler may also reject this section
3359 if ( sectionHandler != nullptr ) {
3360 if (!sectionHandler(sectInfo)) {
3361 stop = true;
3362 return;
3363 }
3364 }
3365 // Cache this section for later.
3366 sectionInfos.push_back(sectInfo);
3367 foundValidSection = true;
3368 stop = true;
3369 });
3370
3371 return foundValidSection;
3372 }
3373
3374 void MachOAnalyzer::forEachObjCClass(Diagnostics& diag, bool contentRebased,
3375 void (^handler)(Diagnostics& diag, uint64_t classVMAddr,
3376 uint64_t classSuperclassVMAddr, uint64_t classDataVMAddr,
3377 const ObjCClassInfo& objcClass, bool isMetaClass)) const {
3378 const uint64_t ptrSize = pointerSize();
3379 intptr_t slide = getSlide();
3380
3381 MachOAnalyzer::VMAddrConverter vmAddrConverter;
3382 vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
3383 vmAddrConverter.slide = slide;
3384 vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
3385 vmAddrConverter.contentRebased = contentRebased;
3386
3387 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3388 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
3389 return;
3390 if ( strcmp(sectInfo.sectName, "__objc_classlist") != 0 )
3391 return;
3392 const uint8_t* classList = (uint8_t*)(sectInfo.sectAddr + slide);
3393 uint64_t classListSize = sectInfo.sectSize;
3394
3395 if ( (classListSize % ptrSize) != 0 ) {
3396 diag.error("Invalid objc class section size");
3397 return;
3398 }
3399
3400 if ( ptrSize == 8 ) {
3401 typedef uint64_t PtrTy;
3402 struct objc_class_t {
3403 uint64_t isaVMAddr;
3404 uint64_t superclassVMAddr;
3405 uint64_t methodCacheBuckets;
3406 uint64_t methodCacheProperties;
3407 uint64_t dataVMAddrAndFastFlags;
3408 };
3409 // This matches "struct TargetClassMetadata" from Metadata.h in Swift
3410 struct swift_class_metadata_t : objc_class_t {
3411 uint32_t swiftClassFlags;
3412 };
3413 enum : uint64_t {
3414 FAST_DATA_MASK = 0x00007ffffffffff8ULL
3415 };
3416 for (uint64_t i = 0; i != classListSize; i += sizeof(PtrTy)) {
3417 uint64_t classVMAddr = convertToVMAddr(*(PtrTy*)(classList + i), vmAddrConverter);
3418 uint64_t classSuperclassVMAddr = classVMAddr + offsetof(objc_class_t, superclassVMAddr);
3419 uint64_t classDataVMAddr = classVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);
3420
3421 // First call the handler on the class
3422 const objc_class_t* classPtr = (const objc_class_t*)(classVMAddr + slide);
3423 const swift_class_metadata_t* swiftClassPtr = (const swift_class_metadata_t*)classPtr;
3424 ObjCClassInfo objcClass;
3425 objcClass.isaVMAddr = convertToVMAddr(classPtr->isaVMAddr, vmAddrConverter);
3426 objcClass.superclassVMAddr = convertToVMAddr(classPtr->superclassVMAddr, vmAddrConverter);
3427 objcClass.dataVMAddr = convertToVMAddr(classPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
3428 objcClass.vmAddrConverter = vmAddrConverter;
3429 objcClass.isSwiftLegacy = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
3430 objcClass.isSwiftStable = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
3431 // The Swift class flags are only present if the class is swift
3432 objcClass.swiftClassFlags = (objcClass.isSwiftLegacy || objcClass.isSwiftStable) ? swiftClassPtr->swiftClassFlags : 0;
3433 handler(diag, classVMAddr, classSuperclassVMAddr, classDataVMAddr, objcClass, false);
3434 if (diag.hasError())
3435 return;
3436
3437 // Then call it on the metaclass
3438 const objc_class_t* metaClassPtr = (const objc_class_t*)(objcClass.isaVMAddr + slide);
3439 const swift_class_metadata_t* swiftMetaClassPtr = (const swift_class_metadata_t*)metaClassPtr;
3440 ObjCClassInfo objcMetaClass;
3441 objcMetaClass.isaVMAddr = convertToVMAddr(metaClassPtr->isaVMAddr, vmAddrConverter);
3442 objcMetaClass.superclassVMAddr = convertToVMAddr(metaClassPtr->superclassVMAddr, vmAddrConverter);
3443 objcMetaClass.dataVMAddr = convertToVMAddr(metaClassPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
3444 objcMetaClass.vmAddrConverter = vmAddrConverter;
3445 objcMetaClass.isSwiftLegacy = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
3446 objcMetaClass.isSwiftStable = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
3447 // The Swift class flags are only present if the class is swift
3448 objcMetaClass.swiftClassFlags = (objcMetaClass.isSwiftLegacy || objcMetaClass.isSwiftStable) ? swiftMetaClassPtr->swiftClassFlags : 0;
3449 classSuperclassVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, superclassVMAddr);
3450 classDataVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);
3451 handler(diag, classVMAddr, classSuperclassVMAddr, classDataVMAddr, objcMetaClass, true);
3452 if (diag.hasError())
3453 return;
3454 }
3455 } else {
3456 typedef uint32_t PtrTy;
3457 struct objc_class_t {
3458 uint32_t isaVMAddr;
3459 uint32_t superclassVMAddr;
3460 uint32_t methodCacheBuckets;
3461 uint32_t methodCacheProperties;
3462 uint32_t dataVMAddrAndFastFlags;
3463 };
3464 // This matches "struct TargetClassMetadata" from Metadata.h in Swift
3465 struct swift_class_metadata_t : objc_class_t {
3466 uint32_t swiftClassFlags;
3467 };
3468 enum : uint32_t {
3469 FAST_DATA_MASK = 0xfffffffcUL
3470 };
3471 for (uint64_t i = 0; i != classListSize; i += sizeof(PtrTy)) {
3472 uint64_t classVMAddr = convertToVMAddr(*(PtrTy*)(classList + i), vmAddrConverter);
3473 uint64_t classSuperclassVMAddr = classVMAddr + offsetof(objc_class_t, superclassVMAddr);
3474 uint64_t classDataVMAddr = classVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);
3475
3476 // First call the handler on the class
3477 const objc_class_t* classPtr = (const objc_class_t*)(classVMAddr + slide);
3478 const swift_class_metadata_t* swiftClassPtr = (const swift_class_metadata_t*)classPtr;
3479 ObjCClassInfo objcClass;
3480 objcClass.isaVMAddr = convertToVMAddr(classPtr->isaVMAddr, vmAddrConverter);
3481 objcClass.superclassVMAddr = convertToVMAddr(classPtr->superclassVMAddr, vmAddrConverter);
3482 objcClass.dataVMAddr = convertToVMAddr(classPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
3483 objcClass.vmAddrConverter = vmAddrConverter;
3484 objcClass.isSwiftLegacy = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
3485 objcClass.isSwiftStable = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
3486 // The Swift class flags are only present if the class is swift
3487 objcClass.swiftClassFlags = (objcClass.isSwiftLegacy || objcClass.isSwiftStable) ? swiftClassPtr->swiftClassFlags : 0;
3488 handler(diag, classVMAddr, classSuperclassVMAddr, classDataVMAddr, objcClass, false);
3489 if (diag.hasError())
3490 return;
3491
3492 // Then call it on the metaclass
3493 const objc_class_t* metaClassPtr = (const objc_class_t*)(objcClass.isaVMAddr + slide);
3494 const swift_class_metadata_t* swiftMetaClassPtr = (const swift_class_metadata_t*)metaClassPtr;
3495 ObjCClassInfo objcMetaClass;
3496 objcMetaClass.isaVMAddr = convertToVMAddr(metaClassPtr->isaVMAddr, vmAddrConverter);
3497 objcMetaClass.superclassVMAddr = convertToVMAddr(metaClassPtr->superclassVMAddr, vmAddrConverter);
3498 objcMetaClass.dataVMAddr = convertToVMAddr(metaClassPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
3499 objcMetaClass.vmAddrConverter = vmAddrConverter;
3500 objcMetaClass.isSwiftLegacy = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
3501 objcMetaClass.isSwiftStable = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
3502 // The Swift class flags are only present if the class is swift
3503 objcMetaClass.swiftClassFlags = (objcMetaClass.isSwiftLegacy || objcMetaClass.isSwiftStable) ? swiftMetaClassPtr->swiftClassFlags : 0;
3504 classSuperclassVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, superclassVMAddr);
3505 classDataVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);
3506 handler(diag, classVMAddr, classSuperclassVMAddr, classDataVMAddr, objcMetaClass, true);
3507 if (diag.hasError())
3508 return;
3509 }
3510 }
3511 });
3512 }
3513
3514 void MachOAnalyzer::forEachObjCCategory(Diagnostics& diag, bool contentRebased,
3515 void (^handler)(Diagnostics& diag, uint64_t categoryVMAddr,
3516 const dyld3::MachOAnalyzer::ObjCCategory& objcCategory)) const {
3517 const uint64_t ptrSize = pointerSize();
3518 intptr_t slide = getSlide();
3519
3520 MachOAnalyzer::VMAddrConverter vmAddrConverter;
3521 vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
3522 vmAddrConverter.slide = slide;
3523 vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
3524 vmAddrConverter.contentRebased = contentRebased;
3525
3526 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3527 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
3528 return;
3529 if ( strcmp(sectInfo.sectName, "__objc_catlist") != 0 )
3530 return;
3531 const uint8_t* categoryList = (uint8_t*)(sectInfo.sectAddr + slide);
3532 uint64_t categoryListSize = sectInfo.sectSize;
3533
3534 if ( (categoryListSize % ptrSize) != 0 ) {
3535 diag.error("Invalid objc category section size");
3536 return;
3537 }
3538
3539 if ( ptrSize == 8 ) {
3540 typedef uint64_t PtrTy;
3541 struct objc_category_t {
3542 PtrTy nameVMAddr;
3543 PtrTy clsVMAddr;
3544 PtrTy instanceMethodsVMAddr;
3545 PtrTy classMethodsVMAddr;
3546 PtrTy protocolsVMAddr;
3547 PtrTy instancePropertiesVMAddr;
3548 };
3549 for (uint64_t i = 0; i != categoryListSize; i += sizeof(PtrTy)) {
3550 uint64_t categoryVMAddr = convertToVMAddr(*(PtrTy*)(categoryList + i), vmAddrConverter);
3551
3552 const objc_category_t* categoryPtr = (const objc_category_t*)(categoryVMAddr + slide);
3553 ObjCCategory objCCategory;
3554 objCCategory.nameVMAddr = convertToVMAddr(categoryPtr->nameVMAddr, vmAddrConverter);
3555 objCCategory.clsVMAddr = convertToVMAddr(categoryPtr->clsVMAddr, vmAddrConverter);
3556 objCCategory.instanceMethodsVMAddr = convertToVMAddr(categoryPtr->instanceMethodsVMAddr, vmAddrConverter);
3557 objCCategory.classMethodsVMAddr = convertToVMAddr(categoryPtr->classMethodsVMAddr, vmAddrConverter);
3558 objCCategory.protocolsVMAddr = convertToVMAddr(categoryPtr->protocolsVMAddr, vmAddrConverter);
3559 objCCategory.instancePropertiesVMAddr = convertToVMAddr(categoryPtr->instancePropertiesVMAddr, vmAddrConverter);
3560 handler(diag, categoryVMAddr, objCCategory);
3561 if (diag.hasError())
3562 return;
3563 }
3564 } else {
3565 typedef uint32_t PtrTy;
3566 struct objc_category_t {
3567 PtrTy nameVMAddr;
3568 PtrTy clsVMAddr;
3569 PtrTy instanceMethodsVMAddr;
3570 PtrTy classMethodsVMAddr;
3571 PtrTy protocolsVMAddr;
3572 PtrTy instancePropertiesVMAddr;
3573 };
3574 for (uint64_t i = 0; i != categoryListSize; i += sizeof(PtrTy)) {
3575 uint64_t categoryVMAddr = convertToVMAddr(*(PtrTy*)(categoryList + i), vmAddrConverter);
3576
3577 const objc_category_t* categoryPtr = (const objc_category_t*)(categoryVMAddr + slide);
3578 ObjCCategory objCCategory;
3579 objCCategory.nameVMAddr = convertToVMAddr(categoryPtr->nameVMAddr, vmAddrConverter);
3580 objCCategory.clsVMAddr = convertToVMAddr(categoryPtr->clsVMAddr, vmAddrConverter);
3581 objCCategory.instanceMethodsVMAddr = convertToVMAddr(categoryPtr->instanceMethodsVMAddr, vmAddrConverter);
3582 objCCategory.classMethodsVMAddr = convertToVMAddr(categoryPtr->classMethodsVMAddr, vmAddrConverter);
3583 objCCategory.protocolsVMAddr = convertToVMAddr(categoryPtr->protocolsVMAddr, vmAddrConverter);
3584 objCCategory.instancePropertiesVMAddr = convertToVMAddr(categoryPtr->instancePropertiesVMAddr, vmAddrConverter);
3585 handler(diag, categoryVMAddr, objCCategory);
3586 if (diag.hasError())
3587 return;
3588 }
3589 }
3590 });
3591 }
3592
3593 void MachOAnalyzer::forEachObjCProtocol(Diagnostics& diag, bool contentRebased,
3594 void (^handler)(Diagnostics& diag, uint64_t categoryVMAddr,
3595 const dyld3::MachOAnalyzer::ObjCProtocol& objCProtocol)) const {
3596 const uint64_t ptrSize = pointerSize();
3597 intptr_t slide = getSlide();
3598
3599 MachOAnalyzer::VMAddrConverter vmAddrConverter;
3600 vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
3601 vmAddrConverter.slide = slide;
3602 vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
3603 vmAddrConverter.contentRebased = contentRebased;
3604
3605 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3606 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
3607 return;
3608 if ( strcmp(sectInfo.sectName, "__objc_protolist") != 0 )
3609 return;
3610 const uint8_t* protocolList = (uint8_t*)(sectInfo.sectAddr + slide);
3611 uint64_t protocolListSize = sectInfo.sectSize;
3612
3613 if ( (protocolListSize % ptrSize) != 0 ) {
3614 diag.error("Invalid objc protocol section size");
3615 return;
3616 }
3617
3618 if ( ptrSize == 8 ) {
3619 typedef uint64_t PtrTy;
3620 struct protocol_t {
3621 PtrTy isaVMAddr;
3622 PtrTy nameVMAddr;
3623 PtrTy protocolsVMAddr;
3624 PtrTy instanceMethodsVMAddr;
3625 PtrTy classMethodsVMAddr;
3626 PtrTy optionalInstanceMethodsVMAddr;
3627 PtrTy optionalClassMethodsVMAddr;
3628 PtrTy instancePropertiesVMAddr;
3629 uint32_t size;
3630 uint32_t flags;
3631 // Fields below this point are not always present on disk.
3632 PtrTy extendedMethodTypesVMAddr;
3633 PtrTy demangledNameVMAddr;
3634 PtrTy classPropertiesVMAddr;
3635 };
3636 for (uint64_t i = 0; i != protocolListSize; i += sizeof(PtrTy)) {
3637 uint64_t protocolVMAddr = convertToVMAddr(*(PtrTy*)(protocolList + i), vmAddrConverter);
3638
3639 const protocol_t* protocolPtr = (const protocol_t*)(protocolVMAddr + slide);
3640 ObjCProtocol objCProtocol;
3641 objCProtocol.isaVMAddr = convertToVMAddr(protocolPtr->isaVMAddr, vmAddrConverter);
3642 objCProtocol.nameVMAddr = convertToVMAddr(protocolPtr->nameVMAddr, vmAddrConverter);
3643 objCProtocol.instanceMethodsVMAddr = convertToVMAddr(protocolPtr->instanceMethodsVMAddr, vmAddrConverter);
3644 objCProtocol.classMethodsVMAddr = convertToVMAddr(protocolPtr->classMethodsVMAddr, vmAddrConverter);
3645 objCProtocol.optionalInstanceMethodsVMAddr = convertToVMAddr(protocolPtr->optionalInstanceMethodsVMAddr, vmAddrConverter);
3646 objCProtocol.optionalClassMethodsVMAddr = convertToVMAddr(protocolPtr->optionalClassMethodsVMAddr, vmAddrConverter);
3647
3648 // Track if this protocol needs a reallocation in objc
3649 objCProtocol.requiresObjCReallocation = protocolPtr->size < sizeof(protocol_t);
3650
3651 handler(diag, protocolVMAddr, objCProtocol);
3652 if (diag.hasError())
3653 return;
3654 }
3655 } else {
3656 typedef uint32_t PtrTy;
3657 struct protocol_t {
3658 PtrTy isaVMAddr;
3659 PtrTy nameVMAddr;
3660 PtrTy protocolsVMAddr;
3661 PtrTy instanceMethodsVMAddr;
3662 PtrTy classMethodsVMAddr;
3663 PtrTy optionalInstanceMethodsVMAddr;
3664 PtrTy optionalClassMethodsVMAddr;
3665 PtrTy instancePropertiesVMAddr;
3666 uint32_t size;
3667 uint32_t flags;
3668 // Fields below this point are not always present on disk.
3669 PtrTy extendedMethodTypesVMAddr;
3670 PtrTy demangledNameVMAddr;
3671 PtrTy classPropertiesVMAddr;
3672 };
3673 for (uint64_t i = 0; i != protocolListSize; i += sizeof(PtrTy)) {
3674 uint64_t protocolVMAddr = convertToVMAddr(*(PtrTy*)(protocolList + i), vmAddrConverter);
3675
3676 const protocol_t* protocolPtr = (const protocol_t*)(protocolVMAddr + slide);
3677 ObjCProtocol objCProtocol;
3678 objCProtocol.isaVMAddr = convertToVMAddr(protocolPtr->isaVMAddr, vmAddrConverter);
3679 objCProtocol.nameVMAddr = convertToVMAddr(protocolPtr->nameVMAddr, vmAddrConverter);
3680 objCProtocol.instanceMethodsVMAddr = convertToVMAddr(protocolPtr->instanceMethodsVMAddr, vmAddrConverter);
3681 objCProtocol.classMethodsVMAddr = convertToVMAddr(protocolPtr->classMethodsVMAddr, vmAddrConverter);
3682 objCProtocol.optionalInstanceMethodsVMAddr = convertToVMAddr(protocolPtr->optionalInstanceMethodsVMAddr, vmAddrConverter);
3683 objCProtocol.optionalClassMethodsVMAddr = convertToVMAddr(protocolPtr->optionalClassMethodsVMAddr, vmAddrConverter);
3684
3685 // Track if this protocol needs a reallocation in objc
3686 objCProtocol.requiresObjCReallocation = protocolPtr->size < sizeof(protocol_t);
3687
3688 handler(diag, protocolVMAddr, objCProtocol);
3689 if (diag.hasError())
3690 return;
3691 }
3692 }
3693 });
3694 }
3695
3696 void MachOAnalyzer::forEachObjCMethod(uint64_t methodListVMAddr, bool contentRebased,
3697 void (^handler)(uint64_t methodVMAddr, const ObjCMethod& method)) const {
3698 if ( methodListVMAddr == 0 )
3699 return;
3700
3701 const uint64_t ptrSize = pointerSize();
3702 intptr_t slide = getSlide();
3703
3704 MachOAnalyzer::VMAddrConverter vmAddrConverter;
3705 vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
3706 vmAddrConverter.slide = slide;
3707 vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
3708 vmAddrConverter.contentRebased = contentRebased;
3709
3710 if ( ptrSize == 8 ) {
3711 typedef uint64_t PtrTy;
3712 struct method_list_t {
3713 uint32_t entsize;
3714 uint32_t count;
3715 PtrTy methodArrayBase; // Note this is the start the array method_t[0]
3716
3717 uint32_t getEntsize() const {
3718 return (entsize) & ~(uint32_t)3;
3719 }
3720 };
3721
3722 struct method_t {
3723 PtrTy nameVMAddr; // SEL
3724 PtrTy typesVMAddr; // const char *
3725 PtrTy impVMAddr; // IMP
3726 };
3727
3728 const method_list_t* methodList = (const method_list_t*)(methodListVMAddr + slide);
3729 uint64_t methodListArrayBaseVMAddr = methodListVMAddr + offsetof(method_list_t, methodArrayBase);
3730 for (unsigned i = 0; i != methodList->count; ++i) {
3731 uint64_t methodEntryOffset = i * methodList->getEntsize();
3732 uint64_t methodVMAddr = methodListArrayBaseVMAddr + methodEntryOffset;
3733 const method_t* methodPtr = (const method_t*)(methodVMAddr + slide);
3734 ObjCMethod method;
3735 method.nameVMAddr = convertToVMAddr(methodPtr->nameVMAddr, vmAddrConverter);
3736 method.typesVMAddr = convertToVMAddr(methodPtr->typesVMAddr, vmAddrConverter);
3737 method.impVMAddr = convertToVMAddr(methodPtr->impVMAddr, vmAddrConverter);
3738 method.nameLocationVMAddr = methodVMAddr + offsetof(method_t, nameVMAddr);
3739 handler(methodVMAddr, method);
3740 }
3741 } else {
3742 typedef uint32_t PtrTy;
3743 struct method_list_t {
3744 uint32_t entsize;
3745 uint32_t count;
3746 PtrTy methodArrayBase; // Note this is the start the array method_t[0]
3747
3748 uint32_t getEntsize() const {
3749 return (entsize) & ~(uint32_t)3;
3750 }
3751 };
3752
3753 struct method_t {
3754 PtrTy nameVMAddr; // SEL
3755 PtrTy typesVMAddr; // const char *
3756 PtrTy impVMAddr; // IMP
3757 };
3758
3759 const method_list_t* methodList = (const method_list_t*)(methodListVMAddr + slide);
3760 uint64_t methodListArrayBaseVMAddr = methodListVMAddr + offsetof(method_list_t, methodArrayBase);
3761 for (unsigned i = 0; i != methodList->count; ++i) {
3762 uint64_t methodEntryOffset = i * methodList->getEntsize();
3763 uint64_t methodVMAddr = methodListArrayBaseVMAddr + methodEntryOffset;
3764 const method_t* methodPtr = (const method_t*)(methodVMAddr + slide);
3765 ObjCMethod method;
3766 method.nameVMAddr = convertToVMAddr(methodPtr->nameVMAddr, vmAddrConverter);
3767 method.typesVMAddr = convertToVMAddr(methodPtr->typesVMAddr, vmAddrConverter);
3768 method.impVMAddr = convertToVMAddr(methodPtr->impVMAddr, vmAddrConverter);
3769 method.nameLocationVMAddr = methodVMAddr + offsetof(method_t, nameVMAddr);
3770 handler(methodVMAddr, method);
3771 }
3772 }
3773 }
3774
3775
3776 void MachOAnalyzer::forEachObjCSelectorReference(Diagnostics& diag, bool contentRebased,
3777 void (^handler)(uint64_t selRefVMAddr, uint64_t selRefTargetVMAddr)) const {
3778 const uint64_t ptrSize = pointerSize();
3779 intptr_t slide = getSlide();
3780
3781 MachOAnalyzer::VMAddrConverter vmAddrConverter;
3782 vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
3783 vmAddrConverter.slide = slide;
3784 vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
3785 vmAddrConverter.contentRebased = contentRebased;
3786
3787 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3788 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
3789 return;
3790 if ( strcmp(sectInfo.sectName, "__objc_selrefs") != 0 )
3791 return;
3792 uint64_t selRefSectionVMAddr = sectInfo.sectAddr;
3793 const uint8_t* selRefs = (uint8_t*)(selRefSectionVMAddr + slide);
3794 uint64_t selRefsSize = sectInfo.sectSize;
3795
3796 if ( (selRefsSize % ptrSize) != 0 ) {
3797 diag.error("Invalid sel ref section size");
3798 return;
3799 }
3800
3801 if ( ptrSize == 8 ) {
3802 typedef uint64_t PtrTy;
3803 for (uint64_t i = 0; i != selRefsSize; i += sizeof(PtrTy)) {
3804 uint64_t selRefVMAddr = selRefSectionVMAddr + i;
3805 uint64_t selRefTargetVMAddr = convertToVMAddr(*(PtrTy*)(selRefs + i), vmAddrConverter);
3806 handler(selRefVMAddr, selRefTargetVMAddr);
3807 if (diag.hasError()) {
3808 stop = true;
3809 return;
3810 }
3811 }
3812 } else {
3813 typedef uint32_t PtrTy;
3814 for (uint64_t i = 0; i != selRefsSize; i += sizeof(PtrTy)) {
3815 uint64_t selRefVMAddr = selRefSectionVMAddr + i;
3816 uint64_t selRefTargetVMAddr = convertToVMAddr(*(PtrTy*)(selRefs + i), vmAddrConverter);
3817 handler(selRefVMAddr, selRefTargetVMAddr);
3818 if (diag.hasError()) {
3819 stop = true;
3820 return;
3821 }
3822 }
3823 }
3824 });
3825 }
3826
3827 void MachOAnalyzer::forEachObjCMethodName(void (^handler)(const char* methodName)) const {
3828 intptr_t slide = getSlide();
3829 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3830 if ( strcmp(sectInfo.segInfo.segName, "__TEXT") != 0 )
3831 return;
3832 if ( strcmp(sectInfo.sectName, "__objc_methname") != 0 )
3833 return;
3834 if ( sectInfo.segInfo.isProtected || ( (sectInfo.sectFlags & SECTION_TYPE) != S_CSTRING_LITERALS ) ) {
3835 stop = true;
3836 return;
3837 }
3838 if ( malformedSectionRange ) {
3839 stop = true;
3840 return;
3841 }
3842
3843 const char* content = (const char*)(sectInfo.sectAddr + slide);
3844 uint64_t sectionSize = sectInfo.sectSize;
3845
3846 const char* s = (const char*)content;
3847 const char* end = s + sectionSize;
3848 while ( s < end ) {
3849 handler(s);
3850 s += strlen(s) + 1;
3851 }
3852 });
3853 }
3854
3855
3856 bool MachOAnalyzer::hasObjCMessageReferences() const {
3857
3858 __block bool foundSection = false;
3859 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3860 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
3861 return;
3862 if ( strcmp(sectInfo.sectName, "__objc_msgrefs") != 0 )
3863 return;
3864 foundSection = true;
3865 stop = true;
3866 });
3867 return foundSection;
3868 }
3869
3870 const MachOAnalyzer::ObjCImageInfo* MachOAnalyzer::objcImageInfo() const {
3871 int64_t slide = getSlide();
3872
3873 __block bool foundInvalidObjCImageInfo = false;
3874 __block const ObjCImageInfo* imageInfo = nullptr;
3875 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectionInfo, bool malformedSectionRange, bool& stop) {
3876 if ( strncmp(sectionInfo.segInfo.segName, "__DATA", 6) != 0 )
3877 return;
3878 if (strcmp(sectionInfo.sectName, "__objc_imageinfo") != 0)
3879 return;
3880 if ( malformedSectionRange ) {
3881 stop = true;
3882 return;
3883 }
3884 if ( sectionInfo.sectSize != 8 ) {
3885 stop = true;
3886 return;
3887 }
3888 imageInfo = (const ObjCImageInfo*)(sectionInfo.sectAddr + slide);
3889 if ( (imageInfo->flags & ObjCImageInfo::dyldPreoptimized) != 0 ) {
3890 foundInvalidObjCImageInfo = true;
3891 stop = true;
3892 return;
3893 }
3894 stop = true;
3895 });
3896 if ( foundInvalidObjCImageInfo )
3897 return nullptr;
3898 return imageInfo;
3899 }
3900
3901 uint32_t MachOAnalyzer::loadCommandsFreeSpace() const
3902 {
3903 __block uint32_t firstSectionFileOffset = 0;
3904 __block uint32_t firstSegmentFileOffset = 0;
3905 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3906 firstSectionFileOffset = sectInfo.sectFileOffset;
3907 firstSegmentFileOffset = (uint32_t)sectInfo.segInfo.fileOffset;
3908 stop = true;
3909 });
3910
3911 uint32_t headerSize = (this->magic == MH_MAGIC_64) ? sizeof(mach_header_64) : sizeof(mach_header);
3912 uint32_t existSpaceUsed = this->sizeofcmds + headerSize;
3913 return firstSectionFileOffset - firstSegmentFileOffset - existSpaceUsed;
3914 }
3915
3916 void MachOAnalyzer::forEachWeakDef(Diagnostics& diag,
3917 void (^handler)(const char* symbolName, uintptr_t imageOffset, bool isFromExportTrie)) const {
3918 uint64_t baseAddress = preferredLoadAddress();
3919 forEachGlobalSymbol(diag, ^(const char *symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool &stop) {
3920 if ( (n_desc & N_WEAK_DEF) != 0 ) {
3921 handler(symbolName, n_value - baseAddress, false);
3922 }
3923 });
3924 forEachExportedSymbol(diag, ^(const char *symbolName, uint64_t imageOffset, uint64_t flags, uint64_t other, const char *importName, bool &stop) {
3925 if ( (flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION ) == 0 )
3926 return;
3927 // Skip resolvers and re-exports
3928 if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) != 0 )
3929 return;
3930 if ( (flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) != 0 )
3931 return;
3932 handler(symbolName, imageOffset, true);
3933 });
3934 }
3935
3936 } // dyld3
3937
3938