]> git.saurik.com Git - apple/dyld.git/blob - dyld3/MachOAnalyzer.cpp
dyld-732.8.tar.gz
[apple/dyld.git] / dyld3 / MachOAnalyzer.cpp
1 /*
2 * Copyright (c) 2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 #include <sys/types.h>
25 #include <mach/mach.h>
26 #include <assert.h>
27 #include <limits.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <mach-o/reloc.h>
32 #include <mach-o/nlist.h>
33 #include <TargetConditionals.h>
34
35 #include "MachOAnalyzer.h"
36 #include "CodeSigningTypes.h"
37 #include "Array.h"
38
39
40 namespace dyld3 {
41
42
43 const MachOAnalyzer* MachOAnalyzer::validMainExecutable(Diagnostics& diag, const mach_header* mh, const char* path, uint64_t sliceLength,
44 const GradedArchs& archs, Platform platform)
45 {
46 const MachOAnalyzer* result = (const MachOAnalyzer*)mh;
47 if ( !result->validMachOForArchAndPlatform(diag, (size_t)sliceLength, path, archs, platform) )
48 return nullptr;
49 if ( !result->isDynamicExecutable() )
50 return nullptr;
51
52 return result;
53 }
54
55
56 closure::LoadedFileInfo MachOAnalyzer::load(Diagnostics& diag, const closure::FileSystem& fileSystem,
57 const char* path, const GradedArchs& archs, Platform platform, char realerPath[MAXPATHLEN])
58 {
59 // FIXME: This should probably be an assert, but if we happen to have a diagnostic here then something is wrong
60 // above us and we should quickly return instead of doing unnecessary work.
61 if (diag.hasError())
62 return closure::LoadedFileInfo();
63
64 closure::LoadedFileInfo info;
65 if (!fileSystem.loadFile(path, info, realerPath, ^(const char *format, ...) {
66 va_list list;
67 va_start(list, format);
68 diag.error(format, list);
69 va_end(list);
70 })) {
71 return closure::LoadedFileInfo();
72 }
73
74 // If we now have an error, but succeeded, then we must have tried multiple paths, one of which errored, but
75 // then succeeded on a later path. So clear the error.
76 if (diag.hasError())
77 diag.clearError();
78
79 // if fat, remap just slice needed
80 bool fatButMissingSlice;
81 const FatFile* fh = (FatFile*)info.fileContent;
82 uint64_t sliceOffset = info.sliceOffset;
83 uint64_t sliceLen = info.sliceLen;
84 if ( fh->isFatFileWithSlice(diag, info.fileContentLen, archs, sliceOffset, sliceLen, fatButMissingSlice) ) {
85 // unmap anything before slice
86 fileSystem.unloadPartialFile(info, sliceOffset, sliceLen);
87 // Update the info to keep track of the new slice offset.
88 info.sliceOffset = sliceOffset;
89 info.sliceLen = sliceLen;
90 }
91 else if ( diag.hasError() ) {
92 // We must have generated an error in the fat file parsing so use that error
93 fileSystem.unloadFile(info);
94 return closure::LoadedFileInfo();
95 }
96 else if ( fatButMissingSlice ) {
97 diag.error("missing compatible arch in %s", path);
98 fileSystem.unloadFile(info);
99 return closure::LoadedFileInfo();
100 }
101
102 const MachOAnalyzer* mh = (MachOAnalyzer*)info.fileContent;
103
104 // validate is mach-o of requested arch and platform
105 if ( !mh->validMachOForArchAndPlatform(diag, (size_t)info.sliceLen, path, archs, platform) ) {
106 fileSystem.unloadFile(info);
107 return closure::LoadedFileInfo();
108 }
109
110 // if has zero-fill expansion, re-map
111 mh = mh->remapIfZeroFill(diag, fileSystem, info);
112
113 // on error, remove mappings and return nullptr
114 if ( diag.hasError() ) {
115 fileSystem.unloadFile(info);
116 return closure::LoadedFileInfo();
117 }
118
119 // now that LINKEDIT is at expected offset, finish validation
120 mh->validLinkedit(diag, path);
121
122 // on error, remove mappings and return nullptr
123 if ( diag.hasError() ) {
124 fileSystem.unloadFile(info);
125 return closure::LoadedFileInfo();
126 }
127
128 return info;
129 }
130
#if DEBUG
// Debug-only helper for the cache builder: after segments have been moved, re-run the
// LINKEDIT and segment validations. Results are reported solely through `diag`.
void MachOAnalyzer::validateDyldCacheDylib(Diagnostics& diag, const char* path) const
{
    // Both validations always run; their boolean results are intentionally unused.
    (void)validLinkedit(diag, path);
    (void)validSegments(diag, path, 0xffffffff);
}
#endif
139
140 uint64_t MachOAnalyzer::mappedSize() const
141 {
142 uint64_t vmSpace;
143 bool hasZeroFill;
144 analyzeSegmentsLayout(vmSpace, hasZeroFill);
145 return vmSpace;
146 }
147
148 bool MachOAnalyzer::validMachOForArchAndPlatform(Diagnostics& diag, size_t sliceLength, const char* path, const GradedArchs& archs, Platform platform) const
149 {
150 // must start with mach-o magic value
151 if ( (this->magic != MH_MAGIC) && (this->magic != MH_MAGIC_64) ) {
152 diag.error("could not use '%s' because it is not a mach-o file: 0x%08X 0x%08X", path, this->magic, this->cputype);
153 return false;
154 }
155
156 if ( !archs.grade(this->cputype, this->cpusubtype) ) {
157 diag.error("could not use '%s' because it is not a compatible arch", path);
158 return false;
159 }
160
161 // must be a filetype dyld can load
162 switch ( this->filetype ) {
163 case MH_EXECUTE:
164 case MH_DYLIB:
165 case MH_BUNDLE:
166 case MH_DYLINKER:
167 break;
168 default:
169 diag.error("could not use '%s' because it is not a dylib, bundle, or executable, filetype=0x%08X", path, this->filetype);
170 return false;
171 }
172
173 // validate load commands structure
174 if ( !this->validLoadCommands(diag, path, sliceLength) ) {
175 return false;
176 }
177
178 // filter out static executables
179 if ( (this->filetype == MH_EXECUTE) && !isDynamicExecutable() ) {
180 diag.error("could not use '%s' because it is a static executable", path);
181 return false;
182 }
183
184 // must match requested platform (do this after load commands are validated)
185 if ( !this->supportsPlatform(platform) ) {
186 diag.error("could not use '%s' because it was built for a different platform", path);
187 return false;
188 }
189
190 // validate dylib loads
191 if ( !validEmbeddedPaths(diag, platform, path) )
192 return false;
193
194 // validate segments
195 if ( !validSegments(diag, path, sliceLength) )
196 return false;
197
198 // validate entry
199 if ( this->filetype == MH_EXECUTE ) {
200 if ( !validMain(diag, path) )
201 return false;
202 }
203
204 // further validations done in validLinkedit()
205
206 return true;
207 }
208
209 bool MachOAnalyzer::validLinkedit(Diagnostics& diag, const char* path) const
210 {
211 // validate LINKEDIT layout
212 if ( !validLinkeditLayout(diag, path) )
213 return false;
214
215 if ( hasChainedFixups() ) {
216 if ( !validChainedFixupsInfo(diag, path) )
217 return false;
218 }
219 else {
220 // validate rebasing info
221 if ( !validRebaseInfo(diag, path) )
222 return false;
223
224 // validate binding info
225 if ( !validBindInfo(diag, path) )
226 return false;
227 }
228
229 return true;
230 }
231
232 bool MachOAnalyzer::validLoadCommands(Diagnostics& diag, const char* path, size_t fileLen) const
233 {
234 // check load command don't exceed file length
235 if ( this->sizeofcmds + machHeaderSize() > fileLen ) {
236 diag.error("in '%s' load commands exceed length of file", path);
237 return false;
238 }
239
240 // walk all load commands and sanity check them
241 Diagnostics walkDiag;
242 forEachLoadCommand(walkDiag, ^(const load_command* cmd, bool& stop) {});
243 if ( walkDiag.hasError() ) {
244 #if BUILDING_CACHE_BUILDER
245 diag.error("in '%s' %s", path, walkDiag.errorMessage().c_str());
246 #else
247 diag.error("in '%s' %s", path, walkDiag.errorMessage());
248 #endif
249 return false;
250 }
251
252 // check load commands fit in TEXT segment
253 __block bool foundTEXT = false;
254 forEachSegment(^(const SegmentInfo& info, bool& stop) {
255 if ( strcmp(info.segName, "__TEXT") == 0 ) {
256 foundTEXT = true;
257 if ( this->sizeofcmds + machHeaderSize() > info.fileSize ) {
258 diag.error("in '%s' load commands exceed length of __TEXT segment", path);
259 }
260 if ( info.fileOffset != 0 ) {
261 diag.error("in '%s' __TEXT segment not start of mach-o", path);
262 }
263 stop = true;
264 }
265 });
266 if ( !diag.noError() && !foundTEXT ) {
267 diag.error("in '%s' __TEXT segment not found", path);
268 return false;
269 }
270
271 return true;
272 }
273
274 const MachOAnalyzer* MachOAnalyzer::remapIfZeroFill(Diagnostics& diag, const closure::FileSystem& fileSystem, closure::LoadedFileInfo& info) const
275 {
276 uint64_t vmSpaceRequired;
277 bool hasZeroFill;
278 analyzeSegmentsLayout(vmSpaceRequired, hasZeroFill);
279
280 if ( hasZeroFill ) {
281 vm_address_t newMappedAddr;
282 if ( ::vm_allocate(mach_task_self(), &newMappedAddr, (size_t)vmSpaceRequired, VM_FLAGS_ANYWHERE) != 0 ) {
283 diag.error("vm_allocate failure");
284 return nullptr;
285 }
286 // re-map each segment read-only, with runtime layout
287 uint64_t textSegVmAddr = preferredLoadAddress();
288 forEachSegment(^(const SegmentInfo& segmentInfo, bool& stop) {
289 if ( segmentInfo.fileSize != 0 ) {
290 kern_return_t r = vm_copy(mach_task_self(), (vm_address_t)((long)info.fileContent+segmentInfo.fileOffset), (vm_size_t)segmentInfo.fileSize, (vm_address_t)(newMappedAddr+segmentInfo.vmAddr-textSegVmAddr));
291 if ( r != KERN_SUCCESS ) {
292 diag.error("vm_copy() failure");
293 stop = true;
294 }
295 }
296 });
297 if ( diag.noError() ) {
298 // remove original mapping and return new mapping
299 fileSystem.unloadFile(info);
300
301 // make the new mapping read-only
302 ::vm_protect(mach_task_self(), newMappedAddr, (vm_size_t)vmSpaceRequired, false, VM_PROT_READ);
303
304 // Set vm_deallocate as the unload method.
305 info.unload = [](const closure::LoadedFileInfo& info) {
306 ::vm_deallocate(mach_task_self(), (vm_address_t)info.fileContent, (size_t)info.fileContentLen);
307 };
308
309 // And update the file content to the new location
310 info.fileContent = (const void*)newMappedAddr;
311 info.fileContentLen = vmSpaceRequired;
312 return (const MachOAnalyzer*)info.fileContent;
313 }
314 else {
315 // new mapping failed, return old mapping with an error in diag
316 ::vm_deallocate(mach_task_self(), newMappedAddr, (size_t)vmSpaceRequired);
317 return nullptr;
318 }
319 }
320
321 return this;
322 }
323
324 void MachOAnalyzer::analyzeSegmentsLayout(uint64_t& vmSpace, bool& hasZeroFill) const
325 {
326 __block bool writeExpansion = false;
327 __block uint64_t lowestVmAddr = 0xFFFFFFFFFFFFFFFFULL;
328 __block uint64_t highestVmAddr = 0;
329 __block uint64_t sumVmSizes = 0;
330 forEachSegment(^(const SegmentInfo& segmentInfo, bool& stop) {
331 if ( strcmp(segmentInfo.segName, "__PAGEZERO") == 0 )
332 return;
333 if ( segmentInfo.writable() && (segmentInfo.fileSize != segmentInfo.vmSize) )
334 writeExpansion = true; // zerofill at end of __DATA
335 if ( segmentInfo.vmAddr < lowestVmAddr )
336 lowestVmAddr = segmentInfo.vmAddr;
337 if ( segmentInfo.vmAddr+segmentInfo.vmSize > highestVmAddr )
338 highestVmAddr = segmentInfo.vmAddr+segmentInfo.vmSize;
339 sumVmSizes += segmentInfo.vmSize;
340 });
341 uint64_t totalVmSpace = (highestVmAddr - lowestVmAddr);
342 // LINKEDIT vmSize is not required to be a multiple of page size. Round up if that is the case
343 const uint64_t pageSize = uses16KPages() ? 0x4000 : 0x1000;
344 totalVmSpace = (totalVmSpace + (pageSize - 1)) & ~(pageSize - 1);
345 bool hasHole = (totalVmSpace != sumVmSizes); // segments not contiguous
346
347 vmSpace = totalVmSpace;
348 hasZeroFill = writeExpansion || hasHole;
349 }
350
351 bool MachOAnalyzer::enforceFormat(Malformed kind) const
352 {
353 __block bool result = false;
354 forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
355 switch (platform) {
356 case Platform::macOS:
357 switch (kind) {
358 case Malformed::linkeditOrder:
359 case Malformed::linkeditAlignment:
360 case Malformed::dyldInfoAndlocalRelocs:
361 // enforce these checks on new binaries only
362 if (sdk >= 0x000A0E00) // macOS 10.14
363 result = true;
364 break;
365 case Malformed::segmentOrder:
366 case Malformed::linkeditPermissions:
367 case Malformed::textPermissions:
368 case Malformed::executableData:
369 case Malformed::codeSigAlignment:
370 // enforce these checks on new binaries only
371 if (sdk >= 0x000A0F00) // macOS 10.15
372 result = true;
373 break;
374 }
375 break;
376 case Platform::iOS:
377 switch (kind) {
378 case Malformed::linkeditOrder:
379 case Malformed::dyldInfoAndlocalRelocs:
380 case Malformed::textPermissions:
381 case Malformed::executableData:
382 result = true;
383 break;
384 case Malformed::linkeditAlignment:
385 case Malformed::segmentOrder:
386 case Malformed::linkeditPermissions:
387 case Malformed::codeSigAlignment:
388 // enforce these checks on new binaries only
389 if (sdk >= 0x000D0000) // iOS 13
390 result = true;
391 break;
392 }
393 break;
394 default:
395 result = true;
396 break;
397 }
398 });
399 // if binary is so old, there is no platform info, don't enforce malformed errors
400 return result;
401 }
402
403 bool MachOAnalyzer::validEmbeddedPaths(Diagnostics& diag, Platform platform, const char* path) const
404 {
405 __block int index = 1;
406 __block bool allGood = true;
407 __block bool foundInstallName = false;
408 __block int dependentsCount = 0;
409 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
410 const dylib_command* dylibCmd;
411 const rpath_command* rpathCmd;
412 switch ( cmd->cmd ) {
413 case LC_ID_DYLIB:
414 foundInstallName = true;
415 // fall through
416 [[clang::fallthrough]];
417 case LC_LOAD_DYLIB:
418 case LC_LOAD_WEAK_DYLIB:
419 case LC_REEXPORT_DYLIB:
420 case LC_LOAD_UPWARD_DYLIB:
421 dylibCmd = (dylib_command*)cmd;
422 if ( dylibCmd->dylib.name.offset > cmd->cmdsize ) {
423 diag.error("in '%s' load command #%d name offset (%u) outside its size (%u)", path, index, dylibCmd->dylib.name.offset, cmd->cmdsize);
424 stop = true;
425 allGood = false;
426 }
427 else {
428 bool foundEnd = false;
429 const char* start = (char*)dylibCmd + dylibCmd->dylib.name.offset;
430 const char* end = (char*)dylibCmd + cmd->cmdsize;
431 for (const char* s=start; s < end; ++s) {
432 if ( *s == '\0' ) {
433 foundEnd = true;
434 break;
435 }
436 }
437 if ( !foundEnd ) {
438 diag.error("in '%s' load command #%d string extends beyond end of load command", path, index);
439 stop = true;
440 allGood = false;
441 }
442 }
443 if ( cmd->cmd != LC_ID_DYLIB )
444 ++dependentsCount;
445 break;
446 case LC_RPATH:
447 rpathCmd = (rpath_command*)cmd;
448 if ( rpathCmd->path.offset > cmd->cmdsize ) {
449 diag.error("in '%s' load command #%d path offset (%u) outside its size (%u)", path, index, rpathCmd->path.offset, cmd->cmdsize);
450 stop = true;
451 allGood = false;
452 }
453 else {
454 bool foundEnd = false;
455 const char* start = (char*)rpathCmd + rpathCmd->path.offset;
456 const char* end = (char*)rpathCmd + cmd->cmdsize;
457 for (const char* s=start; s < end; ++s) {
458 if ( *s == '\0' ) {
459 foundEnd = true;
460 break;
461 }
462 }
463 if ( !foundEnd ) {
464 diag.error("in '%s' load command #%d string extends beyond end of load command", path, index);
465 stop = true;
466 allGood = false;
467 }
468 }
469 break;
470 }
471 ++index;
472 });
473 if ( !allGood )
474 return false;
475
476 if ( this->filetype == MH_DYLIB ) {
477 if ( !foundInstallName ) {
478 diag.error("in '%s' MH_DYLIB is missing LC_ID_DYLIB", path);
479 return false;
480 }
481 }
482 else {
483 if ( foundInstallName ) {
484 diag.error("in '%s' LC_ID_DYLIB found in non-MH_DYLIB", path);
485 return false;
486 }
487 }
488
489 if ( (dependentsCount == 0) && (this->filetype == MH_EXECUTE) ) {
490 diag.error("in '%s' missing LC_LOAD_DYLIB (must link with at least libSystem.dylib)", path);
491 return false;
492 }
493
494 return true;
495 }
496
497 bool MachOAnalyzer::validSegments(Diagnostics& diag, const char* path, size_t fileLen) const
498 {
499 // check segment load command size
500 __block bool badSegmentLoadCommand = false;
501 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
502 if ( cmd->cmd == LC_SEGMENT_64 ) {
503 const segment_command_64* seg = (segment_command_64*)cmd;
504 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command_64);
505 if ( sectionsSpace < 0 ) {
506 diag.error("in '%s' load command size too small for LC_SEGMENT_64", path);
507 badSegmentLoadCommand = true;
508 stop = true;
509 }
510 else if ( (sectionsSpace % sizeof(section_64)) != 0 ) {
511 diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize);
512 badSegmentLoadCommand = true;
513 stop = true;
514 }
515 else if ( sectionsSpace != (seg->nsects * sizeof(section_64)) ) {
516 diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects);
517 badSegmentLoadCommand = true;
518 stop = true;
519 }
520 else if ( greaterThanAddOrOverflow(seg->fileoff, seg->filesize, fileLen) ) {
521 diag.error("in '%s' segment load command content extends beyond end of file", path);
522 badSegmentLoadCommand = true;
523 stop = true;
524 }
525 else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
526 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
527 diag.error("in '%s' segment filesize exceeds vmsize", path);
528 badSegmentLoadCommand = true;
529 stop = true;
530 }
531 }
532 else if ( cmd->cmd == LC_SEGMENT ) {
533 const segment_command* seg = (segment_command*)cmd;
534 int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command);
535 if ( sectionsSpace < 0 ) {
536 diag.error("in '%s' load command size too small for LC_SEGMENT", path);
537 badSegmentLoadCommand = true;
538 stop = true;
539 }
540 else if ( (sectionsSpace % sizeof(section)) != 0 ) {
541 diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize);
542 badSegmentLoadCommand = true;
543 stop = true;
544 }
545 else if ( sectionsSpace != (seg->nsects * sizeof(section)) ) {
546 diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects);
547 badSegmentLoadCommand = true;
548 stop = true;
549 }
550 else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) {
551 // <rdar://problem/19986776> dyld should support non-allocatable __LLVM segment
552 diag.error("in '%s' segment filesize exceeds vmsize", path);
553 badSegmentLoadCommand = true;
554 stop = true;
555 }
556 }
557 });
558 if ( badSegmentLoadCommand )
559 return false;
560
561 // check mapping permissions of segments
562 __block bool badPermissions = false;
563 __block bool badSize = false;
564 __block bool hasTEXT = false;
565 __block bool hasLINKEDIT = false;
566 forEachSegment(^(const SegmentInfo& info, bool& stop) {
567 if ( strcmp(info.segName, "__TEXT") == 0 ) {
568 if ( (info.protections != (VM_PROT_READ|VM_PROT_EXECUTE)) && enforceFormat(Malformed::textPermissions) ) {
569 diag.error("in '%s' __TEXT segment permissions is not 'r-x'", path);
570 badPermissions = true;
571 stop = true;
572 }
573 hasTEXT = true;
574 }
575 else if ( strcmp(info.segName, "__LINKEDIT") == 0 ) {
576 if ( (info.protections != VM_PROT_READ) && enforceFormat(Malformed::linkeditPermissions) ) {
577 diag.error("in '%s' __LINKEDIT segment permissions is not 'r--'", path);
578 badPermissions = true;
579 stop = true;
580 }
581 hasLINKEDIT = true;
582 }
583 else if ( (info.protections & 0xFFFFFFF8) != 0 ) {
584 diag.error("in '%s' %s segment permissions has invalid bits set", path, info.segName);
585 badPermissions = true;
586 stop = true;
587 }
588 if ( greaterThanAddOrOverflow(info.fileOffset, info.fileSize, fileLen) ) {
589 diag.error("in '%s' %s segment content extends beyond end of file", path, info.segName);
590 badSize = true;
591 stop = true;
592 }
593 if ( is64() ) {
594 if ( info.vmAddr+info.vmSize < info.vmAddr ) {
595 diag.error("in '%s' %s segment vm range wraps", path, info.segName);
596 badSize = true;
597 stop = true;
598 }
599 }
600 else {
601 if ( (uint32_t)(info.vmAddr+info.vmSize) < (uint32_t)(info.vmAddr) ) {
602 diag.error("in '%s' %s segment vm range wraps", path, info.segName);
603 badSize = true;
604 stop = true;
605 }
606 }
607 });
608 if ( badPermissions || badSize )
609 return false;
610 if ( !hasTEXT ) {
611 diag.error("in '%s' missing __TEXT segment", path);
612 return false;
613 }
614 if ( !hasLINKEDIT ) {
615 diag.error("in '%s' missing __LINKEDIT segment", path);
616 return false;
617 }
618
619 // check for overlapping segments
620 __block bool badSegments = false;
621 forEachSegment(^(const SegmentInfo& info1, bool& stop1) {
622 uint64_t seg1vmEnd = info1.vmAddr + info1.vmSize;
623 uint64_t seg1FileEnd = info1.fileOffset + info1.fileSize;
624 forEachSegment(^(const SegmentInfo& info2, bool& stop2) {
625 if ( info1.segIndex == info2.segIndex )
626 return;
627 uint64_t seg2vmEnd = info2.vmAddr + info2.vmSize;
628 uint64_t seg2FileEnd = info2.fileOffset + info2.fileSize;
629 if ( ((info2.vmAddr <= info1.vmAddr) && (seg2vmEnd > info1.vmAddr) && (seg1vmEnd > info1.vmAddr )) || ((info2.vmAddr >= info1.vmAddr ) && (info2.vmAddr < seg1vmEnd) && (seg2vmEnd > info2.vmAddr)) ) {
630 diag.error("in '%s' segment %s vm range overlaps segment %s", path, info1.segName, info2.segName);
631 badSegments = true;
632 stop1 = true;
633 stop2 = true;
634 }
635 if ( ((info2.fileOffset <= info1.fileOffset) && (seg2FileEnd > info1.fileOffset) && (seg1FileEnd > info1.fileOffset)) || ((info2.fileOffset >= info1.fileOffset) && (info2.fileOffset < seg1FileEnd) && (seg2FileEnd > info2.fileOffset )) ) {
636 diag.error("in '%s' segment %s file content overlaps segment %s", path, info1.segName, info2.segName);
637 badSegments = true;
638 stop1 = true;
639 stop2 = true;
640 }
641 if ( (info1.segIndex < info2.segIndex) && !stop1 ) {
642 if ( (info1.vmAddr > info2.vmAddr) || ((info1.fileOffset > info2.fileOffset ) && (info1.fileOffset != 0) && (info2.fileOffset != 0)) ){
643 if ( !inDyldCache() && enforceFormat(Malformed::segmentOrder) ) {
644 // dyld cache __DATA_* segments are moved around
645 diag.error("in '%s' segment load commands out of order with respect to layout for %s and %s", path, info1.segName, info2.segName);
646 badSegments = true;
647 stop1 = true;
648 stop2 = true;
649 }
650 }
651 }
652 });
653 });
654 if ( badSegments )
655 return false;
656
657 // check sections are within segment
658 __block bool badSections = false;
659 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
660 if ( cmd->cmd == LC_SEGMENT_64 ) {
661 const segment_command_64* seg = (segment_command_64*)cmd;
662 const section_64* const sectionsStart = (section_64*)((char*)seg + sizeof(struct segment_command_64));
663 const section_64* const sectionsEnd = &sectionsStart[seg->nsects];
664 for (const section_64* sect=sectionsStart; (sect < sectionsEnd); ++sect) {
665 if ( (int64_t)(sect->size) < 0 ) {
666 diag.error("in '%s' section %s size too large 0x%llX", path, sect->sectname, sect->size);
667 badSections = true;
668 }
669 else if ( sect->addr < seg->vmaddr ) {
670 diag.error("in '%s' section %s start address 0x%llX is before containing segment's address 0x%0llX", path, sect->sectname, sect->addr, seg->vmaddr);
671 badSections = true;
672 }
673 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
674 diag.error("in '%s' section %s end address 0x%llX is beyond containing segment's end address 0x%0llX", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
675 badSections = true;
676 }
677 }
678 }
679 else if ( cmd->cmd == LC_SEGMENT ) {
680 const segment_command* seg = (segment_command*)cmd;
681 const section* const sectionsStart = (section*)((char*)seg + sizeof(struct segment_command));
682 const section* const sectionsEnd = &sectionsStart[seg->nsects];
683 for (const section* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
684 if ( (int64_t)(sect->size) < 0 ) {
685 diag.error("in '%s' section %s size too large 0x%X", path, sect->sectname, sect->size);
686 badSections = true;
687 }
688 else if ( sect->addr < seg->vmaddr ) {
689 diag.error("in '%s' section %s start address 0x%X is before containing segment's address 0x%0X", path, sect->sectname, sect->addr, seg->vmaddr);
690 badSections = true;
691 }
692 else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
693 diag.error("in '%s' section %s end address 0x%X is beyond containing segment's end address 0x%0X", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
694 badSections = true;
695 }
696 }
697 }
698 });
699
700 return !badSections;
701 }
702
703
704 bool MachOAnalyzer::validMain(Diagnostics& diag, const char* path) const
705 {
706 __block uint64_t textSegStartAddr = 0;
707 __block uint64_t textSegStartSize = 0;
708 forEachSegment(^(const SegmentInfo& info, bool& stop) {
709 if ( strcmp(info.segName, "__TEXT") == 0 ) {
710 textSegStartAddr = info.vmAddr;
711 textSegStartSize = info.vmSize;
712 stop = true;
713 }
714 });
715
716 __block int mainCount = 0;
717 __block int threadCount = 0;
718 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
719 entry_point_command* mainCmd;
720 uint64_t startAddress;
721 switch (cmd->cmd) {
722 case LC_MAIN:
723 ++mainCount;
724 mainCmd = (entry_point_command*)cmd;
725 if ( mainCmd->entryoff >= textSegStartSize ) {
726 startAddress = preferredLoadAddress() + mainCmd->entryoff;
727 __block bool foundSegment = false;
728 forEachSegment(^(const SegmentInfo& info, bool& stopSegment) {
729 // Skip segments which don't contain this address
730 if ( (startAddress < info.vmAddr) || (startAddress >= info.vmAddr+info.vmSize) )
731 return;
732 foundSegment = true;
733 if ( (info.protections & VM_PROT_EXECUTE) == 0 )
734 diag.error("LC_MAIN points to non-executable segment");
735 stopSegment = true;
736 });
737 if (!foundSegment)
738 diag.error("LC_MAIN entryoff is out of range");
739 stop = true;
740 }
741 break;
742 case LC_UNIXTHREAD:
743 ++threadCount;
744 startAddress = entryAddrFromThreadCmd((thread_command*)cmd);
745 if ( startAddress == 0 ) {
746 diag.error("LC_UNIXTHREAD not valid for arch %s", archName());
747 stop = true;
748 }
749 else if ( (startAddress < textSegStartAddr) || (startAddress >= textSegStartAddr+textSegStartSize) ) {
750 diag.error("LC_UNIXTHREAD entry not in __TEXT segment");
751 stop = true;
752 }
753 break;
754 }
755 });
756 if ( diag.hasError() )
757 return false;
758 if ( diag.noError() && (mainCount+threadCount == 1) )
759 return true;
760
761 if ( mainCount + threadCount == 0 )
762 diag.error("missing LC_MAIN or LC_UNIXTHREAD");
763 else
764 diag.error("only one LC_MAIN or LC_UNIXTHREAD is allowed");
765 return false;
766 }
767
768
namespace {
    // Describes one blob of LINKEDIT content so blobs can be sorted and checked for
    // overlap and alignment.
    struct LinkEditContentChunk
    {
        const char* name;              // human readable name used in error messages
        uint32_t    alignment;         // required alignment of fileOffsetStart
        uint32_t    fileOffsetStart;   // file offset at which the blob starts
        uint32_t    size;              // size of the blob in bytes

        // qsort() comparator ordering chunks by ascending fileOffsetStart.
        // Returns <0, 0 or >0. Equal offsets must compare as 0: a comparator that
        // reports "greater" in both directions violates the qsort() contract.
        static int compareByFileOffset(const void* l, const void* r) {
            const uint32_t lhs = static_cast<const LinkEditContentChunk*>(l)->fileOffsetStart;
            const uint32_t rhs = static_cast<const LinkEditContentChunk*>(r)->fileOffsetStart;
            if ( lhs < rhs )
                return -1;
            if ( lhs > rhs )
                return 1;
            return 0;
        }
    };
} // anonymous namespace
785
786
787
788 bool MachOAnalyzer::validLinkeditLayout(Diagnostics& diag, const char* path) const
789 {
790 LinkEditInfo leInfo;
791 getLinkEditPointers(diag, leInfo);
792 if ( diag.hasError() )
793 return false;
794 const uint32_t ptrSize = pointerSize();
795
796 // build vector of all blobs in LINKEDIT
797 LinkEditContentChunk blobs[32];
798 LinkEditContentChunk* bp = blobs;
799 if ( leInfo.dyldInfo != nullptr ) {
800 if ( leInfo.dyldInfo->rebase_size != 0 )
801 *bp++ = {"rebase opcodes", ptrSize, leInfo.dyldInfo->rebase_off, leInfo.dyldInfo->rebase_size};
802 if ( leInfo.dyldInfo->bind_size != 0 )
803 *bp++ = {"bind opcodes", ptrSize, leInfo.dyldInfo->bind_off, leInfo.dyldInfo->bind_size};
804 if ( leInfo.dyldInfo->weak_bind_size != 0 )
805 *bp++ = {"weak bind opcodes", ptrSize, leInfo.dyldInfo->weak_bind_off, leInfo.dyldInfo->weak_bind_size};
806 if ( leInfo.dyldInfo->lazy_bind_size != 0 )
807 *bp++ = {"lazy bind opcodes", ptrSize, leInfo.dyldInfo->lazy_bind_off, leInfo.dyldInfo->lazy_bind_size};
808 if ( leInfo.dyldInfo->export_size!= 0 )
809 *bp++ = {"exports trie", ptrSize, leInfo.dyldInfo->export_off, leInfo.dyldInfo->export_size};
810 }
811 if ( leInfo.exportsTrie != nullptr ) {
812 if ( leInfo.exportsTrie->datasize != 0 )
813 *bp++ = {"exports trie", ptrSize, leInfo.exportsTrie->dataoff, leInfo.exportsTrie->datasize};
814 }
815
816 if ( leInfo.dynSymTab != nullptr ) {
817 if ( leInfo.dynSymTab->nlocrel != 0 )
818 *bp++ = {"local relocations", ptrSize, leInfo.dynSymTab->locreloff, static_cast<uint32_t>(leInfo.dynSymTab->nlocrel*sizeof(relocation_info))};
819 if ( leInfo.dynSymTab->nextrel != 0 )
820 *bp++ = {"external relocations", ptrSize, leInfo.dynSymTab->extreloff, static_cast<uint32_t>(leInfo.dynSymTab->nextrel*sizeof(relocation_info))};
821 if ( leInfo.dynSymTab->nindirectsyms != 0 )
822 *bp++ = {"indirect symbol table", 4, leInfo.dynSymTab->indirectsymoff, leInfo.dynSymTab->nindirectsyms*4};
823 }
824 if ( leInfo.splitSegInfo != nullptr ) {
825 if ( leInfo.splitSegInfo->datasize != 0 )
826 *bp++ = {"shared cache info", ptrSize, leInfo.splitSegInfo->dataoff, leInfo.splitSegInfo->datasize};
827 }
828 if ( leInfo.functionStarts != nullptr ) {
829 if ( leInfo.functionStarts->datasize != 0 )
830 *bp++ = {"function starts", ptrSize, leInfo.functionStarts->dataoff, leInfo.functionStarts->datasize};
831 }
832 if ( leInfo.dataInCode != nullptr ) {
833 if ( leInfo.dataInCode->datasize != 0 )
834 *bp++ = {"data in code", ptrSize, leInfo.dataInCode->dataoff, leInfo.dataInCode->datasize};
835 }
836 if ( leInfo.symTab != nullptr ) {
837 if ( leInfo.symTab->nsyms != 0 )
838 *bp++ = {"symbol table", ptrSize, leInfo.symTab->symoff, static_cast<uint32_t>(leInfo.symTab->nsyms*(ptrSize == 8 ? sizeof(nlist_64) : sizeof(struct nlist)))};
839 if ( leInfo.symTab->strsize != 0 )
840 *bp++ = {"symbol table strings", 1, leInfo.symTab->stroff, leInfo.symTab->strsize};
841 }
842 if ( leInfo.codeSig != nullptr ) {
843 if ( leInfo.codeSig->datasize != 0 )
844 *bp++ = {"code signature", ptrSize, leInfo.codeSig->dataoff, leInfo.codeSig->datasize};
845 }
846
847 // check for bad combinations
848 if ( (leInfo.dyldInfo != nullptr) && (leInfo.dyldInfo->cmd == LC_DYLD_INFO_ONLY) && (leInfo.dynSymTab != nullptr) ) {
849 if ( (leInfo.dynSymTab->nlocrel != 0) && enforceFormat(Malformed::dyldInfoAndlocalRelocs) ) {
850 diag.error("in '%s' malformed mach-o contains LC_DYLD_INFO_ONLY and local relocations", path);
851 return false;
852 }
853 if ( leInfo.dynSymTab->nextrel != 0 ) {
854 diag.error("in '%s' malformed mach-o contains LC_DYLD_INFO_ONLY and external relocations", path);
855 return false;
856 }
857 }
858 if ( (leInfo.dyldInfo == nullptr) && (leInfo.dynSymTab == nullptr) ) {
859 diag.error("in '%s' malformed mach-o misssing LC_DYLD_INFO and LC_DYSYMTAB", path);
860 return false;
861 }
862 const unsigned long blobCount = bp - blobs;
863 if ( blobCount == 0 ) {
864 diag.error("in '%s' malformed mach-o misssing LINKEDIT", path);
865 return false;
866 }
867
868 uint32_t linkeditFileEnd = leInfo.layout.linkeditFileOffset + leInfo.layout.linkeditFileSize;
869
870
871 // sort blobs by file-offset and error on overlaps
872 ::qsort(blobs, blobCount, sizeof(LinkEditContentChunk), &LinkEditContentChunk::compareByFileOffset);
873 uint32_t prevEnd = leInfo.layout.linkeditFileOffset;
874 const char* prevName = "start of LINKEDIT";
875 for (unsigned long i=0; i < blobCount; ++i) {
876 const LinkEditContentChunk& blob = blobs[i];
877 if ( blob.fileOffsetStart < prevEnd ) {
878 diag.error("in '%s' LINKEDIT overlap of %s and %s", path, prevName, blob.name);
879 return false;
880 }
881 if (greaterThanAddOrOverflow(blob.fileOffsetStart, blob.size, linkeditFileEnd)) {
882 diag.error("in '%s' LINKEDIT content '%s' extends beyond end of segment", path, blob.name);
883 return false;
884 }
885 if ( (blob.fileOffsetStart & (blob.alignment-1)) != 0 ) {
886 // <rdar://problem/51115705> relax code sig alignment for pre iOS13
887 Malformed kind = (strcmp(blob.name, "code signature") == 0) ? Malformed::codeSigAlignment : Malformed::linkeditAlignment;
888 if ( enforceFormat(kind) )
889 diag.error("in '%s' mis-aligned LINKEDIT content '%s'", path, blob.name);
890 }
891 prevEnd = blob.fileOffsetStart + blob.size;
892 prevName = blob.name;
893 }
894
895 // Check for invalid symbol table sizes
896 if ( leInfo.symTab != nullptr ) {
897 if ( leInfo.symTab->nsyms > 0x10000000 ) {
898 diag.error("in '%s' malformed mach-o image: symbol table too large", path);
899 return false;
900 }
901 if ( leInfo.dynSymTab != nullptr ) {
902 // validate indirect symbol table
903 if ( leInfo.dynSymTab->nindirectsyms != 0 ) {
904 if ( leInfo.dynSymTab->nindirectsyms > 0x10000000 ) {
905 diag.error("in '%s' malformed mach-o image: indirect symbol table too large", path);
906 return false;
907 }
908 }
909 if ( (leInfo.dynSymTab->nlocalsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->ilocalsym > leInfo.symTab->nsyms) ) {
910 diag.error("in '%s' malformed mach-o image: indirect symbol table local symbol count exceeds total symbols", path);
911 return false;
912 }
913 if ( leInfo.dynSymTab->ilocalsym + leInfo.dynSymTab->nlocalsym < leInfo.dynSymTab->ilocalsym ) {
914 diag.error("in '%s' malformed mach-o image: indirect symbol table local symbol count wraps", path);
915 return false;
916 }
917 if ( (leInfo.dynSymTab->nextdefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iextdefsym > leInfo.symTab->nsyms) ) {
918 diag.error("in '%s' malformed mach-o image: indirect symbol table extern symbol count exceeds total symbols", path);
919 return false;
920 }
921 if ( leInfo.dynSymTab->iextdefsym + leInfo.dynSymTab->nextdefsym < leInfo.dynSymTab->iextdefsym ) {
922 diag.error("in '%s' malformed mach-o image: indirect symbol table extern symbol count wraps", path);
923 return false;
924 }
925 if ( (leInfo.dynSymTab->nundefsym > leInfo.symTab->nsyms) || (leInfo.dynSymTab->iundefsym > leInfo.symTab->nsyms) ) {
926 diag.error("in '%s' malformed mach-o image: indirect symbol table undefined symbol count exceeds total symbols", path);
927 return false;
928 }
929 if ( leInfo.dynSymTab->iundefsym + leInfo.dynSymTab->nundefsym < leInfo.dynSymTab->iundefsym ) {
930 diag.error("in '%s' malformed mach-o image: indirect symbol table undefined symbol count wraps", path);
931 return false;
932 }
933 }
934 }
935
936 return true;
937 }
938
939
940
941 bool MachOAnalyzer::invalidRebaseState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
942 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type) const
943 {
944 if ( !segIndexSet ) {
945 diag.error("in '%s' %s missing preceding REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path, opcodeName);
946 return true;
947 }
948 if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
949 diag.error("in '%s' %s segment index %d too large", path, opcodeName, segmentIndex);
950 return true;
951 }
952 if ( segmentOffset > (segments[segmentIndex].vmSize-ptrSize) ) {
953 diag.error("in '%s' %s current segment offset 0x%08llX beyond segment size (0x%08llX)", path, opcodeName, segmentOffset, segments[segmentIndex].vmSize);
954 return true;
955 }
956 switch ( type ) {
957 case REBASE_TYPE_POINTER:
958 if ( !segments[segmentIndex].writable() ) {
959 diag.error("in '%s' %s pointer rebase is in non-writable segment", path, opcodeName);
960 return true;
961 }
962 if ( segments[segmentIndex].executable() && enforceFormat(Malformed::executableData) ) {
963 diag.error("in '%s' %s pointer rebase is in executable segment", path, opcodeName);
964 return true;
965 }
966 break;
967 case REBASE_TYPE_TEXT_ABSOLUTE32:
968 case REBASE_TYPE_TEXT_PCREL32:
969 if ( !segments[segmentIndex].textRelocs ) {
970 diag.error("in '%s' %s text rebase is in segment that does not support text relocations", path, opcodeName);
971 return true;
972 }
973 if ( segments[segmentIndex].writable() ) {
974 diag.error("in '%s' %s text rebase is in writable segment", path, opcodeName);
975 return true;
976 }
977 if ( !segments[segmentIndex].executable() ) {
978 diag.error("in '%s' %s pointer rebase is in non-executable segment", path, opcodeName);
979 return true;
980 }
981 break;
982 default:
983 diag.error("in '%s' %s unknown rebase type %d", path, opcodeName, type);
984 return true;
985 }
986 return false;
987 }
988
989
990 void MachOAnalyzer::getAllSegmentsInfos(Diagnostics& diag, SegmentInfo segments[]) const
991 {
992 forEachSegment(^(const SegmentInfo& info, bool& stop) {
993 segments[info.segIndex] = info;
994 });
995 }
996
997
998 bool MachOAnalyzer::validRebaseInfo(Diagnostics& diag, const char* path) const
999 {
1000 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1001 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1002 if ( invalidRebaseState(diag, opcodeName, path, leInfo, segments, segIndexSet, ptrSize, segmentIndex, segmentOffset, type) )
1003 stop = true;
1004 });
1005 return diag.noError();
1006 }
1007
1008
1009 void MachOAnalyzer::forEachTextRebase(Diagnostics& diag, void (^handler)(uint64_t runtimeOffset, bool& stop)) const
1010 {
1011 __block bool startVmAddrSet = false;
1012 __block uint64_t startVmAddr = 0;
1013 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1014 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1015 if ( type != REBASE_TYPE_TEXT_ABSOLUTE32 )
1016 return;
1017 if ( !startVmAddrSet ) {
1018 for (int i=0; i <= segmentIndex; ++i) {
1019 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1020 startVmAddr = segments[i].vmAddr;
1021 startVmAddrSet = true;
1022 break;
1023 }
1024 }
1025 }
1026 uint64_t rebaseVmAddr = segments[segmentIndex].vmAddr + segmentOffset;
1027 uint64_t runtimeOffset = rebaseVmAddr - startVmAddr;
1028 handler(runtimeOffset, stop);
1029 });
1030 }
1031
1032
1033 void MachOAnalyzer::forEachRebase(Diagnostics& diag, bool ignoreLazyPointers, void (^handler)(uint64_t runtimeOffset, bool& stop)) const
1034 {
1035 __block bool startVmAddrSet = false;
1036 __block uint64_t startVmAddr = 0;
1037 __block uint64_t lpVmAddr = 0;
1038 __block uint64_t lpEndVmAddr = 0;
1039 __block uint64_t shVmAddr = 0;
1040 __block uint64_t shEndVmAddr = 0;
1041 if ( ignoreLazyPointers ) {
1042 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
1043 if ( (info.sectFlags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
1044 lpVmAddr = info.sectAddr;
1045 lpEndVmAddr = info.sectAddr + info.sectSize;
1046 }
1047 else if ( (info.sectFlags & S_ATTR_PURE_INSTRUCTIONS) && (strcmp(info.sectName, "__stub_helper") == 0) ) {
1048 shVmAddr = info.sectAddr;
1049 shEndVmAddr = info.sectAddr + info.sectSize;
1050 }
1051 });
1052 }
1053 forEachRebase(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1054 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop) {
1055 if ( type != REBASE_TYPE_POINTER )
1056 return;
1057 if ( !startVmAddrSet ) {
1058 for (int i=0; i < segmentIndex; ++i) {
1059 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1060 startVmAddr = segments[i].vmAddr;
1061 startVmAddrSet = true;
1062 break;
1063 }
1064 }
1065 }
1066 uint64_t rebaseVmAddr = segments[segmentIndex].vmAddr + segmentOffset;
1067 bool skipRebase = false;
1068 if ( (rebaseVmAddr >= lpVmAddr) && (rebaseVmAddr < lpEndVmAddr) ) {
1069 // rebase is in lazy pointer section
1070 uint64_t lpValue = 0;
1071 if ( ptrSize == 8 )
1072 lpValue = *((uint64_t*)(rebaseVmAddr-startVmAddr+(uint8_t*)this));
1073 else
1074 lpValue = *((uint32_t*)(rebaseVmAddr-startVmAddr+(uint8_t*)this));
1075 if ( (lpValue >= shVmAddr) && (lpValue < shEndVmAddr) ) {
1076 // content is into stub_helper section
1077 uint64_t lpTargetImageOffset = lpValue - startVmAddr;
1078 const uint8_t* helperContent = (uint8_t*)this + lpTargetImageOffset;
1079 bool isLazyStub = contentIsRegularStub(helperContent);
1080 // ignore rebases for normal lazy pointers, but leave rebase for resolver helper stub
1081 if ( isLazyStub )
1082 skipRebase = true;
1083 }
1084 else {
1085 // if lazy pointer does not point into stub_helper, then it points to weak-def symbol and we need rebase
1086 }
1087 }
1088 if ( !skipRebase ) {
1089 uint64_t runtimeOffset = rebaseVmAddr - startVmAddr;
1090 handler(runtimeOffset, stop);
1091 }
1092 });
1093 }
1094
1095
1096 bool MachOAnalyzer::contentIsRegularStub(const uint8_t* helperContent) const
1097 {
1098 switch (this->cputype) {
1099 case CPU_TYPE_X86_64:
1100 return ( (helperContent[0] == 0x68) && (helperContent[5] == 0xE9) ); // push $xxx / JMP pcRel
1101 case CPU_TYPE_I386:
1102 return ( (helperContent[0] == 0x68) && (helperContent[5] == 0xFF) && (helperContent[2] == 0x26) ); // push $xxx / JMP *pcRel
1103 case CPU_TYPE_ARM:
1104 return ( (helperContent[0] == 0x00) && (helperContent[1] == 0xC0) && (helperContent[2] == 0x9F) && (helperContent[3] == 0xE5) ); // ldr ip, [pc, #0]
1105 case CPU_TYPE_ARM64:
1106 return ( (helperContent[0] == 0x50) && (helperContent[1] == 0x00) && (helperContent[2] == 0x00) && (helperContent[3] == 0x18) ); // ldr w16, L0
1107
1108 }
1109 return false;
1110 }
1111
// qsort() comparator ordering uint32_t values ascending.
// Returns <0, 0 or >0 as required by qsort(); equal keys must compare as 0 so that the
// comparator is consistent (cmp(a,b) and cmp(b,a) may not both claim "greater").
static int uint32Sorter(const void* l, const void* r) {
    const uint32_t lhs = *static_cast<const uint32_t*>(l);
    const uint32_t rhs = *static_cast<const uint32_t*>(r);
    if ( lhs < rhs )
        return -1;
    if ( lhs > rhs )
        return 1;
    return 0;
}
1118
1119
1120 void MachOAnalyzer::forEachRebase(Diagnostics& diag,
1121 void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1122 bool segIndexSet, uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1123 uint8_t type, bool& stop)) const
1124 {
1125 LinkEditInfo leInfo;
1126 getLinkEditPointers(diag, leInfo);
1127 if ( diag.hasError() )
1128 return;
1129
1130 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1131 getAllSegmentsInfos(diag, segmentsInfo);
1132 if ( diag.hasError() )
1133 return;
1134
1135 if ( leInfo.dyldInfo != nullptr ) {
1136 const uint8_t* const start = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
1137 const uint8_t* const end = start + leInfo.dyldInfo->rebase_size;
1138 const uint8_t* p = start;
1139 const uint32_t ptrSize = pointerSize();
1140 uint8_t type = 0;
1141 int segIndex = 0;
1142 uint64_t segOffset = 0;
1143 uint64_t count;
1144 uint64_t skip;
1145 bool segIndexSet = false;
1146 bool stop = false;
1147 while ( !stop && diag.noError() && (p < end) ) {
1148 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
1149 uint8_t opcode = *p & REBASE_OPCODE_MASK;
1150 ++p;
1151 switch (opcode) {
1152 case REBASE_OPCODE_DONE:
1153 if ( (end - p) > 8 )
1154 diag.error("rebase opcodes terminated early at offset %d of %d", (int)(p-start), (int)(end-start));
1155 stop = true;
1156 break;
1157 case REBASE_OPCODE_SET_TYPE_IMM:
1158 type = immediate;
1159 break;
1160 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1161 segIndex = immediate;
1162 segOffset = read_uleb128(diag, p, end);
1163 segIndexSet = true;
1164 break;
1165 case REBASE_OPCODE_ADD_ADDR_ULEB:
1166 segOffset += read_uleb128(diag, p, end);
1167 break;
1168 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
1169 segOffset += immediate*ptrSize;
1170 break;
1171 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
1172 for (int i=0; i < immediate; ++i) {
1173 handler("REBASE_OPCODE_DO_REBASE_IMM_TIMES", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1174 segOffset += ptrSize;
1175 if ( stop )
1176 break;
1177 }
1178 break;
1179 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
1180 count = read_uleb128(diag, p, end);
1181 for (uint32_t i=0; i < count; ++i) {
1182 handler("REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1183 segOffset += ptrSize;
1184 if ( stop )
1185 break;
1186 }
1187 break;
1188 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
1189 handler("REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1190 segOffset += read_uleb128(diag, p, end) + ptrSize;
1191 break;
1192 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
1193 count = read_uleb128(diag, p, end);
1194 if ( diag.hasError() )
1195 break;
1196 skip = read_uleb128(diag, p, end);
1197 for (uint32_t i=0; i < count; ++i) {
1198 handler("REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, ptrSize, segIndex, segOffset, type, stop);
1199 segOffset += skip + ptrSize;
1200 if ( stop )
1201 break;
1202 }
1203 break;
1204 default:
1205 diag.error("unknown rebase opcode 0x%02X", opcode);
1206 }
1207 }
1208 }
1209 else {
1210 // old binary, walk relocations
1211 const uint64_t relocsStartAddress = relocBaseAddress(segmentsInfo, leInfo.layout.linkeditSegIndex);
1212 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->locreloff);
1213 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nlocrel];
1214 bool stop = false;
1215 const uint8_t relocSize = (is64() ? 3 : 2);
1216 const uint8_t ptrSize = pointerSize();
1217 STACK_ALLOC_OVERFLOW_SAFE_ARRAY(uint32_t, relocAddrs, 2048);
1218 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
1219 if ( reloc->r_length != relocSize ) {
1220 diag.error("local relocation has wrong r_length");
1221 break;
1222 }
1223 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
1224 diag.error("local relocation has wrong r_type");
1225 break;
1226 }
1227 relocAddrs.push_back(reloc->r_address);
1228 }
1229 if ( !relocAddrs.empty() ) {
1230 ::qsort(&relocAddrs[0], relocAddrs.count(), sizeof(uint32_t), &uint32Sorter);
1231 for (uint32_t addrOff : relocAddrs) {
1232 uint32_t segIndex = 0;
1233 uint64_t segOffset = 0;
1234 if ( segIndexAndOffsetForAddress(relocsStartAddress+addrOff, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1235 uint8_t type = REBASE_TYPE_POINTER;
1236 if ( this->cputype == CPU_TYPE_I386 ) {
1237 if ( segmentsInfo[segIndex].executable() )
1238 type = REBASE_TYPE_TEXT_ABSOLUTE32;
1239 }
1240 handler("local relocation", leInfo, segmentsInfo, true, ptrSize, segIndex, segOffset, type , stop);
1241 }
1242 else {
1243 diag.error("local relocation has out of range r_address");
1244 break;
1245 }
1246 }
1247 }
1248 // then process indirect symbols
1249 forEachIndirectPointer(diag, ^(uint64_t address, bool bind, int bindLibOrdinal,
1250 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
1251 if ( bind )
1252 return;
1253 uint32_t segIndex = 0;
1254 uint64_t segOffset = 0;
1255 if ( segIndexAndOffsetForAddress(address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1256 handler("local relocation", leInfo, segmentsInfo, true, ptrSize, segIndex, segOffset, REBASE_TYPE_POINTER, indStop);
1257 }
1258 else {
1259 diag.error("local relocation has out of range r_address");
1260 indStop = true;
1261 }
1262 });
1263 }
1264 }
1265
1266 bool MachOAnalyzer::segIndexAndOffsetForAddress(uint64_t addr, const SegmentInfo segmentsInfos[], uint32_t segCount, uint32_t& segIndex, uint64_t& segOffset) const
1267 {
1268 for (uint32_t i=0; i < segCount; ++i) {
1269 if ( (segmentsInfos[i].vmAddr <= addr) && (addr < segmentsInfos[i].vmAddr+segmentsInfos[i].vmSize) ) {
1270 segIndex = i;
1271 segOffset = addr - segmentsInfos[i].vmAddr;
1272 return true;
1273 }
1274 }
1275 return false;
1276 }
1277
1278 uint64_t MachOAnalyzer::relocBaseAddress(const SegmentInfo segmentsInfos[], uint32_t segCount) const
1279 {
1280 if ( is64() ) {
1281 // x86_64 reloc base address is first writable segment
1282 for (uint32_t i=0; i < segCount; ++i) {
1283 if ( segmentsInfos[i].writable() )
1284 return segmentsInfos[i].vmAddr;
1285 }
1286 }
1287 return segmentsInfos[0].vmAddr;
1288 }
1289
1290
1291
1292 void MachOAnalyzer::forEachIndirectPointer(Diagnostics& diag, void (^handler)(uint64_t pointerAddress, bool bind, int bindLibOrdinal, const char* bindSymbolName,
1293 bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& stop)) const
1294 {
1295 LinkEditInfo leInfo;
1296 getLinkEditPointers(diag, leInfo);
1297 if ( diag.hasError() )
1298 return;
1299
1300 // find lazy and non-lazy pointer sections
1301 const bool is64Bit = is64();
1302 const uint32_t* const indirectSymbolTable = (uint32_t*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->indirectsymoff);
1303 const uint32_t indirectSymbolTableCount = leInfo.dynSymTab->nindirectsyms;
1304 const uint32_t ptrSize = pointerSize();
1305 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
1306 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
1307 const struct nlist* symbols32 = (struct nlist*)symbolTable;
1308 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1309 uint32_t symCount = leInfo.symTab->nsyms;
1310 uint32_t poolSize = leInfo.symTab->strsize;
1311 __block bool stop = false;
1312 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectInfo, bool malformedSectionRange, bool& sectionStop) {
1313 uint8_t sectionType = (sectInfo.sectFlags & SECTION_TYPE);
1314 bool selfModifyingStub = (sectionType == S_SYMBOL_STUBS) && (sectInfo.sectFlags & S_ATTR_SELF_MODIFYING_CODE) && (sectInfo.reserved2 == 5) && (this->cputype == CPU_TYPE_I386);
1315 if ( (sectionType != S_LAZY_SYMBOL_POINTERS) && (sectionType != S_NON_LAZY_SYMBOL_POINTERS) && !selfModifyingStub )
1316 return;
1317 if ( (flags & S_ATTR_SELF_MODIFYING_CODE) && !selfModifyingStub ) {
1318 diag.error("S_ATTR_SELF_MODIFYING_CODE section type only valid in old i386 binaries");
1319 sectionStop = true;
1320 return;
1321 }
1322 uint32_t elementSize = selfModifyingStub ? sectInfo.reserved2 : ptrSize;
1323 uint32_t elementCount = (uint32_t)(sectInfo.sectSize/elementSize);
1324 if ( greaterThanAddOrOverflow(sectInfo.reserved1, elementCount, indirectSymbolTableCount) ) {
1325 diag.error("section %s overflows indirect symbol table", sectInfo.sectName);
1326 sectionStop = true;
1327 return;
1328 }
1329
1330 for (uint32_t i=0; (i < elementCount) && !stop; ++i) {
1331 uint32_t symNum = indirectSymbolTable[sectInfo.reserved1 + i];
1332 if ( symNum == INDIRECT_SYMBOL_ABS )
1333 continue;
1334 if ( symNum == INDIRECT_SYMBOL_LOCAL ) {
1335 handler(sectInfo.sectAddr+i*elementSize, false, 0, "", false, false, false, stop);
1336 continue;
1337 }
1338 if ( symNum > symCount ) {
1339 diag.error("indirect symbol[%d] = %d which is invalid symbol index", sectInfo.reserved1 + i, symNum);
1340 sectionStop = true;
1341 return;
1342 }
1343 uint16_t n_desc = is64Bit ? symbols64[symNum].n_desc : symbols32[symNum].n_desc;
1344 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
1345 uint32_t strOffset = is64Bit ? symbols64[symNum].n_un.n_strx : symbols32[symNum].n_un.n_strx;
1346 if ( strOffset > poolSize ) {
1347 diag.error("symbol[%d] string offset out of range", sectInfo.reserved1 + i);
1348 sectionStop = true;
1349 return;
1350 }
1351 const char* symbolName = stringPool + strOffset;
1352 bool weakImport = (n_desc & N_WEAK_REF);
1353 bool lazy = (sectionType == S_LAZY_SYMBOL_POINTERS);
1354 handler(sectInfo.sectAddr+i*elementSize, true, libOrdinal, symbolName, weakImport, lazy, selfModifyingStub, stop);
1355 }
1356 sectionStop = stop;
1357 });
1358 }
1359
1360 int MachOAnalyzer::libOrdinalFromDesc(uint16_t n_desc) const
1361 {
1362 // -flat_namespace is always flat lookup
1363 if ( (this->flags & MH_TWOLEVEL) == 0 )
1364 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1365
1366 // extract byte from undefined symbol entry
1367 int libIndex = GET_LIBRARY_ORDINAL(n_desc);
1368 switch ( libIndex ) {
1369 case SELF_LIBRARY_ORDINAL:
1370 return BIND_SPECIAL_DYLIB_SELF;
1371
1372 case DYNAMIC_LOOKUP_ORDINAL:
1373 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1374
1375 case EXECUTABLE_ORDINAL:
1376 return BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
1377 }
1378
1379 return libIndex;
1380 }
1381
1382 bool MachOAnalyzer::validBindInfo(Diagnostics& diag, const char* path) const
1383 {
1384 forEachBind(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1385 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1386 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1387 uint8_t type, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
1388 if ( invalidBindState(diag, opcodeName, path, leInfo, segments, segIndexSet, libraryOrdinalSet, dylibCount,
1389 libOrdinal, ptrSize, segmentIndex, segmentOffset, type, symbolName) ) {
1390 stop = true;
1391 }
1392 }, ^(const char* symbolName) {
1393 }, ^() { });
1394 return diag.noError();
1395 }
1396
1397 bool MachOAnalyzer::invalidBindState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1398 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint32_t ptrSize,
1399 uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, const char* symbolName) const
1400 {
1401 if ( !segIndexSet ) {
1402 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path, opcodeName);
1403 return true;
1404 }
1405 if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
1406 diag.error("in '%s' %s segment index %d too large", path, opcodeName, segmentIndex);
1407 return true;
1408 }
1409 if ( segmentOffset > (segments[segmentIndex].vmSize-ptrSize) ) {
1410 diag.error("in '%s' %s current segment offset 0x%08llX beyond segment size (0x%08llX)", path, opcodeName, segmentOffset, segments[segmentIndex].vmSize);
1411 return true;
1412 }
1413 if ( symbolName == NULL ) {
1414 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", path, opcodeName);
1415 return true;
1416 }
1417 if ( !libraryOrdinalSet ) {
1418 diag.error("in '%s' %s missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL", path, opcodeName);
1419 return true;
1420 }
1421 if ( libOrdinal > (int)dylibCount ) {
1422 diag.error("in '%s' %s has library ordinal too large (%d) max (%d)", path, opcodeName, libOrdinal, dylibCount);
1423 return true;
1424 }
1425 if ( libOrdinal < BIND_SPECIAL_DYLIB_WEAK_LOOKUP ) {
1426 diag.error("in '%s' %s has unknown library special ordinal (%d)", path, opcodeName, libOrdinal);
1427 return true;
1428 }
1429 switch ( type ) {
1430 case BIND_TYPE_POINTER:
1431 if ( !segments[segmentIndex].writable() ) {
1432 diag.error("in '%s' %s pointer bind is in non-writable segment", path, opcodeName);
1433 return true;
1434 }
1435 if ( segments[segmentIndex].executable() && enforceFormat(Malformed::executableData) ) {
1436 diag.error("in '%s' %s pointer bind is in executable segment", path, opcodeName);
1437 return true;
1438 }
1439 break;
1440 case BIND_TYPE_TEXT_ABSOLUTE32:
1441 case BIND_TYPE_TEXT_PCREL32:
1442 if ( !segments[segmentIndex].textRelocs ) {
1443 diag.error("in '%s' %s text bind is in segment that does not support text relocations", path, opcodeName);
1444 return true;
1445 }
1446 if ( segments[segmentIndex].writable() ) {
1447 diag.error("in '%s' %s text bind is in writable segment", path, opcodeName);
1448 return true;
1449 }
1450 if ( !segments[segmentIndex].executable() ) {
1451 diag.error("in '%s' %s pointer bind is in non-executable segment", path, opcodeName);
1452 return true;
1453 }
1454 break;
1455 default:
1456 diag.error("in '%s' %s unknown bind type %d", path, opcodeName, type);
1457 return true;
1458 }
1459 return false;
1460 }
1461
1462 void MachOAnalyzer::forEachBind(Diagnostics& diag, void (^handler)(uint64_t runtimeOffset, int libOrdinal, const char* symbolName,
1463 bool weakImport, bool lazyBind, uint64_t addend, bool& stop),
1464 void (^strongHandler)(const char* symbolName),
1465 void (^missingLazyBindHandler)()) const
1466 {
1467 __block bool startVmAddrSet = false;
1468 __block uint64_t startVmAddr = 0;
1469 forEachBind(diag, ^(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1470 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1471 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset,
1472 uint8_t type, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
1473 if ( !startVmAddrSet ) {
1474 for (int i=0; i <= segmentIndex; ++i) {
1475 if ( strcmp(segments[i].segName, "__TEXT") == 0 ) {
1476 startVmAddr = segments[i].vmAddr;
1477 startVmAddrSet = true;
1478 break;
1479 }
1480 }
1481 }
1482 uint64_t bindVmOffset = segments[segmentIndex].vmAddr + segmentOffset;
1483 uint64_t runtimeOffset = bindVmOffset - startVmAddr;
1484 handler(runtimeOffset, libOrdinal, symbolName, weakImport, lazyBind, addend, stop);
1485 }, ^(const char* symbolName) {
1486 strongHandler(symbolName);
1487 }, ^() {
1488 missingLazyBindHandler();
1489 });
1490 }
1491
1492 void MachOAnalyzer::forEachBind(Diagnostics& diag,
1493 void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
1494 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
1495 uint32_t ptrSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type,
1496 const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop),
1497 void (^strongHandler)(const char* symbolName),
1498 void (^missingLazyBindHandler)()) const
1499 {
1500 const uint32_t ptrSize = this->pointerSize();
1501 bool stop = false;
1502
1503 LinkEditInfo leInfo;
1504 getLinkEditPointers(diag, leInfo);
1505 if ( diag.hasError() )
1506 return;
1507
1508 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1509 getAllSegmentsInfos(diag, segmentsInfo);
1510 if ( diag.hasError() )
1511 return;
1512
1513
1514
1515 const uint32_t dylibCount = dependentDylibCount();
1516
1517 if ( leInfo.dyldInfo != nullptr ) {
1518 // process bind opcodes
1519 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
1520 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
1521 uint8_t type = 0;
1522 uint64_t segmentOffset = 0;
1523 uint8_t segmentIndex = 0;
1524 const char* symbolName = NULL;
1525 int libraryOrdinal = 0;
1526 bool segIndexSet = false;
1527 bool libraryOrdinalSet = false;
1528
1529 int64_t addend = 0;
1530 uint64_t count;
1531 uint64_t skip;
1532 bool weakImport = false;
1533 while ( !stop && diag.noError() && (p < end) ) {
1534 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1535 uint8_t opcode = *p & BIND_OPCODE_MASK;
1536 ++p;
1537 switch (opcode) {
1538 case BIND_OPCODE_DONE:
1539 stop = true;
1540 break;
1541 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1542 libraryOrdinal = immediate;
1543 libraryOrdinalSet = true;
1544 break;
1545 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1546 libraryOrdinal = (int)read_uleb128(diag, p, end);
1547 libraryOrdinalSet = true;
1548 break;
1549 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1550 // the special ordinals are negative numbers
1551 if ( immediate == 0 )
1552 libraryOrdinal = 0;
1553 else {
1554 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1555 libraryOrdinal = signExtended;
1556 }
1557 libraryOrdinalSet = true;
1558 break;
1559 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1560 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1561 symbolName = (char*)p;
1562 while (*p != '\0')
1563 ++p;
1564 ++p;
1565 break;
1566 case BIND_OPCODE_SET_TYPE_IMM:
1567 type = immediate;
1568 break;
1569 case BIND_OPCODE_SET_ADDEND_SLEB:
1570 addend = read_sleb128(diag, p, end);
1571 break;
1572 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1573 segmentIndex = immediate;
1574 segmentOffset = read_uleb128(diag, p, end);
1575 segIndexSet = true;
1576 break;
1577 case BIND_OPCODE_ADD_ADDR_ULEB:
1578 segmentOffset += read_uleb128(diag, p, end);
1579 break;
1580 case BIND_OPCODE_DO_BIND:
1581 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1582 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1583 segmentOffset += ptrSize;
1584 break;
1585 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1586 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1587 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1588 segmentOffset += read_uleb128(diag, p, end) + ptrSize;
1589 break;
1590 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1591 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1592 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1593 segmentOffset += immediate*ptrSize + ptrSize;
1594 break;
1595 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1596 count = read_uleb128(diag, p, end);
1597 skip = read_uleb128(diag, p, end);
1598 for (uint32_t i=0; i < count; ++i) {
1599 handler("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1600 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1601 segmentOffset += skip + ptrSize;
1602 if ( stop )
1603 break;
1604 }
1605 break;
1606 default:
1607 diag.error("bad bind opcode 0x%02X", *p);
1608 }
1609 }
1610 if ( diag.hasError() )
1611 return;
1612
1613 // process lazy bind opcodes
1614 uint32_t lazyDoneCount = 0;
1615 uint32_t lazyBindCount = 0;
1616 if ( leInfo.dyldInfo->lazy_bind_size != 0 ) {
1617 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
1618 end = p + leInfo.dyldInfo->lazy_bind_size;
1619 type = BIND_TYPE_POINTER;
1620 segmentOffset = 0;
1621 segmentIndex = 0;
1622 symbolName = NULL;
1623 libraryOrdinal = 0;
1624 segIndexSet = false;
1625 libraryOrdinalSet= false;
1626 addend = 0;
1627 weakImport = false;
1628 stop = false;
1629 while ( !stop && diag.noError() && (p < end) ) {
1630 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1631 uint8_t opcode = *p & BIND_OPCODE_MASK;
1632 ++p;
1633 switch (opcode) {
1634 case BIND_OPCODE_DONE:
1635 // this opcode marks the end of each lazy pointer binding
1636 ++lazyDoneCount;
1637 break;
1638 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1639 libraryOrdinal = immediate;
1640 libraryOrdinalSet = true;
1641 break;
1642 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1643 libraryOrdinal = (int)read_uleb128(diag, p, end);
1644 libraryOrdinalSet = true;
1645 break;
1646 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1647 // the special ordinals are negative numbers
1648 if ( immediate == 0 )
1649 libraryOrdinal = 0;
1650 else {
1651 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1652 libraryOrdinal = signExtended;
1653 }
1654 libraryOrdinalSet = true;
1655 break;
1656 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1657 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1658 symbolName = (char*)p;
1659 while (*p != '\0')
1660 ++p;
1661 ++p;
1662 break;
1663 case BIND_OPCODE_SET_ADDEND_SLEB:
1664 addend = read_sleb128(diag, p, end);
1665 break;
1666 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1667 segmentIndex = immediate;
1668 segmentOffset = read_uleb128(diag, p, end);
1669 segIndexSet = true;
1670 break;
1671 case BIND_OPCODE_DO_BIND:
1672 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1673 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, true, addend, stop);
1674 segmentOffset += ptrSize;
1675 ++lazyBindCount;
1676 break;
1677 case BIND_OPCODE_SET_TYPE_IMM:
1678 case BIND_OPCODE_ADD_ADDR_ULEB:
1679 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1680 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1681 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1682 default:
1683 diag.error("bad lazy bind opcode 0x%02X", opcode);
1684 break;
1685 }
1686 }
1687 if ( lazyDoneCount > lazyBindCount+7 )
1688 missingLazyBindHandler();
1689 // diag.error("lazy bind opcodes missing binds");
1690 }
1691 if ( diag.hasError() )
1692 return;
1693
1694 // process weak bind info
1695 if ( leInfo.dyldInfo->weak_bind_size != 0 ) {
1696 p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->weak_bind_off);
1697 end = p + leInfo.dyldInfo->weak_bind_size;
1698 type = BIND_TYPE_POINTER;
1699 segmentOffset = 0;
1700 segmentIndex = 0;
1701 symbolName = NULL;
1702 libraryOrdinal = BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
1703 segIndexSet = false;
1704 libraryOrdinalSet= true;
1705 addend = 0;
1706 weakImport = false;
1707 stop = false;
1708 while ( !stop && diag.noError() && (p < end) ) {
1709 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1710 uint8_t opcode = *p & BIND_OPCODE_MASK;
1711 ++p;
1712 switch (opcode) {
1713 case BIND_OPCODE_DONE:
1714 stop = true;
1715 break;
1716 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1717 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1718 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1719 diag.error("unexpected dylib ordinal in weak_bind");
1720 break;
1721 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1722 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1723 symbolName = (char*)p;
1724 while (*p != '\0')
1725 ++p;
1726 ++p;
1727 if ( immediate & BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION ) {
1728 strongHandler(symbolName);
1729 }
1730 break;
1731 case BIND_OPCODE_SET_TYPE_IMM:
1732 type = immediate;
1733 break;
1734 case BIND_OPCODE_SET_ADDEND_SLEB:
1735 addend = read_sleb128(diag, p, end);
1736 break;
1737 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1738 segmentIndex = immediate;
1739 segmentOffset = read_uleb128(diag, p, end);
1740 segIndexSet = true;
1741 break;
1742 case BIND_OPCODE_ADD_ADDR_ULEB:
1743 segmentOffset += read_uleb128(diag, p, end);
1744 break;
1745 case BIND_OPCODE_DO_BIND:
1746 handler("BIND_OPCODE_DO_BIND", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1747 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1748 segmentOffset += ptrSize;
1749 break;
1750 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1751 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1752 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1753 segmentOffset += read_uleb128(diag, p, end) + ptrSize;
1754 break;
1755 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1756 handler("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1757 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1758 segmentOffset += immediate*ptrSize + ptrSize;
1759 break;
1760 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1761 count = read_uleb128(diag, p, end);
1762 skip = read_uleb128(diag, p, end);
1763 for (uint32_t i=0; i < count; ++i) {
1764 handler("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", leInfo, segmentsInfo, segIndexSet, libraryOrdinalSet, dylibCount, libraryOrdinal,
1765 ptrSize, segmentIndex, segmentOffset, type, symbolName, weakImport, false, addend, stop);
1766 segmentOffset += skip + ptrSize;
1767 if ( stop )
1768 break;
1769 }
1770 break;
1771 default:
1772 diag.error("bad bind opcode 0x%02X", *p);
1773 }
1774 }
1775 }
1776 }
1777 else {
1778 // old binary, process external relocations
1779 const uint64_t relocsStartAddress = relocBaseAddress(segmentsInfo, leInfo.layout.linkeditSegIndex);
1780 const relocation_info* const relocsStart = (relocation_info*)getLinkEditContent(leInfo.layout, leInfo.dynSymTab->extreloff);
1781 const relocation_info* const relocsEnd = &relocsStart[leInfo.dynSymTab->nextrel];
1782 bool is64Bit = is64() ;
1783 const uint8_t relocSize = (is64Bit ? 3 : 2);
1784 const void* symbolTable = getLinkEditContent(leInfo.layout, leInfo.symTab->symoff);
1785 const struct nlist_64* symbols64 = (nlist_64*)symbolTable;
1786 const struct nlist* symbols32 = (struct nlist*)symbolTable;
1787 const char* stringPool = (char*)getLinkEditContent(leInfo.layout, leInfo.symTab->stroff);
1788 uint32_t symCount = leInfo.symTab->nsyms;
1789 uint32_t poolSize = leInfo.symTab->strsize;
1790 for (const relocation_info* reloc=relocsStart; (reloc < relocsEnd) && !stop; ++reloc) {
1791 if ( reloc->r_length != relocSize ) {
1792 diag.error("external relocation has wrong r_length");
1793 break;
1794 }
1795 if ( reloc->r_type != 0 ) { // 0 == X86_64_RELOC_UNSIGNED == GENERIC_RELOC_VANILLA == ARM64_RELOC_UNSIGNED
1796 diag.error("external relocation has wrong r_type");
1797 break;
1798 }
1799 uint32_t segIndex = 0;
1800 uint64_t segOffset = 0;
1801 if ( segIndexAndOffsetForAddress(relocsStartAddress+reloc->r_address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1802 uint32_t symbolIndex = reloc->r_symbolnum;
1803 if ( symbolIndex > symCount ) {
1804 diag.error("external relocation has out of range r_symbolnum");
1805 break;
1806 }
1807 else {
1808 uint32_t strOffset = is64Bit ? symbols64[symbolIndex].n_un.n_strx : symbols32[symbolIndex].n_un.n_strx;
1809 uint16_t n_desc = is64Bit ? symbols64[symbolIndex].n_desc : symbols32[symbolIndex].n_desc;
1810 uint32_t libOrdinal = libOrdinalFromDesc(n_desc);
1811 if ( strOffset >= poolSize ) {
1812 diag.error("external relocation has r_symbolnum=%d which has out of range n_strx", symbolIndex);
1813 break;
1814 }
1815 else {
1816 const char* symbolName = stringPool + strOffset;
1817 bool weakImport = (n_desc & N_WEAK_REF);
1818 const uint8_t* content = (uint8_t*)this + segmentsInfo[segIndex].vmAddr - leInfo.layout.textUnslidVMAddr + segOffset;
1819 uint64_t addend = is64Bit ? *((uint64_t*)content) : *((uint32_t*)content);
1820 handler("external relocation", leInfo, segmentsInfo, true, true, dylibCount, libOrdinal,
1821 ptrSize, segIndex, segOffset, BIND_TYPE_POINTER, symbolName, weakImport, false, addend, stop);
1822 }
1823 }
1824 }
1825 else {
1826 diag.error("local relocation has out of range r_address");
1827 break;
1828 }
1829 }
1830 // then process indirect symbols
1831 forEachIndirectPointer(diag, ^(uint64_t address, bool bind, int bindLibOrdinal,
1832 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& indStop) {
1833 if ( !bind )
1834 return;
1835 uint32_t segIndex = 0;
1836 uint64_t segOffset = 0;
1837 if ( segIndexAndOffsetForAddress(address, segmentsInfo, leInfo.layout.linkeditSegIndex, segIndex, segOffset) ) {
1838 handler("indirect symbol", leInfo, segmentsInfo, true, true, dylibCount, bindLibOrdinal,
1839 ptrSize, segIndex, segOffset, BIND_TYPE_POINTER, bindSymbolName, bindWeakImport, bindLazy, 0, indStop);
1840 }
1841 else {
1842 diag.error("indirect symbol has out of range address");
1843 indStop = true;
1844 }
1845 });
1846 }
1847
1848 }
1849
1850
1851 bool MachOAnalyzer::validChainedFixupsInfo(Diagnostics& diag, const char* path) const
1852 {
1853 __block uint32_t maxTargetCount = 0;
1854 __block uint32_t currentTargetCount = 0;
1855 parseOrgArm64eChainedFixups(diag,
1856 ^(uint32_t totalTargets, bool& stop) {
1857 maxTargetCount = totalTargets;
1858 },
1859 ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop) {
1860 if ( symbolName == NULL ) {
1861 diag.error("in '%s' missing BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", path);
1862 }
1863 else if ( !libraryOrdinalSet ) {
1864 diag.error("in '%s' missing BIND_OPCODE_SET_DYLIB_ORDINAL", path);
1865 }
1866 else if ( libOrdinal > (int)dylibCount ) {
1867 diag.error("in '%s' has library ordinal too large (%d) max (%d)", path, libOrdinal, dylibCount);
1868 }
1869 else if ( libOrdinal < BIND_SPECIAL_DYLIB_WEAK_LOOKUP ) {
1870 diag.error("in '%s' has unknown library special ordinal (%d)", path, libOrdinal);
1871 }
1872 else if ( type != BIND_TYPE_POINTER ) {
1873 diag.error("in '%s' unknown bind type %d", path, type);
1874 }
1875 else if ( currentTargetCount > maxTargetCount ) {
1876 diag.error("in '%s' chained target counts exceeds BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB", path);
1877 }
1878 ++currentTargetCount;
1879 if ( diag.hasError() )
1880 stop = true;
1881 },
1882 ^(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, uint16_t format, bool& stop) {
1883 if ( !segIndexSet ) {
1884 diag.error("in '%s' missing BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", path);
1885 }
1886 else if ( segmentIndex >= leInfo.layout.linkeditSegIndex ) {
1887 diag.error("in '%s' segment index %d too large", path, segmentIndex);
1888 }
1889 else if ( segmentOffset > (segments[segmentIndex].vmSize-8) ) {
1890 diag.error("in '%s' current segment offset 0x%08llX beyond segment size (0x%08llX)", path, segmentOffset, segments[segmentIndex].vmSize);
1891 }
1892 else if ( !segments[segmentIndex].writable() ) {
1893 diag.error("in '%s' pointer bind is in non-writable segment", path);
1894 }
1895 else if ( segments[segmentIndex].executable() ) {
1896 diag.error("in '%s' pointer bind is in executable segment", path);
1897 }
1898 if ( diag.hasError() )
1899 stop = true;
1900 }
1901 );
1902
1903 return diag.noError();
1904 }
1905
1906
1907
1908 void MachOAnalyzer::parseOrgArm64eChainedFixups(Diagnostics& diag, void (^targetCount)(uint32_t totalTargets, bool& stop),
1909 void (^addTarget)(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop),
1910 void (^addChainStart)(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, uint16_t format, bool& stop)) const
1911 {
1912 bool stop = false;
1913
1914 LinkEditInfo leInfo;
1915 getLinkEditPointers(diag, leInfo);
1916 if ( diag.hasError() )
1917 return;
1918
1919 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
1920 getAllSegmentsInfos(diag, segmentsInfo);
1921 if ( diag.hasError() )
1922 return;
1923
1924 const uint32_t dylibCount = dependentDylibCount();
1925
1926 if ( leInfo.dyldInfo != nullptr ) {
1927 // process bind opcodes
1928 const uint8_t* p = getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
1929 const uint8_t* end = p + leInfo.dyldInfo->bind_size;
1930 uint8_t type = 0;
1931 uint64_t segmentOffset = 0;
1932 uint8_t segmentIndex = 0;
1933 const char* symbolName = NULL;
1934 int libraryOrdinal = 0;
1935 bool segIndexSet = false;
1936 bool libraryOrdinalSet = false;
1937 uint64_t targetTableCount;
1938 uint64_t addend = 0;
1939 bool weakImport = false;
1940 while ( !stop && diag.noError() && (p < end) ) {
1941 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1942 uint8_t opcode = *p & BIND_OPCODE_MASK;
1943 ++p;
1944 switch (opcode) {
1945 case BIND_OPCODE_DONE:
1946 stop = true;
1947 break;
1948 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1949 libraryOrdinal = immediate;
1950 libraryOrdinalSet = true;
1951 break;
1952 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1953 libraryOrdinal = (int)read_uleb128(diag, p, end);
1954 libraryOrdinalSet = true;
1955 break;
1956 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1957 // the special ordinals are negative numbers
1958 if ( immediate == 0 )
1959 libraryOrdinal = 0;
1960 else {
1961 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1962 libraryOrdinal = signExtended;
1963 }
1964 libraryOrdinalSet = true;
1965 break;
1966 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1967 weakImport = ( (immediate & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0 );
1968 symbolName = (char*)p;
1969 while (*p != '\0')
1970 ++p;
1971 ++p;
1972 break;
1973 case BIND_OPCODE_SET_TYPE_IMM:
1974 type = immediate;
1975 break;
1976 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1977 segmentIndex = immediate;
1978 segmentOffset = read_uleb128(diag, p, end);
1979 segIndexSet = true;
1980 break;
1981 case BIND_OPCODE_SET_ADDEND_SLEB:
1982 addend = read_sleb128(diag, p, end);
1983 break;
1984 case BIND_OPCODE_DO_BIND:
1985 if ( addTarget )
1986 addTarget(leInfo, segmentsInfo, libraryOrdinalSet, dylibCount, libraryOrdinal, type, symbolName, addend, weakImport, stop);
1987 break;
1988 case BIND_OPCODE_THREADED:
1989 switch (immediate) {
1990 case BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB:
1991 targetTableCount = read_uleb128(diag, p, end);
1992 if ( targetTableCount > 65535 ) {
1993 diag.error("BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB size too large");
1994 stop = true;
1995 }
1996 else {
1997 if ( targetCount )
1998 targetCount((uint32_t)targetTableCount, stop);
1999 }
2000 break;
2001 case BIND_SUBOPCODE_THREADED_APPLY:
2002 if ( addChainStart )
2003 addChainStart(leInfo, segmentsInfo, segmentIndex, segIndexSet, segmentOffset, DYLD_CHAINED_PTR_ARM64E, stop);
2004 break;
2005 default:
2006 diag.error("bad BIND_OPCODE_THREADED sub-opcode 0x%02X", immediate);
2007 }
2008 break;
2009 default:
2010 diag.error("bad bind opcode 0x%02X", immediate);
2011 }
2012 }
2013 if ( diag.hasError() )
2014 return;
2015 }
2016 }
2017
2018 void MachOAnalyzer::forEachChainedFixupTarget(Diagnostics& diag, void (^callback)(int libOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop)) const
2019 {
2020 LinkEditInfo leInfo;
2021 getLinkEditPointers(diag, leInfo);
2022 if ( diag.hasError() )
2023 return;
2024
2025 BLOCK_ACCCESSIBLE_ARRAY(SegmentInfo, segmentsInfo, leInfo.layout.linkeditSegIndex+1);
2026 getAllSegmentsInfos(diag, segmentsInfo);
2027 if ( diag.hasError() )
2028 return;
2029
2030 bool stop = false;
2031 if ( leInfo.dyldInfo != nullptr ) {
2032 parseOrgArm64eChainedFixups(diag, nullptr, ^(const LinkEditInfo& leInfo2, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount,
2033 int libOrdinal, uint8_t type, const char* symbolName, uint64_t fixAddend, bool weakImport, bool& stopChain) {
2034 callback(libOrdinal, symbolName, fixAddend, weakImport, stopChain);
2035 }, nullptr);
2036 }
2037 else if ( leInfo.chainedFixups != nullptr ) {
2038 const dyld_chained_fixups_header* header = (dyld_chained_fixups_header*)getLinkEditContent(leInfo.layout, leInfo.chainedFixups->dataoff);
2039 if ( (header->imports_offset > leInfo.chainedFixups->datasize) || (header->symbols_offset > leInfo.chainedFixups->datasize) ) {
2040 diag.error("malformed import table");
2041 return;
2042 }
2043 const dyld_chained_import* imports;
2044 const dyld_chained_import_addend* importsA32;
2045 const dyld_chained_import_addend64* importsA64;
2046 const char* symbolsPool = (char*)header + header->symbols_offset;
2047 uint32_t maxSymbolOffset = leInfo.chainedFixups->datasize - header->symbols_offset;
2048 int libOrdinal;
2049 switch (header->imports_format) {
2050 case DYLD_CHAINED_IMPORT:
2051 imports = (dyld_chained_import*)((uint8_t*)header + header->imports_offset);
2052 for (uint32_t i=0; i < header->imports_count; ++i) {
2053 const char* symbolName = &symbolsPool[imports[i].name_offset];
2054 if ( imports[i].name_offset > maxSymbolOffset ) {
2055 diag.error("malformed import table, string overflow");
2056 return;
2057 }
2058 uint8_t libVal = imports[i].lib_ordinal;
2059 if ( libVal > 0xF0 )
2060 libOrdinal = (int8_t)libVal;
2061 else
2062 libOrdinal = libVal;
2063 callback(libOrdinal, symbolName, 0, imports[i].weak_import, stop);
2064 }
2065 break;
2066 case DYLD_CHAINED_IMPORT_ADDEND:
2067 importsA32 = (dyld_chained_import_addend*)((uint8_t*)header + header->imports_offset);
2068 for (uint32_t i=0; i < header->imports_count; ++i) {
2069 const char* symbolName = &symbolsPool[importsA32[i].name_offset];
2070 if ( importsA32[i].name_offset > maxSymbolOffset ) {
2071 diag.error("malformed import table, string overflow");
2072 return;
2073 }
2074 uint8_t libVal = importsA32[i].lib_ordinal;
2075 if ( libVal > 0xF0 )
2076 libOrdinal = (int8_t)libVal;
2077 else
2078 libOrdinal = libVal;
2079 callback(libOrdinal, symbolName, importsA32[i].addend, importsA32[i].weak_import, stop);
2080 }
2081 break;
2082 case DYLD_CHAINED_IMPORT_ADDEND64:
2083 importsA64 = (dyld_chained_import_addend64*)((uint8_t*)header + header->imports_offset);
2084 for (uint32_t i=0; i < header->imports_count; ++i) {
2085 const char* symbolName = &symbolsPool[importsA64[i].name_offset];
2086 if ( importsA64[i].name_offset > maxSymbolOffset ) {
2087 diag.error("malformed import table, string overflow");
2088 return;
2089 }
2090 uint16_t libVal = importsA64[i].lib_ordinal;
2091 if ( libVal > 0xFFF0 )
2092 libOrdinal = (int16_t)libVal;
2093 else
2094 libOrdinal = libVal;
2095 callback(libOrdinal, symbolName, importsA64[i].addend, importsA64[i].weak_import, stop);
2096 }
2097 break;
2098 default:
2099 diag.error("unknown imports format");
2100 return;
2101 }
2102 }
2103 }
2104
2105 uint32_t MachOAnalyzer::segmentCount() const
2106 {
2107 __block uint32_t count = 0;
2108 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2109 ++count;
2110 });
2111 return count;
2112 }
2113
2114 bool MachOAnalyzer::hasCodeSignature(uint32_t& fileOffset, uint32_t& size) const
2115 {
2116 fileOffset = 0;
2117 size = 0;
2118
2119 Diagnostics diag;
2120 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2121 if ( cmd->cmd == LC_CODE_SIGNATURE ) {
2122 const linkedit_data_command* sigCmd = (linkedit_data_command*)cmd;
2123 fileOffset = sigCmd->dataoff;
2124 size = sigCmd->datasize;
2125 stop = true;
2126 }
2127 });
2128 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2129
2130 // early exist if no LC_CODE_SIGNATURE
2131 if ( fileOffset == 0 )
2132 return false;
2133
2134 // <rdar://problem/13622786> ignore code signatures in macOS binaries built with pre-10.9 tools
2135 if ( (this->cputype == CPU_TYPE_X86_64) || (this->cputype == CPU_TYPE_I386) ) {
2136 __block bool foundPlatform = false;
2137 __block bool badSignature = false;
2138 forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
2139 foundPlatform = true;
2140 if ( (platform == Platform::macOS) && (sdk < 0x000A0900) )
2141 badSignature = true;
2142 });
2143 return foundPlatform && !badSignature;
2144 }
2145
2146 return true;
2147 }
2148
2149 bool MachOAnalyzer::hasInitializer(Diagnostics& diag, bool contentRebased, const void* dyldCache) const
2150 {
2151 __block bool result = false;
2152 forEachInitializer(diag, contentRebased, ^(uint32_t offset) {
2153 result = true;
2154 }, dyldCache);
2155 return result;
2156 }
2157
2158 void MachOAnalyzer::forEachInitializerPointerSection(Diagnostics& diag, void (^callback)(uint32_t sectionOffset, uint32_t sectionSize, const uint8_t* content, bool& stop)) const
2159 {
2160 const unsigned ptrSize = pointerSize();
2161 const uint64_t baseAddress = preferredLoadAddress();
2162 const uint64_t slide = (uint64_t)this - baseAddress;
2163 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& sectStop) {
2164 if ( (info.sectFlags & SECTION_TYPE) == S_MOD_INIT_FUNC_POINTERS ) {
2165 if ( (info.sectSize % ptrSize) != 0 ) {
2166 diag.error("initializer section %s/%s has bad size", info.segInfo.segName, info.sectName);
2167 sectStop = true;
2168 return;
2169 }
2170 if ( malformedSectionRange ) {
2171 diag.error("initializer section %s/%s extends beyond its segment", info.segInfo.segName, info.sectName);
2172 sectStop = true;
2173 return;
2174 }
2175 const uint8_t* content = (uint8_t*)(info.sectAddr + slide);
2176 if ( ((long)content % ptrSize) != 0 ) {
2177 diag.error("initializer section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2178 sectStop = true;
2179 return;
2180 }
2181 callback((uint32_t)(info.sectAddr - baseAddress), (uint32_t)info.sectSize, content, sectStop);
2182 }
2183 });
2184 }
2185
2186 struct VIS_HIDDEN SegmentRanges
2187 {
2188 struct SegmentRange {
2189 uint64_t vmAddrStart;
2190 uint64_t vmAddrEnd;
2191 uint32_t fileSize;
2192 };
2193
2194 bool contains(uint64_t vmAddr) const {
2195 for (const SegmentRange& range : segments) {
2196 if ( (range.vmAddrStart <= vmAddr) && (vmAddr < range.vmAddrEnd) )
2197 return true;
2198 }
2199 return false;
2200 }
2201
2202 private:
2203 SegmentRange localAlloc[1];
2204
2205 public:
2206 dyld3::OverflowSafeArray<SegmentRange> segments { localAlloc, sizeof(localAlloc) / sizeof(localAlloc[0]) };
2207 };
2208
2209 void MachOAnalyzer::forEachInitializer(Diagnostics& diag, bool contentRebased, void (^callback)(uint32_t offset), const void* dyldCache) const
2210 {
2211 __block SegmentRanges executableSegments;
2212 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2213 if ( (info.protections & VM_PROT_EXECUTE) != 0 ) {
2214 executableSegments.segments.push_back({ info.vmAddr, info.vmAddr + info.vmSize, (uint32_t)info.fileSize });
2215 }
2216 });
2217
2218 if (executableSegments.segments.empty()) {
2219 diag.error("no exeutable segments");
2220 return;
2221 }
2222
2223 uint64_t loadAddress = preferredLoadAddress();
2224 intptr_t slide = getSlide();
2225
2226 // if dylib linked with -init linker option, that initializer is first
2227 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2228 if ( cmd->cmd == LC_ROUTINES ) {
2229 const routines_command* routines = (routines_command*)cmd;
2230 uint64_t dashInit = routines->init_address;
2231 if ( executableSegments.contains(dashInit) )
2232 callback((uint32_t)(dashInit - loadAddress));
2233 else
2234 diag.error("-init does not point within __TEXT segment");
2235 }
2236 else if ( cmd->cmd == LC_ROUTINES_64 ) {
2237 const routines_command_64* routines = (routines_command_64*)cmd;
2238 uint64_t dashInit = routines->init_address;
2239 if ( executableSegments.contains(dashInit) )
2240 callback((uint32_t)(dashInit - loadAddress));
2241 else
2242 diag.error("-init does not point within __TEXT segment");
2243 }
2244 });
2245
2246 // next any function pointers in mod-init section
2247 const unsigned ptrSize = pointerSize();
2248 const bool useChainedFixups = hasChainedFixups();
2249 const uint16_t pointerFormat = useChainedFixups ? this->chainedPointerFormat() : 0;
2250 forEachInitializerPointerSection(diag, ^(uint32_t sectionOffset, uint32_t sectionSize, const uint8_t* content, bool& stop) {
2251 if ( ptrSize == 8 ) {
2252 const uint64_t* initsStart = (uint64_t*)content;
2253 const uint64_t* initsEnd = (uint64_t*)((uint8_t*)content + sectionSize);
2254 for (const uint64_t* p=initsStart; p < initsEnd; ++p) {
2255 uint64_t anInit = *p;
2256 if ( contentRebased ) {
2257 // The function pointer may have been signed. Strip the signature if that is the case
2258 #if __has_feature(ptrauth_calls)
2259 anInit = (uint64_t)__builtin_ptrauth_strip((void*)anInit, ptrauth_key_asia);
2260 #endif
2261 anInit -= slide;
2262 }
2263 else if ( useChainedFixups ) {
2264 uint64_t initFuncRuntimeOffset;
2265 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2266 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2267 anInit = loadAddress+initFuncRuntimeOffset;
2268 }
2269 else {
2270 diag.error("initializer is not rebased");
2271 stop = true;
2272 break;
2273 }
2274 }
2275 if ( !executableSegments.contains(anInit) ) {
2276 diag.error("initializer 0x%0llX does not point within executable segment", anInit);
2277 stop = true;
2278 break;
2279 }
2280 callback((uint32_t)(anInit - loadAddress));
2281 }
2282 }
2283 else {
2284 const uint32_t* initsStart = (uint32_t*)content;
2285 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + sectionSize);
2286 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2287 uint32_t anInit = *p;
2288 if ( contentRebased ) {
2289 anInit -= slide;
2290 }
2291 else if ( useChainedFixups ) {
2292 uint64_t initFuncRuntimeOffset;
2293 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2294 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2295 anInit = (uint32_t)(loadAddress+initFuncRuntimeOffset);
2296 }
2297 else {
2298 diag.error("initializer is not rebased");
2299 stop = true;
2300 break;
2301 }
2302 }
2303 if ( !executableSegments.contains(anInit) ) {
2304 diag.error("initializer 0x%0X does not point within executable segment", anInit);
2305 stop = true;
2306 break;
2307 }
2308 callback(anInit - (uint32_t)loadAddress);
2309 }
2310 }
2311 });
2312
2313 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2314 if ( (info.sectFlags & SECTION_TYPE) != S_INIT_FUNC_OFFSETS )
2315 return;
2316 const uint8_t* content = (uint8_t*)(info.sectAddr + slide);
2317 if ( info.segInfo.writable() ) {
2318 diag.error("initializer offsets section %s/%s must be in read-only segment", info.segInfo.segName, info.sectName);
2319 stop = true;
2320 return;
2321 }
2322 if ( (info.sectSize % 4) != 0 ) {
2323 diag.error("initializer offsets section %s/%s has bad size", info.segInfo.segName, info.sectName);
2324 stop = true;
2325 return;
2326 }
2327 if ( malformedSectionRange ) {
2328 diag.error("initializer offsets section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2329 stop = true;
2330 return;
2331 }
2332 if ( (info.sectAddr % 4) != 0 ) {
2333 diag.error("initializer offsets section %s/%s is not 4-byte aligned", info.segInfo.segName, info.sectName);
2334 stop = true;
2335 return;
2336 }
2337 const uint32_t* initsStart = (uint32_t*)content;
2338 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + info.sectSize);
2339 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2340 uint32_t anInitOffset = *p;
2341 if ( anInitOffset > executableSegments.segments[0].fileSize ) {
2342 diag.error("initializer 0x%0X is not an offset within __TEXT segment", anInitOffset);
2343 stop = true;
2344 break;
2345 }
2346 callback(anInitOffset);
2347 }
2348 });
2349 }
2350
2351 bool MachOAnalyzer::hasTerminators(Diagnostics& diag, bool contentRebased) const
2352 {
2353 __block bool result = false;
2354 forEachTerminator(diag, contentRebased, ^(uint32_t offset) {
2355 result = true;
2356 });
2357 return result;
2358 }
2359
2360 void MachOAnalyzer::forEachTerminator(Diagnostics& diag, bool contentRebased, void (^callback)(uint32_t offset)) const
2361 {
2362 __block SegmentRanges executableSegments;
2363 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2364 if ( (info.protections & VM_PROT_EXECUTE) != 0 ) {
2365 executableSegments.segments.push_back({ info.vmAddr, info.vmAddr + info.vmSize, (uint32_t)info.fileSize });
2366 }
2367 });
2368
2369 if (executableSegments.segments.empty()) {
2370 diag.error("no exeutable segments");
2371 return;
2372 }
2373
2374 uint64_t loadAddress = preferredLoadAddress();
2375 intptr_t slide = getSlide();
2376
2377 // next any function pointers in mod-term section
2378 const unsigned ptrSize = pointerSize();
2379 const bool useChainedFixups = hasChainedFixups();
2380 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2381 if ( (info.sectFlags & SECTION_TYPE) == S_MOD_TERM_FUNC_POINTERS ) {
2382 uint64_t initFuncRuntimeOffset;
2383 const uint16_t pointerFormat = useChainedFixups ? this->chainedPointerFormat() : 0;
2384 const uint8_t* content;
2385 content = (uint8_t*)(info.sectAddr + slide);
2386 if ( (info.sectSize % ptrSize) != 0 ) {
2387 diag.error("terminator section %s/%s has bad size", info.segInfo.segName, info.sectName);
2388 stop = true;
2389 return;
2390 }
2391 if ( malformedSectionRange ) {
2392 diag.error("terminator section %s/%s extends beyond its segment", info.segInfo.segName, info.sectName);
2393 stop = true;
2394 return;
2395 }
2396 if ( ((long)content % ptrSize) != 0 ) {
2397 diag.error("terminator section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2398 stop = true;
2399 return;
2400 }
2401 if ( ptrSize == 8 ) {
2402 const uint64_t* initsStart = (uint64_t*)content;
2403 const uint64_t* initsEnd = (uint64_t*)((uint8_t*)content + info.sectSize);
2404 for (const uint64_t* p=initsStart; p < initsEnd; ++p) {
2405 uint64_t anInit = *p;
2406 if ( contentRebased ) {
2407 // The function pointer may have been signed. Strip the signature if that is the case
2408 #if __has_feature(ptrauth_calls)
2409 anInit = (uint64_t)__builtin_ptrauth_strip((void*)anInit, ptrauth_key_asia);
2410 #endif
2411 anInit -= slide;
2412 }
2413 else if ( useChainedFixups ) {
2414 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2415 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2416 anInit = loadAddress+initFuncRuntimeOffset;
2417 }
2418 else {
2419 diag.error("terminator is not rebased");
2420 stop = true;
2421 break;
2422 }
2423 }
2424 if ( !executableSegments.contains(anInit) ) {
2425 diag.error("terminator 0x%0llX does not point within executable segment", anInit);
2426 stop = true;
2427 break;
2428 }
2429 callback((uint32_t)(anInit - loadAddress));
2430 }
2431 }
2432 else {
2433 const uint32_t* initsStart = (uint32_t*)content;
2434 const uint32_t* initsEnd = (uint32_t*)((uint8_t*)content + info.sectSize);
2435 for (const uint32_t* p=initsStart; p < initsEnd; ++p) {
2436 uint32_t anInit = *p;
2437 if ( contentRebased ) {
2438 anInit -= slide;
2439 }
2440 else if ( useChainedFixups ) {
2441 ChainedFixupPointerOnDisk* aChainedInit = (ChainedFixupPointerOnDisk*)p;
2442 if ( aChainedInit->isRebase(pointerFormat, loadAddress, initFuncRuntimeOffset) ) {
2443 anInit = (uint32_t)(loadAddress+initFuncRuntimeOffset);
2444 }
2445 else {
2446 diag.error("terminator is not rebased");
2447 stop = true;
2448 break;
2449 }
2450 }
2451 if ( !executableSegments.contains(anInit) ) {
2452 diag.error("terminator 0x%0X does not point within executable segment", anInit);
2453 stop = true;
2454 break;
2455 }
2456 callback(anInit - (uint32_t)loadAddress);
2457 }
2458 }
2459 }
2460 });
2461 }
2462
2463
2464
2465 void MachOAnalyzer::forEachRPath(void (^callback)(const char* rPath, bool& stop)) const
2466 {
2467 Diagnostics diag;
2468 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2469 if ( cmd->cmd == LC_RPATH ) {
2470 const char* rpath = (char*)cmd + ((struct rpath_command*)cmd)->path.offset;
2471 callback(rpath, stop);
2472 }
2473 });
2474 diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call
2475 }
2476
2477
2478 bool MachOAnalyzer::hasObjC() const
2479 {
2480 __block bool result = false;
2481 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2482 if ( (strcmp(info.sectName, "__objc_imageinfo") == 0) && (strncmp(info.segInfo.segName, "__DATA", 6) == 0) ) {
2483 result = true;
2484 stop = true;
2485 }
2486 if ( (this->cputype == CPU_TYPE_I386) && (strcmp(info.sectName, "__image_info") == 0) && (strcmp(info.segInfo.segName, "__OBJC") == 0) ) {
2487 result = true;
2488 stop = true;
2489 }
2490 });
2491 return result;
2492 }
2493
2494 bool MachOAnalyzer::hasPlusLoadMethod(Diagnostics& diag) const
2495 {
2496 __block bool result = false;
2497 if ( (this->cputype == CPU_TYPE_I386) && supportsPlatform(Platform::macOS) ) {
2498 // old objc runtime has no special section for +load methods, scan for string
2499 int64_t slide = getSlide();
2500 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2501 if ( ( (info.sectFlags & SECTION_TYPE) == S_CSTRING_LITERALS ) ) {
2502 if ( malformedSectionRange ) {
2503 diag.error("cstring section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2504 stop = true;
2505 return;
2506 }
2507 const uint8_t* content = (uint8_t*)(info.sectAddr + slide);
2508 const char* s = (char*)content;
2509 const char* end = s + info.sectSize;
2510 while ( s < end ) {
2511 if ( strcmp(s, "load") == 0 ) {
2512 result = true;
2513 stop = true;
2514 return;
2515 }
2516 while (*s != '\0' )
2517 ++s;
2518 ++s;
2519 }
2520 }
2521 });
2522 }
2523 else {
2524 // in new objc runtime compiler puts classes/categories with +load method in specical section
2525 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
2526 if ( strncmp(info.segInfo.segName, "__DATA", 6) != 0 )
2527 return;
2528 if ( (strcmp(info.sectName, "__objc_nlclslist") == 0) || (strcmp(info.sectName, "__objc_nlcatlist") == 0)) {
2529 result = true;
2530 stop = true;
2531 }
2532 });
2533 }
2534 return result;
2535 }
2536
2537 const void* MachOAnalyzer::getRebaseOpcodes(uint32_t& size) const
2538 {
2539 Diagnostics diag;
2540 LinkEditInfo leInfo;
2541 getLinkEditPointers(diag, leInfo);
2542 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2543 return nullptr;
2544
2545 size = leInfo.dyldInfo->rebase_size;
2546 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->rebase_off);
2547 }
2548
2549 const void* MachOAnalyzer::getBindOpcodes(uint32_t& size) const
2550 {
2551 Diagnostics diag;
2552 LinkEditInfo leInfo;
2553 getLinkEditPointers(diag, leInfo);
2554 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2555 return nullptr;
2556
2557 size = leInfo.dyldInfo->bind_size;
2558 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->bind_off);
2559 }
2560
2561 const void* MachOAnalyzer::getLazyBindOpcodes(uint32_t& size) const
2562 {
2563 Diagnostics diag;
2564 LinkEditInfo leInfo;
2565 getLinkEditPointers(diag, leInfo);
2566 if ( diag.hasError() || (leInfo.dyldInfo == nullptr) )
2567 return nullptr;
2568
2569 size = leInfo.dyldInfo->lazy_bind_size;
2570 return getLinkEditContent(leInfo.layout, leInfo.dyldInfo->lazy_bind_off);
2571 }
2572
2573 const void* MachOAnalyzer::getSplitSeg(uint32_t& size) const
2574 {
2575 Diagnostics diag;
2576 LinkEditInfo leInfo;
2577 getLinkEditPointers(diag, leInfo);
2578 if ( diag.hasError() || (leInfo.splitSegInfo == nullptr) )
2579 return nullptr;
2580
2581 size = leInfo.splitSegInfo->datasize;
2582 return getLinkEditContent(leInfo.layout, leInfo.splitSegInfo->dataoff);
2583 }
2584
2585
2586 uint64_t MachOAnalyzer::segAndOffsetToRuntimeOffset(uint8_t targetSegIndex, uint64_t targetSegOffset) const
2587 {
2588 __block uint64_t textVmAddr = 0;
2589 __block uint64_t result = 0;
2590 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2591 if ( strcmp(info.segName, "__TEXT") == 0 )
2592 textVmAddr = info.vmAddr;
2593 if ( info.segIndex == targetSegIndex ) {
2594 result = (info.vmAddr - textVmAddr) + targetSegOffset;
2595 }
2596 });
2597 return result;
2598 }
2599
2600 bool MachOAnalyzer::hasLazyPointers(uint32_t& runtimeOffset, uint32_t& size) const
2601 {
2602 size = 0;
2603 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2604 if ( (info.sectFlags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ) {
2605 runtimeOffset = (uint32_t)(info.sectAddr - preferredLoadAddress());
2606 size = (uint32_t)info.sectSize;
2607 stop = true;
2608 }
2609 });
2610 return (size != 0);
2611 }
2612
2613 uint64_t MachOAnalyzer::preferredLoadAddress() const
2614 {
2615 __block uint64_t textVmAddr = 0;
2616 forEachSegment(^(const SegmentInfo& info, bool& stop) {
2617 if ( strcmp(info.segName, "__TEXT") == 0 ) {
2618 textVmAddr = info.vmAddr;
2619 stop = true;
2620 }
2621 });
2622 return textVmAddr;
2623 }
2624
2625
2626 bool MachOAnalyzer::getEntry(uint32_t& offset, bool& usesCRT) const
2627 {
2628 Diagnostics diag;
2629 offset = 0;
2630 forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
2631 if ( cmd->cmd == LC_MAIN ) {
2632 entry_point_command* mainCmd = (entry_point_command*)cmd;
2633 usesCRT = false;
2634 offset = (uint32_t)mainCmd->entryoff;
2635 stop = true;
2636 }
2637 else if ( cmd->cmd == LC_UNIXTHREAD ) {
2638 stop = true;
2639 usesCRT = true;
2640 uint64_t startAddress = entryAddrFromThreadCmd((thread_command*)cmd);
2641 offset = (uint32_t)(startAddress - preferredLoadAddress());
2642 }
2643 });
2644 return (offset != 0);
2645 }
2646
2647 uint64_t MachOAnalyzer::entryAddrFromThreadCmd(const thread_command* cmd) const
2648 {
2649 assert(cmd->cmd == LC_UNIXTHREAD);
2650 const uint32_t* regs32 = (uint32_t*)(((char*)cmd) + 16);
2651 const uint64_t* regs64 = (uint64_t*)(((char*)cmd) + 16);
2652 uint64_t startAddress = 0;
2653 switch ( this->cputype ) {
2654 case CPU_TYPE_I386:
2655 startAddress = regs32[10]; // i386_thread_state_t.eip
2656 break;
2657 case CPU_TYPE_X86_64:
2658 startAddress = regs64[16]; // x86_thread_state64_t.rip
2659 break;
2660 case CPU_TYPE_ARM:
2661 startAddress = regs32[15]; // arm_thread_state_t.pc
2662 break;
2663 case CPU_TYPE_ARM64:
2664 startAddress = regs64[32]; // arm_thread_state64_t.__pc
2665 break;
2666 }
2667 return startAddress;
2668 }
2669
2670
2671 void MachOAnalyzer::forEachInterposingSection(Diagnostics& diag, void (^handler)(uint64_t vmOffset, uint64_t vmSize, bool& stop)) const
2672 {
2673 const unsigned ptrSize = pointerSize();
2674 const unsigned entrySize = 2 * ptrSize;
2675 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2676 if ( ((info.sectFlags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(info.sectName, "__interpose") == 0) && (strcmp(info.segInfo.segName, "__DATA") == 0)) ) {
2677 if ( info.sectSize % entrySize != 0 ) {
2678 diag.error("interposing section %s/%s has bad size", info.segInfo.segName, info.sectName);
2679 stop = true;
2680 return;
2681 }
2682 if ( malformedSectionRange ) {
2683 diag.error("interposing section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName);
2684 stop = true;
2685 return;
2686 }
2687 if ( (info.sectAddr % ptrSize) != 0 ) {
2688 diag.error("interposing section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
2689 stop = true;
2690 return;
2691 }
2692 handler(info.sectAddr - preferredLoadAddress(), info.sectSize, stop);
2693 }
2694 });
2695 }
2696
2697 void MachOAnalyzer::forEachDOFSection(Diagnostics& diag, void (^callback)(uint32_t offset)) const
2698 {
2699 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2700 if ( ( (info.sectFlags & SECTION_TYPE) == S_DTRACE_DOF ) && !malformedSectionRange ) {
2701 callback((uint32_t)(info.sectAddr - info.segInfo.vmAddr));
2702 }
2703 });
2704 }
2705
2706 void MachOAnalyzer::forEachCDHash(void (^handler)(const uint8_t cdHash[20])) const
2707 {
2708 Diagnostics diag;
2709 LinkEditInfo leInfo;
2710 getLinkEditPointers(diag, leInfo);
2711 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2712 return;
2713
2714 forEachCDHashOfCodeSignature(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff),
2715 leInfo.codeSig->datasize,
2716 handler);
2717 }
2718
2719 bool MachOAnalyzer::isRestricted() const
2720 {
2721 __block bool result = false;
2722 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& info, bool malformedSectionRange, bool &stop) {
2723 if ( (strcmp(info.segInfo.segName, "__RESTRICT") == 0) && (strcmp(info.sectName, "__restrict") == 0) ) {
2724 result = true;
2725 stop = true;
2726 }
2727 });
2728 return result;
2729 }
2730
2731 bool MachOAnalyzer::usesLibraryValidation() const
2732 {
2733 Diagnostics diag;
2734 LinkEditInfo leInfo;
2735 getLinkEditPointers(diag, leInfo);
2736 if ( diag.hasError() || (leInfo.codeSig == nullptr) )
2737 return false;
2738
2739 // check for CS_REQUIRE_LV in CS_CodeDirectory.flags
2740 __block bool requiresLV = false;
2741 forEachCodeDirectoryBlob(getLinkEditContent(leInfo.layout, leInfo.codeSig->dataoff),
2742 leInfo.codeSig->datasize,
2743 ^(const void *cdBuffer) {
2744 const CS_CodeDirectory* cd = (const CS_CodeDirectory*)cdBuffer;
2745 requiresLV |= (htonl(cd->flags) & CS_REQUIRE_LV);
2746 });
2747
2748 return requiresLV;
2749 }
2750
2751 bool MachOAnalyzer::canHavePrecomputedDlopenClosure(const char* path, void (^failureReason)(const char*)) const
2752 {
2753 __block bool retval = true;
2754
2755 // only dylibs can go in cache
2756 if ( (this->filetype != MH_DYLIB) && (this->filetype != MH_BUNDLE) ) {
2757 retval = false;
2758 failureReason("not MH_DYLIB or MH_BUNDLE");
2759 }
2760
2761 // flat namespace files cannot go in cache
2762 if ( (this->flags & MH_TWOLEVEL) == 0 ) {
2763 retval = false;
2764 failureReason("not built with two level namespaces");
2765 }
2766
2767 // can only depend on other dylibs with absolute paths
2768 __block bool allDepPathsAreGood = true;
2769 forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
2770 if ( loadPath[0] != '/' ) {
2771 allDepPathsAreGood = false;
2772 stop = true;
2773 }
2774 });
2775 if ( !allDepPathsAreGood ) {
2776 retval = false;
2777 failureReason("depends on dylibs that are not absolute paths");
2778 }
2779
2780 // dylibs with interposing info cannot have dlopen closure pre-computed
2781 __block bool hasInterposing = false;
2782 forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool &stop) {
2783 if ( ((info.sectFlags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(info.sectName, "__interpose") == 0) && (strcmp(info.segInfo.segName, "__DATA") == 0)) )
2784 hasInterposing = true;
2785 });
2786 if ( hasInterposing ) {
2787 retval = false;
2788 failureReason("has interposing tuples");
2789 }
2790
2791 // images that use dynamic_lookup, bundle_loader, or have weak-defs cannot have dlopen closure pre-computed
2792 Diagnostics diag;
2793 auto checkBind = ^(int libOrdinal, bool& stop) {
2794 switch (libOrdinal) {
2795 case BIND_SPECIAL_DYLIB_WEAK_LOOKUP:
2796 failureReason("has weak externals");
2797 retval = false;
2798 stop = true;
2799 break;
2800 case BIND_SPECIAL_DYLIB_FLAT_LOOKUP:
2801 failureReason("has dynamic_lookup binds");
2802 retval = false;
2803 stop = true;
2804 break;
2805 case BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE:
2806 failureReason("has reference to main executable (bundle loader)");
2807 retval = false;
2808 stop = true;
2809 break;
2810 }
2811 };
2812
2813 if (hasChainedFixups()) {
2814 forEachChainedFixupTarget(diag, ^(int libOrdinal, const char *symbolName, uint64_t addend, bool weakImport, bool &stop) {
2815 checkBind(libOrdinal, stop);
2816 });
2817 } else {
2818 forEachBind(diag, ^(uint64_t runtimeOffset, int libOrdinal, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
2819 checkBind(libOrdinal, stop);
2820 },
2821 ^(const char* symbolName) {
2822 },
2823 ^() {
2824 });
2825 }
2826
2827 // special system dylib overrides cannot have closure pre-computed
2828 if ( strncmp(path, "/usr/lib/system/introspection/", 30) == 0 ) {
2829 retval = false;
2830 failureReason("override of OS dylib");
2831 }
2832
2833 // Don't precompute iOSMac for now until dyld3 support is there.
2834 if ( supportsPlatform(Platform::iOSMac) && !supportsPlatform(Platform::macOS) ) {
2835 retval = false;
2836 failureReason("UIKitForMac binary");
2837 }
2838
2839 return retval;
2840 }
2841
2842
2843 bool MachOAnalyzer::hasUnalignedPointerFixups() const
2844 {
2845 // only look at 64-bit architectures
2846 if ( pointerSize() == 4 )
2847 return false;
2848
2849 __block Diagnostics diag;
2850 __block bool result = false;
2851 if ( hasChainedFixups() ) {
2852 withChainStarts(diag, chainStartsOffset(), ^(const dyld_chained_starts_in_image* startsInfo) {
2853 forEachFixupInAllChains(diag, startsInfo, false, ^(MachOLoaded::ChainedFixupPointerOnDisk* fixupLoc, const dyld_chained_starts_in_segment* segInfo, bool& fixupsStop) {
2854 if ( ((long)(fixupLoc) & 7) != 0 ) {
2855 result = true;
2856 fixupsStop = true;
2857 }
2858 });
2859 });
2860 }
2861 else {
2862 forEachBind(diag, ^(uint64_t runtimeOffset, int libOrdinal, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
2863 if ( (runtimeOffset & 7) != 0 ) {
2864 result = true;
2865 stop = true;
2866 }
2867 },
2868 ^(const char* symbolName) {
2869 },
2870 ^() {
2871 });
2872 forEachRebase(diag, true, ^(uint64_t runtimeOffset, bool& stop) {
2873 if ( (runtimeOffset & 7) != 0 ) {
2874 result = true;
2875 stop = true;
2876 }
2877 });
2878 }
2879
2880 return result;
2881 }
2882
2883 void MachOAnalyzer::recurseTrie(Diagnostics& diag, const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
2884 char* cummulativeString, int curStrOffset, bool& stop, ExportsCallback callback) const
2885 {
2886 if ( p >= end ) {
2887 diag.error("malformed trie, node past end");
2888 return;
2889 }
2890 const uint64_t terminalSize = read_uleb128(diag, p, end);
2891 const uint8_t* children = p + terminalSize;
2892 if ( terminalSize != 0 ) {
2893 uint64_t imageOffset = 0;
2894 uint64_t flags = read_uleb128(diag, p, end);
2895 uint64_t other = 0;
2896 const char* importName = nullptr;
2897 if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
2898 other = read_uleb128(diag, p, end); // dylib ordinal
2899 importName = (char*)p;
2900 }
2901 else {
2902 imageOffset = read_uleb128(diag, p, end);
2903 if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
2904 other = read_uleb128(diag, p, end);
2905 else
2906 other = 0;
2907 }
2908 if ( diag.hasError() )
2909 return;
2910 callback(cummulativeString, imageOffset, flags, other, importName, stop);
2911 if ( stop )
2912 return;
2913 }
2914 if ( children > end ) {
2915 diag.error("malformed trie, terminalSize extends beyond trie data");
2916 return;
2917 }
2918 const uint8_t childrenCount = *children++;
2919 const uint8_t* s = children;
2920 for (uint8_t i=0; i < childrenCount; ++i) {
2921 int edgeStrLen = 0;
2922 while (*s != '\0') {
2923 cummulativeString[curStrOffset+edgeStrLen] = *s++;
2924 ++edgeStrLen;
2925 if ( s > end ) {
2926 diag.error("malformed trie node, child node extends past end of trie\n");
2927 return;
2928 }
2929 }
2930 cummulativeString[curStrOffset+edgeStrLen] = *s++;
2931 uint64_t childNodeOffset = read_uleb128(diag, s, end);
2932 if (childNodeOffset == 0) {
2933 diag.error("malformed trie, childNodeOffset==0");
2934 return;
2935 }
2936 recurseTrie(diag, start, start+childNodeOffset, end, cummulativeString, curStrOffset+edgeStrLen, stop, callback);
2937 if ( diag.hasError() || stop )
2938 return;
2939 }
2940 }
2941
2942 void MachOAnalyzer::forEachExportedSymbol(Diagnostics& diag, ExportsCallback callback) const
2943 {
2944 LinkEditInfo leInfo;
2945 getLinkEditPointers(diag, leInfo);
2946 if ( diag.hasError() )
2947 return;
2948 uint64_t trieSize;
2949 if ( const uint8_t* trieStart = getExportsTrie(leInfo, trieSize) ) {
2950 const uint8_t* trieEnd = trieStart + trieSize;
2951 bool stop = false;
2952 char cummulativeString[trieSize];
2953 recurseTrie(diag, trieStart, trieStart, trieEnd, cummulativeString, 0, stop, callback);
2954 }
2955 }
2956
2957 bool MachOAnalyzer::canBePlacedInDyldCache(const char* path, void (^failureReason)(const char*)) const
2958 {
2959 if (!MachOFile::canBePlacedInDyldCache(path, failureReason))
2960 return false;
2961 if ( !(isArch("x86_64") || isArch("x86_64h")) )
2962 return true;
2963
2964 __block bool rebasesOk = true;
2965 Diagnostics diag;
2966 uint64_t startVMAddr = preferredLoadAddress();
2967 uint64_t endVMAddr = startVMAddr + mappedSize();
2968 forEachRebase(diag, false, ^(uint64_t runtimeOffset, bool &stop) {
2969 // We allow TBI for x86_64 dylibs, but then require that the remainder of the offset
2970 // is a 32-bit offset from the mach-header.
2971 uint64_t value = *(uint64_t*)((uint8_t*)this + runtimeOffset);
2972 value &= 0x00FFFFFFFFFFFFFFULL;
2973 if ( (value < startVMAddr) || (value >= endVMAddr) ) {
2974 failureReason("rebase value out of range of dylib");
2975 rebasesOk = false;
2976 stop = true;
2977 return;
2978 }
2979
2980 // Also error if the rebase location is anything other than 4/8 byte aligned
2981 if ( (runtimeOffset & 0x3) != 0 ) {
2982 failureReason("rebase value is not 4-byte aligned");
2983 rebasesOk = false;
2984 stop = true;
2985 return;
2986 }
2987 });
2988 return rebasesOk;
2989 }
2990
2991 uint64_t MachOAnalyzer::chainStartsOffset() const
2992 {
2993 Diagnostics diag;
2994 LinkEditInfo leInfo;
2995 getLinkEditPointers(diag, leInfo);
2996 if ( diag.hasError() || (leInfo.chainedFixups == nullptr) )
2997 return 0;
2998
2999 const dyld_chained_fixups_header* header = (dyld_chained_fixups_header*)getLinkEditContent(leInfo.layout, leInfo.chainedFixups->dataoff);
3000 return header->starts_offset + ((uint8_t*)header - (uint8_t*)this);
3001 }
3002
3003 uint16_t MachOAnalyzer::chainedPointerFormat() const
3004 {
3005 uint64_t infoOffset = chainStartsOffset();
3006 if ( infoOffset != 0 ) {
3007 // get pointer format from chain info struct in LINKEDIT
3008 const dyld_chained_starts_in_image* startsInfo = (dyld_chained_starts_in_image*)((uint8_t*)this + infoOffset);
3009 for (uint32_t i=0; i < startsInfo->seg_count; ++i) {
3010 uint32_t segInfoOffset = startsInfo->seg_info_offset[i];
3011 // 0 offset means this segment has no fixups
3012 if ( segInfoOffset == 0 )
3013 continue;
3014 const dyld_chained_starts_in_segment* segInfo = (dyld_chained_starts_in_segment*)((uint8_t*)startsInfo + segInfoOffset);
3015 if ( segInfo->page_count != 0 )
3016 return segInfo->pointer_format;
3017 }
3018 }
3019 assert(this->cputype == CPU_TYPE_ARM64 && this->cpusubtype == CPU_SUBTYPE_ARM64E && "chainedPointerFormat() called on non-chained binary");
3020 return DYLD_CHAINED_PTR_ARM64E;
3021 }
3022
// SUPPORT_OLD_ARM64E_FORMAT gates the code in withChainStarts() that synthesizes chain starts for
// old arm64e binaries.  It is switched off only when building dyld or libdyld for a target other
// than arm64e.
#if !(BUILDING_DYLD || BUILDING_LIBDYLD) || __arm64e__
  #define SUPPORT_OLD_ARM64E_FORMAT 1
#else
  #define SUPPORT_OLD_ARM64E_FORMAT 0
#endif
3028
3029 // find dyld_chained_starts_in_image* in image
3030 // if old arm64e binary, synthesize dyld_chained_starts_in_image*
3031 void MachOAnalyzer::withChainStarts(Diagnostics& diag, uint64_t startsStructOffsetHint, void (^callback)(const dyld_chained_starts_in_image*)) const
3032 {
3033 if ( startsStructOffsetHint != 0 ) {
3034 // we have a pre-computed offset into LINKEDIT for dyld_chained_starts_in_image
3035 callback((dyld_chained_starts_in_image*)((uint8_t*)this + startsStructOffsetHint));
3036 return;
3037 }
3038
3039 LinkEditInfo leInfo;
3040 getLinkEditPointers(diag, leInfo);
3041 if ( diag.hasError() )
3042 return;
3043
3044 if ( leInfo.chainedFixups != nullptr ) {
3045 // find dyld_chained_starts_in_image from dyld_chained_fixups_header
3046 const dyld_chained_fixups_header* header = (dyld_chained_fixups_header*)getLinkEditContent(leInfo.layout, leInfo.chainedFixups->dataoff);
3047 callback((dyld_chained_starts_in_image*)((uint8_t*)header + header->starts_offset));
3048 }
3049 #if SUPPORT_OLD_ARM64E_FORMAT
3050 // don't want this code in non-arm64e dyld because it causes a stack protector which dereferences a GOT pointer before GOT is set up
3051 else if ( (leInfo.dyldInfo != nullptr) && (this->cputype == CPU_TYPE_ARM64) && (this->cpusubtype == CPU_SUBTYPE_ARM64E) ) {
3052 // old arm64e binary, create a dyld_chained_starts_in_image for caller
3053 uint64_t baseAddress = preferredLoadAddress();
3054 BLOCK_ACCCESSIBLE_ARRAY(uint8_t, buffer, leInfo.dyldInfo->bind_size + 512);
3055 dyld_chained_starts_in_image* header = (dyld_chained_starts_in_image*)buffer;
3056 header->seg_count = leInfo.layout.linkeditSegIndex;
3057 for (uint32_t i=0; i < header->seg_count; ++i)
3058 header->seg_info_offset[i] = 0;
3059 __block uint8_t curSegIndex = 0;
3060 __block dyld_chained_starts_in_segment* curSeg = (dyld_chained_starts_in_segment*)(&(header->seg_info_offset[header->seg_count]));
3061 parseOrgArm64eChainedFixups(diag, nullptr, nullptr, ^(const LinkEditInfo& leInfo2, const SegmentInfo segments[], uint8_t segmentIndex,
3062 bool segIndexSet, uint64_t segmentOffset, uint16_t format, bool& stop) {
3063 uint32_t pageIndex = (uint32_t)(segmentOffset/0x1000);
3064 if ( segmentIndex != curSegIndex ) {
3065 if ( curSegIndex == 0 ) {
3066 header->seg_info_offset[segmentIndex] = (uint32_t)((uint8_t*)curSeg - buffer);
3067 }
3068 else {
3069 header->seg_info_offset[segmentIndex] = (uint32_t)((uint8_t*)(&curSeg->page_start[curSeg->page_count]) - buffer);
3070 curSeg = (dyld_chained_starts_in_segment*)((uint8_t*)header+header->seg_info_offset[segmentIndex]);
3071 }
3072 curSeg->page_count = 0;
3073 curSegIndex = segmentIndex;
3074 }
3075 while ( curSeg->page_count != pageIndex ) {
3076 curSeg->page_start[curSeg->page_count] = 0xFFFF;
3077 curSeg->page_count++;
3078 }
3079 curSeg->size = (uint32_t)((uint8_t*)(&curSeg->page_start[pageIndex]) - (uint8_t*)curSeg);
3080 curSeg->page_size = 0x1000; // old arm64e encoding used 4KB pages
3081 curSeg->pointer_format = DYLD_CHAINED_PTR_ARM64E;
3082 curSeg->segment_offset = segments[segmentIndex].vmAddr - baseAddress;
3083 curSeg->max_valid_pointer = 0;
3084 curSeg->page_count = pageIndex+1;
3085 curSeg->page_start[pageIndex] = segmentOffset & 0xFFF;
3086 //fprintf(stderr, "segment_offset=0x%llX, vmAddr=0x%llX\n", curSeg->segment_offset, segments[segmentIndex].vmAddr );
3087 //printf("segIndex=%d, segOffset=0x%08llX, page_start[%d]=0x%04X, page_start[%d]=0x%04X\n",
3088 // segmentIndex, segmentOffset, pageIndex, curSeg->page_start[pageIndex], pageIndex-1, pageIndex ? curSeg->page_start[pageIndex-1] : 0);
3089 });
3090 callback(header);
3091 }
3092 #endif
3093 else {
3094 diag.error("image does not use chained fixups");
3095 }
3096 }
3097
3098 MachOAnalyzer::ObjCInfo MachOAnalyzer::getObjCInfo() const
3099 {
3100 __block ObjCInfo result;
3101 result.selRefCount = 0;
3102 result.classDefCount = 0;
3103 result.protocolDefCount = 0;
3104
3105 const uint32_t ptrSize = pointerSize();
3106 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3107 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) == 0 ) {
3108 if ( strcmp(sectInfo.sectName, "__objc_selrefs") == 0 )
3109 result.selRefCount += (sectInfo.sectSize/ptrSize);
3110 else if ( strcmp(sectInfo.sectName, "__objc_classlist") == 0 )
3111 result.classDefCount += (sectInfo.sectSize/ptrSize);
3112 else if ( strcmp(sectInfo.sectName, "__objc_protolist") == 0 )
3113 result.protocolDefCount += (sectInfo.sectSize/ptrSize);
3114 }
3115 else if ( (this->cputype == CPU_TYPE_I386) && (strcmp(sectInfo.segInfo.segName, "__OBJC") == 0) ) {
3116 if ( strcmp(sectInfo.sectName, "__message_refs") == 0 )
3117 result.selRefCount += (sectInfo.sectSize/4);
3118 else if ( strcmp(sectInfo.sectName, "__class") == 0 )
3119 result.classDefCount += (sectInfo.sectSize/48);
3120 else if ( strcmp(sectInfo.sectName, "__protocol") == 0 )
3121 result.protocolDefCount += (sectInfo.sectSize/20);
3122 }
3123 });
3124
3125 return result;
3126 }
3127
3128 // Convert from a (possibly) live pointer to a vmAddr
3129 static uint64_t convertToVMAddr(uint64_t value, MachOAnalyzer::VMAddrConverter vmAddrConverter) {
3130 if ( vmAddrConverter.contentRebased ) {
3131 // The value may have been signed. Strip the signature if that is the case
3132 #if __has_feature(ptrauth_calls)
3133 value = (uint64_t)__builtin_ptrauth_strip((void*)value, ptrauth_key_asia);
3134 #endif
3135 value -= vmAddrConverter.slide;
3136 }
3137 else if ( vmAddrConverter.chainedPointerFormat != 0 ) {
3138 auto* chainedValue = (MachOAnalyzer::ChainedFixupPointerOnDisk*)&value;
3139 uint64_t targetRuntimeOffset;
3140 if ( chainedValue->isRebase(vmAddrConverter.chainedPointerFormat, vmAddrConverter.preferredLoadAddress,
3141 targetRuntimeOffset) ) {
3142 value = vmAddrConverter.preferredLoadAddress + targetRuntimeOffset;
3143 }
3144 }
3145
3146 return value;
3147 }
3148
3149 uint64_t MachOAnalyzer::ObjCClassInfo::getReadOnlyDataField(ObjCClassInfo::ReadOnlyDataField field, uint32_t pointerSize) const {
3150 if (pointerSize == 8) {
3151 typedef uint64_t PtrTy;
3152 struct class_ro_t {
3153 uint32_t flags;
3154 uint32_t instanceStart;
3155 // Note there is 4-bytes of alignment padding between instanceSize and ivarLayout
3156 // on 64-bit archs, but no padding on 32-bit archs.
3157 // This union is a way to model that.
3158 union {
3159 uint32_t instanceSize;
3160 PtrTy pad;
3161 } instanceSize;
3162 PtrTy ivarLayoutVMAddr;
3163 PtrTy nameVMAddr;
3164 PtrTy baseMethodsVMAddr;
3165 PtrTy baseProtocolsVMAddr;
3166 PtrTy ivarsVMAddr;
3167 PtrTy weakIvarLayoutVMAddr;
3168 PtrTy basePropertiesVMAddr;
3169 };
3170 const class_ro_t* classData = (const class_ro_t*)(dataVMAddr + vmAddrConverter.slide);
3171 switch (field) {
3172 case ObjCClassInfo::ReadOnlyDataField::name:
3173 return convertToVMAddr(classData->nameVMAddr, vmAddrConverter);
3174 case ObjCClassInfo::ReadOnlyDataField::baseMethods:
3175 return convertToVMAddr(classData->baseMethodsVMAddr, vmAddrConverter);
3176 }
3177 } else {
3178 typedef uint32_t PtrTy;
3179 struct class_ro_t {
3180 uint32_t flags;
3181 uint32_t instanceStart;
3182 // Note there is 4-bytes of alignment padding between instanceSize and ivarLayout
3183 // on 64-bit archs, but no padding on 32-bit archs.
3184 // This union is a way to model that.
3185 union {
3186 uint32_t instanceSize;
3187 PtrTy pad;
3188 } instanceSize;
3189 PtrTy ivarLayoutVMAddr;
3190 PtrTy nameVMAddr;
3191 PtrTy baseMethodsVMAddr;
3192 PtrTy baseProtocolsVMAddr;
3193 PtrTy ivarsVMAddr;
3194 PtrTy weakIvarLayoutVMAddr;
3195 PtrTy basePropertiesVMAddr;
3196 };
3197 const class_ro_t* classData = (const class_ro_t*)(dataVMAddr + vmAddrConverter.slide);
3198 switch (field) {
3199 case ObjCClassInfo::ReadOnlyDataField::name:
3200 return convertToVMAddr(classData->nameVMAddr, vmAddrConverter);
3201 case ObjCClassInfo::ReadOnlyDataField::baseMethods:
3202 return convertToVMAddr(classData->baseMethodsVMAddr, vmAddrConverter);
3203 }
3204 }
3205 }
3206
3207 const char* MachOAnalyzer::getPrintableString(uint64_t stringVMAddr, MachOAnalyzer::PrintableStringResult& result,
3208 SectionCache* sectionCache,
3209 bool (^sectionHandler)(const SectionInfo& sectionInfo)) const {
3210 if ( sectionCache != nullptr ) {
3211 // Make sure the string is pointing in to one of the supported sections
3212 __block const dyld3::MachOAnalyzer::SectionInfo* nameSectionInfo = nullptr;
3213 for (const dyld3::MachOAnalyzer::SectionInfo& sectionInfo : sectionCache->sectionInfos) {
3214 if ( stringVMAddr < sectionInfo.sectAddr ) {
3215 continue;
3216 }
3217 if ( stringVMAddr >= ( sectionInfo.sectAddr + sectionInfo.sectSize) ) {
3218 continue;
3219 }
3220 nameSectionInfo = &sectionInfo;
3221 break;
3222 }
3223
3224 if ( nameSectionInfo != nullptr ) {
3225 // The section handler may also reject this section
3226 if ( sectionHandler != nullptr ) {
3227 if (!sectionHandler(*nameSectionInfo)) {
3228 result = PrintableStringResult::UnknownSection;
3229 return nullptr;
3230 }
3231 }
3232
3233 result = PrintableStringResult::CanPrint;
3234 return (const char*)(stringVMAddr + getSlide());
3235 }
3236 }
3237
3238 // If the name isn't in the cache then find the section its in
3239
3240 uint32_t fairplayTextOffsetStart;
3241 uint32_t fairplayTextOffsetEnd;
3242 uint32_t fairplaySize;
3243 if ( isFairPlayEncrypted(fairplayTextOffsetStart, fairplaySize) ) {
3244 fairplayTextOffsetEnd = fairplayTextOffsetStart + fairplaySize;
3245 } else {
3246 fairplayTextOffsetEnd = 0;
3247 }
3248
3249 result = PrintableStringResult::UnknownSection;
3250 forEachSection(^(const MachOAnalyzer::SectionInfo &sectInfo, bool malformedSectionRange, bool &stop) {
3251 if ( stringVMAddr < sectInfo.sectAddr ) {
3252 return;
3253 }
3254 if ( stringVMAddr >= ( sectInfo.sectAddr + sectInfo.sectSize) ) {
3255 return;
3256 }
3257
3258 // We can't scan this section if its protected or not cstrings.
3259 if ( sectInfo.segInfo.isProtected || ( (sectInfo.sectFlags & SECTION_TYPE) != S_CSTRING_LITERALS ) ) {
3260 result = PrintableStringResult::ProtectedSection;
3261 stop = true;
3262 return;
3263 }
3264
3265 // We can't scan this section if it overlaps with the fairplay range
3266 if ( fairplayTextOffsetEnd < sectInfo.sectFileOffset ) {
3267 // Fairplay range ends before section
3268 } else if ( fairplayTextOffsetStart > (sectInfo.sectFileOffset + sectInfo.sectSize) ) {
3269 // Fairplay range starts after section
3270 } else {
3271 // Must overlap
3272 result = PrintableStringResult::FairPlayEncrypted;
3273 stop = true;
3274 return;
3275 }
3276
3277 // The section handler may also reject this section
3278 if ( sectionHandler != nullptr ) {
3279 if (!sectionHandler(sectInfo)) {
3280 result = PrintableStringResult::UnknownSection;
3281 stop = true;
3282 return;
3283 }
3284 }
3285 // Cache this section for later.
3286 if ( sectionCache != nullptr ) {
3287 sectionCache->sectionInfos.push_back(sectInfo);
3288 }
3289 result = PrintableStringResult::CanPrint;
3290 stop = true;
3291 });
3292
3293 if (result == PrintableStringResult::CanPrint)
3294 return (const char*)(stringVMAddr + getSlide());
3295 return nullptr;
3296 }
3297
3298 bool MachOAnalyzer::SectionCache::findSectionForVMAddr(uint64_t vmAddr, bool (^sectionHandler)(const SectionInfo& sectionInfo)) {
3299
3300 // Make sure the string is pointing in to one of the supported sections
3301 __block const dyld3::MachOAnalyzer::SectionInfo* foundSectionInfo = nullptr;
3302 for (const dyld3::MachOAnalyzer::SectionInfo& sectionInfo : sectionInfos) {
3303 if ( vmAddr < sectionInfo.sectAddr ) {
3304 continue;
3305 }
3306 if ( vmAddr >= ( sectionInfo.sectAddr + sectionInfo.sectSize) ) {
3307 continue;
3308 }
3309 foundSectionInfo = &sectionInfo;
3310 break;
3311 }
3312
3313 if ( foundSectionInfo != nullptr ) {
3314 // The section handler may also reject this section
3315 if ( sectionHandler != nullptr ) {
3316 if (!sectionHandler(*foundSectionInfo)) {
3317 return nullptr;
3318 }
3319 }
3320
3321 // Found a section, so return true
3322 return true;
3323 }
3324
3325 // If the name isn't in the cache then find the section its in
3326
3327 uint32_t fairplayTextOffsetStart;
3328 uint32_t fairplayTextOffsetEnd;
3329 uint32_t fairplaySize;
3330 if ( ma->isFairPlayEncrypted(fairplayTextOffsetStart, fairplaySize) ) {
3331 fairplayTextOffsetEnd = fairplayTextOffsetStart + fairplaySize;
3332 } else {
3333 fairplayTextOffsetEnd = 0;
3334 }
3335
3336 __block bool foundValidSection = false;
3337 ma->forEachSection(^(const MachOAnalyzer::SectionInfo &sectInfo, bool malformedSectionRange, bool &stop) {
3338 if ( vmAddr < sectInfo.sectAddr ) {
3339 return;
3340 }
3341 if ( vmAddr >= ( sectInfo.sectAddr + sectInfo.sectSize) ) {
3342 return;
3343 }
3344
3345 // We can't scan this section if it overlaps with the fairplay range
3346 if ( fairplayTextOffsetEnd < sectInfo.sectFileOffset ) {
3347 // Fairplay range ends before section
3348 } else if ( fairplayTextOffsetStart > (sectInfo.sectFileOffset + sectInfo.sectSize) ) {
3349 // Fairplay range starts after section
3350 } else {
3351 // Must overlap
3352 stop = true;
3353 return;
3354 }
3355
3356 // The section handler may also reject this section
3357 if ( sectionHandler != nullptr ) {
3358 if (!sectionHandler(sectInfo)) {
3359 stop = true;
3360 return;
3361 }
3362 }
3363 // Cache this section for later.
3364 sectionInfos.push_back(sectInfo);
3365 foundValidSection = true;
3366 stop = true;
3367 });
3368
3369 return foundValidSection;
3370 }
3371
3372 void MachOAnalyzer::forEachObjCClass(Diagnostics& diag, bool contentRebased,
3373 void (^handler)(Diagnostics& diag, uint64_t classVMAddr,
3374 uint64_t classSuperclassVMAddr, uint64_t classDataVMAddr,
3375 const ObjCClassInfo& objcClass, bool isMetaClass)) const {
3376 const uint64_t ptrSize = pointerSize();
3377 intptr_t slide = getSlide();
3378
3379 MachOAnalyzer::VMAddrConverter vmAddrConverter;
3380 vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
3381 vmAddrConverter.slide = slide;
3382 vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
3383 vmAddrConverter.contentRebased = contentRebased;
3384
3385 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3386 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
3387 return;
3388 if ( strcmp(sectInfo.sectName, "__objc_classlist") != 0 )
3389 return;
3390 const uint8_t* classList = (uint8_t*)(sectInfo.sectAddr + slide);
3391 uint64_t classListSize = sectInfo.sectSize;
3392
3393 if ( (classListSize % ptrSize) != 0 ) {
3394 diag.error("Invalid objc class section size");
3395 return;
3396 }
3397
3398 if ( ptrSize == 8 ) {
3399 typedef uint64_t PtrTy;
3400 struct objc_class_t {
3401 uint64_t isaVMAddr;
3402 uint64_t superclassVMAddr;
3403 uint64_t methodCacheBuckets;
3404 uint64_t methodCacheProperties;
3405 uint64_t dataVMAddrAndFastFlags;
3406 };
3407 // This matches "struct TargetClassMetadata" from Metadata.h in Swift
3408 struct swift_class_metadata_t : objc_class_t {
3409 uint32_t swiftClassFlags;
3410 };
3411 enum : uint64_t {
3412 FAST_DATA_MASK = 0x00007ffffffffff8ULL
3413 };
3414 for (uint64_t i = 0; i != classListSize; i += sizeof(PtrTy)) {
3415 uint64_t classVMAddr = convertToVMAddr(*(PtrTy*)(classList + i), vmAddrConverter);
3416 uint64_t classSuperclassVMAddr = classVMAddr + offsetof(objc_class_t, superclassVMAddr);
3417 uint64_t classDataVMAddr = classVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);
3418
3419 // First call the handler on the class
3420 const objc_class_t* classPtr = (const objc_class_t*)(classVMAddr + slide);
3421 const swift_class_metadata_t* swiftClassPtr = (const swift_class_metadata_t*)classPtr;
3422 ObjCClassInfo objcClass;
3423 objcClass.isaVMAddr = convertToVMAddr(classPtr->isaVMAddr, vmAddrConverter);
3424 objcClass.superclassVMAddr = convertToVMAddr(classPtr->superclassVMAddr, vmAddrConverter);
3425 objcClass.dataVMAddr = convertToVMAddr(classPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
3426 objcClass.vmAddrConverter = vmAddrConverter;
3427 objcClass.isSwiftLegacy = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
3428 objcClass.isSwiftStable = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
3429 // The Swift class flags are only present if the class is swift
3430 objcClass.swiftClassFlags = (objcClass.isSwiftLegacy || objcClass.isSwiftStable) ? swiftClassPtr->swiftClassFlags : 0;
3431 handler(diag, classVMAddr, classSuperclassVMAddr, classDataVMAddr, objcClass, false);
3432 if (diag.hasError())
3433 return;
3434
3435 // Then call it on the metaclass
3436 const objc_class_t* metaClassPtr = (const objc_class_t*)(objcClass.isaVMAddr + slide);
3437 const swift_class_metadata_t* swiftMetaClassPtr = (const swift_class_metadata_t*)metaClassPtr;
3438 ObjCClassInfo objcMetaClass;
3439 objcMetaClass.isaVMAddr = convertToVMAddr(metaClassPtr->isaVMAddr, vmAddrConverter);
3440 objcMetaClass.superclassVMAddr = convertToVMAddr(metaClassPtr->superclassVMAddr, vmAddrConverter);
3441 objcMetaClass.dataVMAddr = convertToVMAddr(metaClassPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
3442 objcMetaClass.vmAddrConverter = vmAddrConverter;
3443 objcMetaClass.isSwiftLegacy = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
3444 objcMetaClass.isSwiftStable = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
3445 // The Swift class flags are only present if the class is swift
3446 objcMetaClass.swiftClassFlags = (objcMetaClass.isSwiftLegacy || objcMetaClass.isSwiftStable) ? swiftMetaClassPtr->swiftClassFlags : 0;
3447 classSuperclassVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, superclassVMAddr);
3448 classDataVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);
3449 handler(diag, classVMAddr, classSuperclassVMAddr, classDataVMAddr, objcMetaClass, true);
3450 if (diag.hasError())
3451 return;
3452 }
3453 } else {
3454 typedef uint32_t PtrTy;
3455 struct objc_class_t {
3456 uint32_t isaVMAddr;
3457 uint32_t superclassVMAddr;
3458 uint32_t methodCacheBuckets;
3459 uint32_t methodCacheProperties;
3460 uint32_t dataVMAddrAndFastFlags;
3461 };
3462 // This matches "struct TargetClassMetadata" from Metadata.h in Swift
3463 struct swift_class_metadata_t : objc_class_t {
3464 uint32_t swiftClassFlags;
3465 };
3466 enum : uint32_t {
3467 FAST_DATA_MASK = 0xfffffffcUL
3468 };
3469 for (uint64_t i = 0; i != classListSize; i += sizeof(PtrTy)) {
3470 uint64_t classVMAddr = convertToVMAddr(*(PtrTy*)(classList + i), vmAddrConverter);
3471 uint64_t classSuperclassVMAddr = classVMAddr + offsetof(objc_class_t, superclassVMAddr);
3472 uint64_t classDataVMAddr = classVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);
3473
3474 // First call the handler on the class
3475 const objc_class_t* classPtr = (const objc_class_t*)(classVMAddr + slide);
3476 const swift_class_metadata_t* swiftClassPtr = (const swift_class_metadata_t*)classPtr;
3477 ObjCClassInfo objcClass;
3478 objcClass.isaVMAddr = convertToVMAddr(classPtr->isaVMAddr, vmAddrConverter);
3479 objcClass.superclassVMAddr = convertToVMAddr(classPtr->superclassVMAddr, vmAddrConverter);
3480 objcClass.dataVMAddr = convertToVMAddr(classPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
3481 objcClass.vmAddrConverter = vmAddrConverter;
3482 objcClass.isSwiftLegacy = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
3483 objcClass.isSwiftStable = classPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
3484 // The Swift class flags are only present if the class is swift
3485 objcClass.swiftClassFlags = (objcClass.isSwiftLegacy || objcClass.isSwiftStable) ? swiftClassPtr->swiftClassFlags : 0;
3486 handler(diag, classVMAddr, classSuperclassVMAddr, classDataVMAddr, objcClass, false);
3487 if (diag.hasError())
3488 return;
3489
3490 // Then call it on the metaclass
3491 const objc_class_t* metaClassPtr = (const objc_class_t*)(objcClass.isaVMAddr + slide);
3492 const swift_class_metadata_t* swiftMetaClassPtr = (const swift_class_metadata_t*)metaClassPtr;
3493 ObjCClassInfo objcMetaClass;
3494 objcMetaClass.isaVMAddr = convertToVMAddr(metaClassPtr->isaVMAddr, vmAddrConverter);
3495 objcMetaClass.superclassVMAddr = convertToVMAddr(metaClassPtr->superclassVMAddr, vmAddrConverter);
3496 objcMetaClass.dataVMAddr = convertToVMAddr(metaClassPtr->dataVMAddrAndFastFlags, vmAddrConverter) & FAST_DATA_MASK;
3497 objcMetaClass.vmAddrConverter = vmAddrConverter;
3498 objcMetaClass.isSwiftLegacy = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_LEGACY;
3499 objcMetaClass.isSwiftStable = metaClassPtr->dataVMAddrAndFastFlags & ObjCClassInfo::FAST_IS_SWIFT_STABLE;
3500 // The Swift class flags are only present if the class is swift
3501 objcMetaClass.swiftClassFlags = (objcMetaClass.isSwiftLegacy || objcMetaClass.isSwiftStable) ? swiftMetaClassPtr->swiftClassFlags : 0;
3502 classSuperclassVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, superclassVMAddr);
3503 classDataVMAddr = objcClass.isaVMAddr + offsetof(objc_class_t, dataVMAddrAndFastFlags);
3504 handler(diag, classVMAddr, classSuperclassVMAddr, classDataVMAddr, objcMetaClass, true);
3505 if (diag.hasError())
3506 return;
3507 }
3508 }
3509 });
3510 }
3511
3512 void MachOAnalyzer::forEachObjCCategory(Diagnostics& diag, bool contentRebased,
3513 void (^handler)(Diagnostics& diag, uint64_t categoryVMAddr,
3514 const dyld3::MachOAnalyzer::ObjCCategory& objcCategory)) const {
3515 const uint64_t ptrSize = pointerSize();
3516 intptr_t slide = getSlide();
3517
3518 MachOAnalyzer::VMAddrConverter vmAddrConverter;
3519 vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
3520 vmAddrConverter.slide = slide;
3521 vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
3522 vmAddrConverter.contentRebased = contentRebased;
3523
3524 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3525 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
3526 return;
3527 if ( strcmp(sectInfo.sectName, "__objc_catlist") != 0 )
3528 return;
3529 const uint8_t* categoryList = (uint8_t*)(sectInfo.sectAddr + slide);
3530 uint64_t categoryListSize = sectInfo.sectSize;
3531
3532 if ( (categoryListSize % ptrSize) != 0 ) {
3533 diag.error("Invalid objc category section size");
3534 return;
3535 }
3536
3537 if ( ptrSize == 8 ) {
3538 typedef uint64_t PtrTy;
3539 struct objc_category_t {
3540 PtrTy nameVMAddr;
3541 PtrTy clsVMAddr;
3542 PtrTy instanceMethodsVMAddr;
3543 PtrTy classMethodsVMAddr;
3544 PtrTy protocolsVMAddr;
3545 PtrTy instancePropertiesVMAddr;
3546 };
3547 for (uint64_t i = 0; i != categoryListSize; i += sizeof(PtrTy)) {
3548 uint64_t categoryVMAddr = convertToVMAddr(*(PtrTy*)(categoryList + i), vmAddrConverter);
3549
3550 const objc_category_t* categoryPtr = (const objc_category_t*)(categoryVMAddr + slide);
3551 ObjCCategory objCCategory;
3552 objCCategory.nameVMAddr = convertToVMAddr(categoryPtr->nameVMAddr, vmAddrConverter);
3553 objCCategory.clsVMAddr = convertToVMAddr(categoryPtr->clsVMAddr, vmAddrConverter);
3554 objCCategory.instanceMethodsVMAddr = convertToVMAddr(categoryPtr->instanceMethodsVMAddr, vmAddrConverter);
3555 objCCategory.classMethodsVMAddr = convertToVMAddr(categoryPtr->classMethodsVMAddr, vmAddrConverter);
3556 objCCategory.protocolsVMAddr = convertToVMAddr(categoryPtr->protocolsVMAddr, vmAddrConverter);
3557 objCCategory.instancePropertiesVMAddr = convertToVMAddr(categoryPtr->instancePropertiesVMAddr, vmAddrConverter);
3558 handler(diag, categoryVMAddr, objCCategory);
3559 if (diag.hasError())
3560 return;
3561 }
3562 } else {
3563 typedef uint32_t PtrTy;
3564 struct objc_category_t {
3565 PtrTy nameVMAddr;
3566 PtrTy clsVMAddr;
3567 PtrTy instanceMethodsVMAddr;
3568 PtrTy classMethodsVMAddr;
3569 PtrTy protocolsVMAddr;
3570 PtrTy instancePropertiesVMAddr;
3571 };
3572 for (uint64_t i = 0; i != categoryListSize; i += sizeof(PtrTy)) {
3573 uint64_t categoryVMAddr = convertToVMAddr(*(PtrTy*)(categoryList + i), vmAddrConverter);
3574
3575 const objc_category_t* categoryPtr = (const objc_category_t*)(categoryVMAddr + slide);
3576 ObjCCategory objCCategory;
3577 objCCategory.nameVMAddr = convertToVMAddr(categoryPtr->nameVMAddr, vmAddrConverter);
3578 objCCategory.clsVMAddr = convertToVMAddr(categoryPtr->clsVMAddr, vmAddrConverter);
3579 objCCategory.instanceMethodsVMAddr = convertToVMAddr(categoryPtr->instanceMethodsVMAddr, vmAddrConverter);
3580 objCCategory.classMethodsVMAddr = convertToVMAddr(categoryPtr->classMethodsVMAddr, vmAddrConverter);
3581 objCCategory.protocolsVMAddr = convertToVMAddr(categoryPtr->protocolsVMAddr, vmAddrConverter);
3582 objCCategory.instancePropertiesVMAddr = convertToVMAddr(categoryPtr->instancePropertiesVMAddr, vmAddrConverter);
3583 handler(diag, categoryVMAddr, objCCategory);
3584 if (diag.hasError())
3585 return;
3586 }
3587 }
3588 });
3589 }
3590
3591 void MachOAnalyzer::forEachObjCProtocol(Diagnostics& diag, bool contentRebased,
3592 void (^handler)(Diagnostics& diag, uint64_t categoryVMAddr,
3593 const dyld3::MachOAnalyzer::ObjCProtocol& objCProtocol)) const {
3594 const uint64_t ptrSize = pointerSize();
3595 intptr_t slide = getSlide();
3596
3597 MachOAnalyzer::VMAddrConverter vmAddrConverter;
3598 vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
3599 vmAddrConverter.slide = slide;
3600 vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
3601 vmAddrConverter.contentRebased = contentRebased;
3602
3603 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3604 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
3605 return;
3606 if ( strcmp(sectInfo.sectName, "__objc_protolist") != 0 )
3607 return;
3608 const uint8_t* protocolList = (uint8_t*)(sectInfo.sectAddr + slide);
3609 uint64_t protocolListSize = sectInfo.sectSize;
3610
3611 if ( (protocolListSize % ptrSize) != 0 ) {
3612 diag.error("Invalid objc protocol section size");
3613 return;
3614 }
3615
3616 if ( ptrSize == 8 ) {
3617 typedef uint64_t PtrTy;
3618 struct protocol_t {
3619 PtrTy isaVMAddr;
3620 PtrTy nameVMAddr;
3621 PtrTy protocolsVMAddr;
3622 PtrTy instanceMethodsVMAddr;
3623 PtrTy classMethodsVMAddr;
3624 PtrTy optionalInstanceMethodsVMAddr;
3625 PtrTy optionalClassMethodsVMAddr;
3626 PtrTy instancePropertiesVMAddr;
3627 uint32_t size;
3628 uint32_t flags;
3629 // Fields below this point are not always present on disk.
3630 PtrTy extendedMethodTypesVMAddr;
3631 PtrTy demangledNameVMAddr;
3632 PtrTy classPropertiesVMAddr;
3633 };
3634 for (uint64_t i = 0; i != protocolListSize; i += sizeof(PtrTy)) {
3635 uint64_t protocolVMAddr = convertToVMAddr(*(PtrTy*)(protocolList + i), vmAddrConverter);
3636
3637 const protocol_t* protocolPtr = (const protocol_t*)(protocolVMAddr + slide);
3638 ObjCProtocol objCProtocol;
3639 objCProtocol.isaVMAddr = convertToVMAddr(protocolPtr->isaVMAddr, vmAddrConverter);
3640 objCProtocol.nameVMAddr = convertToVMAddr(protocolPtr->nameVMAddr, vmAddrConverter);
3641 objCProtocol.instanceMethodsVMAddr = convertToVMAddr(protocolPtr->instanceMethodsVMAddr, vmAddrConverter);
3642 objCProtocol.classMethodsVMAddr = convertToVMAddr(protocolPtr->classMethodsVMAddr, vmAddrConverter);
3643 objCProtocol.optionalInstanceMethodsVMAddr = convertToVMAddr(protocolPtr->optionalInstanceMethodsVMAddr, vmAddrConverter);
3644 objCProtocol.optionalClassMethodsVMAddr = convertToVMAddr(protocolPtr->optionalClassMethodsVMAddr, vmAddrConverter);
3645
3646 // Track if this protocol needs a reallocation in objc
3647 objCProtocol.requiresObjCReallocation = protocolPtr->size < sizeof(protocol_t);
3648
3649 handler(diag, protocolVMAddr, objCProtocol);
3650 if (diag.hasError())
3651 return;
3652 }
3653 } else {
3654 typedef uint32_t PtrTy;
3655 struct protocol_t {
3656 PtrTy isaVMAddr;
3657 PtrTy nameVMAddr;
3658 PtrTy protocolsVMAddr;
3659 PtrTy instanceMethodsVMAddr;
3660 PtrTy classMethodsVMAddr;
3661 PtrTy optionalInstanceMethodsVMAddr;
3662 PtrTy optionalClassMethodsVMAddr;
3663 PtrTy instancePropertiesVMAddr;
3664 uint32_t size;
3665 uint32_t flags;
3666 // Fields below this point are not always present on disk.
3667 PtrTy extendedMethodTypesVMAddr;
3668 PtrTy demangledNameVMAddr;
3669 PtrTy classPropertiesVMAddr;
3670 };
3671 for (uint64_t i = 0; i != protocolListSize; i += sizeof(PtrTy)) {
3672 uint64_t protocolVMAddr = convertToVMAddr(*(PtrTy*)(protocolList + i), vmAddrConverter);
3673
3674 const protocol_t* protocolPtr = (const protocol_t*)(protocolVMAddr + slide);
3675 ObjCProtocol objCProtocol;
3676 objCProtocol.isaVMAddr = convertToVMAddr(protocolPtr->isaVMAddr, vmAddrConverter);
3677 objCProtocol.nameVMAddr = convertToVMAddr(protocolPtr->nameVMAddr, vmAddrConverter);
3678 objCProtocol.instanceMethodsVMAddr = convertToVMAddr(protocolPtr->instanceMethodsVMAddr, vmAddrConverter);
3679 objCProtocol.classMethodsVMAddr = convertToVMAddr(protocolPtr->classMethodsVMAddr, vmAddrConverter);
3680 objCProtocol.optionalInstanceMethodsVMAddr = convertToVMAddr(protocolPtr->optionalInstanceMethodsVMAddr, vmAddrConverter);
3681 objCProtocol.optionalClassMethodsVMAddr = convertToVMAddr(protocolPtr->optionalClassMethodsVMAddr, vmAddrConverter);
3682
3683 // Track if this protocol needs a reallocation in objc
3684 objCProtocol.requiresObjCReallocation = protocolPtr->size < sizeof(protocol_t);
3685
3686 handler(diag, protocolVMAddr, objCProtocol);
3687 if (diag.hasError())
3688 return;
3689 }
3690 }
3691 });
3692 }
3693
3694 void MachOAnalyzer::forEachObjCMethod(uint64_t methodListVMAddr, bool contentRebased,
3695 void (^handler)(uint64_t methodVMAddr, const ObjCMethod& method)) const {
3696 if ( methodListVMAddr == 0 )
3697 return;
3698
3699 const uint64_t ptrSize = pointerSize();
3700 intptr_t slide = getSlide();
3701
3702 MachOAnalyzer::VMAddrConverter vmAddrConverter;
3703 vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
3704 vmAddrConverter.slide = slide;
3705 vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
3706 vmAddrConverter.contentRebased = contentRebased;
3707
3708 if ( ptrSize == 8 ) {
3709 typedef uint64_t PtrTy;
3710 struct method_list_t {
3711 uint32_t entsize;
3712 uint32_t count;
3713 PtrTy methodArrayBase; // Note this is the start the array method_t[0]
3714
3715 uint32_t getEntsize() const {
3716 return (entsize) & ~(uint32_t)3;
3717 }
3718 };
3719
3720 struct method_t {
3721 PtrTy nameVMAddr; // SEL
3722 PtrTy typesVMAddr; // const char *
3723 PtrTy impVMAddr; // IMP
3724 };
3725
3726 const method_list_t* methodList = (const method_list_t*)(methodListVMAddr + slide);
3727 uint64_t methodListArrayBaseVMAddr = methodListVMAddr + offsetof(method_list_t, methodArrayBase);
3728 for (unsigned i = 0; i != methodList->count; ++i) {
3729 uint64_t methodEntryOffset = i * methodList->getEntsize();
3730 uint64_t methodVMAddr = methodListArrayBaseVMAddr + methodEntryOffset;
3731 const method_t* methodPtr = (const method_t*)(methodVMAddr + slide);
3732 ObjCMethod method;
3733 method.nameVMAddr = convertToVMAddr(methodPtr->nameVMAddr, vmAddrConverter);
3734 method.typesVMAddr = convertToVMAddr(methodPtr->typesVMAddr, vmAddrConverter);
3735 method.impVMAddr = convertToVMAddr(methodPtr->impVMAddr, vmAddrConverter);
3736 method.nameLocationVMAddr = methodVMAddr + offsetof(method_t, nameVMAddr);
3737 handler(methodVMAddr, method);
3738 }
3739 } else {
3740 typedef uint32_t PtrTy;
3741 struct method_list_t {
3742 uint32_t entsize;
3743 uint32_t count;
3744 PtrTy methodArrayBase; // Note this is the start the array method_t[0]
3745
3746 uint32_t getEntsize() const {
3747 return (entsize) & ~(uint32_t)3;
3748 }
3749 };
3750
3751 struct method_t {
3752 PtrTy nameVMAddr; // SEL
3753 PtrTy typesVMAddr; // const char *
3754 PtrTy impVMAddr; // IMP
3755 };
3756
3757 const method_list_t* methodList = (const method_list_t*)(methodListVMAddr + slide);
3758 uint64_t methodListArrayBaseVMAddr = methodListVMAddr + offsetof(method_list_t, methodArrayBase);
3759 for (unsigned i = 0; i != methodList->count; ++i) {
3760 uint64_t methodEntryOffset = i * methodList->getEntsize();
3761 uint64_t methodVMAddr = methodListArrayBaseVMAddr + methodEntryOffset;
3762 const method_t* methodPtr = (const method_t*)(methodVMAddr + slide);
3763 ObjCMethod method;
3764 method.nameVMAddr = convertToVMAddr(methodPtr->nameVMAddr, vmAddrConverter);
3765 method.typesVMAddr = convertToVMAddr(methodPtr->typesVMAddr, vmAddrConverter);
3766 method.impVMAddr = convertToVMAddr(methodPtr->impVMAddr, vmAddrConverter);
3767 method.nameLocationVMAddr = methodVMAddr + offsetof(method_t, nameVMAddr);
3768 handler(methodVMAddr, method);
3769 }
3770 }
3771 }
3772
3773
3774 void MachOAnalyzer::forEachObjCSelectorReference(Diagnostics& diag, bool contentRebased,
3775 void (^handler)(uint64_t selRefVMAddr, uint64_t selRefTargetVMAddr)) const {
3776 const uint64_t ptrSize = pointerSize();
3777 intptr_t slide = getSlide();
3778
3779 MachOAnalyzer::VMAddrConverter vmAddrConverter;
3780 vmAddrConverter.preferredLoadAddress = preferredLoadAddress();
3781 vmAddrConverter.slide = slide;
3782 vmAddrConverter.chainedPointerFormat = hasChainedFixups() ? chainedPointerFormat() : 0;
3783 vmAddrConverter.contentRebased = contentRebased;
3784
3785 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3786 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
3787 return;
3788 if ( strcmp(sectInfo.sectName, "__objc_selrefs") != 0 )
3789 return;
3790 uint64_t selRefSectionVMAddr = sectInfo.sectAddr;
3791 const uint8_t* selRefs = (uint8_t*)(selRefSectionVMAddr + slide);
3792 uint64_t selRefsSize = sectInfo.sectSize;
3793
3794 if ( (selRefsSize % ptrSize) != 0 ) {
3795 diag.error("Invalid sel ref section size");
3796 return;
3797 }
3798
3799 if ( ptrSize == 8 ) {
3800 typedef uint64_t PtrTy;
3801 for (uint64_t i = 0; i != selRefsSize; i += sizeof(PtrTy)) {
3802 uint64_t selRefVMAddr = selRefSectionVMAddr + i;
3803 uint64_t selRefTargetVMAddr = convertToVMAddr(*(PtrTy*)(selRefs + i), vmAddrConverter);
3804 handler(selRefVMAddr, selRefTargetVMAddr);
3805 if (diag.hasError()) {
3806 stop = true;
3807 return;
3808 }
3809 }
3810 } else {
3811 typedef uint32_t PtrTy;
3812 for (uint64_t i = 0; i != selRefsSize; i += sizeof(PtrTy)) {
3813 uint64_t selRefVMAddr = selRefSectionVMAddr + i;
3814 uint64_t selRefTargetVMAddr = convertToVMAddr(*(PtrTy*)(selRefs + i), vmAddrConverter);
3815 handler(selRefVMAddr, selRefTargetVMAddr);
3816 if (diag.hasError()) {
3817 stop = true;
3818 return;
3819 }
3820 }
3821 }
3822 });
3823 }
3824
3825 void MachOAnalyzer::forEachObjCMethodName(void (^handler)(const char* methodName)) const {
3826 intptr_t slide = getSlide();
3827 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3828 if ( strcmp(sectInfo.segInfo.segName, "__TEXT") != 0 )
3829 return;
3830 if ( strcmp(sectInfo.sectName, "__objc_methname") != 0 )
3831 return;
3832 if ( sectInfo.segInfo.isProtected || ( (sectInfo.sectFlags & SECTION_TYPE) != S_CSTRING_LITERALS ) ) {
3833 stop = true;
3834 return;
3835 }
3836 if ( malformedSectionRange ) {
3837 stop = true;
3838 return;
3839 }
3840
3841 const char* content = (const char*)(sectInfo.sectAddr + slide);
3842 uint64_t sectionSize = sectInfo.sectSize;
3843
3844 const char* s = (const char*)content;
3845 const char* end = s + sectionSize;
3846 while ( s < end ) {
3847 handler(s);
3848 s += strlen(s) + 1;
3849 }
3850 });
3851 }
3852
3853
3854 bool MachOAnalyzer::hasObjCMessageReferences() const {
3855
3856 __block bool foundSection = false;
3857 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3858 if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 )
3859 return;
3860 if ( strcmp(sectInfo.sectName, "__objc_msgrefs") != 0 )
3861 return;
3862 foundSection = true;
3863 stop = true;
3864 });
3865 return foundSection;
3866 }
3867
3868 const MachOAnalyzer::ObjCImageInfo* MachOAnalyzer::objcImageInfo() const {
3869 int64_t slide = getSlide();
3870
3871 __block bool foundInvalidObjCImageInfo = false;
3872 __block const ObjCImageInfo* imageInfo = nullptr;
3873 forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectionInfo, bool malformedSectionRange, bool& stop) {
3874 if ( strncmp(sectionInfo.segInfo.segName, "__DATA", 6) != 0 )
3875 return;
3876 if (strcmp(sectionInfo.sectName, "__objc_imageinfo") != 0)
3877 return;
3878 if ( malformedSectionRange ) {
3879 stop = true;
3880 return;
3881 }
3882 if ( sectionInfo.sectSize != 8 ) {
3883 stop = true;
3884 return;
3885 }
3886 imageInfo = (const ObjCImageInfo*)(sectionInfo.sectAddr + slide);
3887 if ( (imageInfo->flags & ObjCImageInfo::dyldPreoptimized) != 0 ) {
3888 foundInvalidObjCImageInfo = true;
3889 stop = true;
3890 return;
3891 }
3892 stop = true;
3893 });
3894 if ( foundInvalidObjCImageInfo )
3895 return nullptr;
3896 return imageInfo;
3897 }
3898
3899 uint32_t MachOAnalyzer::loadCommandsFreeSpace() const
3900 {
3901 __block uint32_t firstSectionFileOffset = 0;
3902 __block uint32_t firstSegmentFileOffset = 0;
3903 forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
3904 firstSectionFileOffset = sectInfo.sectFileOffset;
3905 firstSegmentFileOffset = (uint32_t)sectInfo.segInfo.fileOffset;
3906 stop = true;
3907 });
3908
3909 uint32_t headerSize = (this->magic == MH_MAGIC_64) ? sizeof(mach_header_64) : sizeof(mach_header);
3910 uint32_t existSpaceUsed = this->sizeofcmds + headerSize;
3911 return firstSectionFileOffset - firstSegmentFileOffset - existSpaceUsed;
3912 }
3913
3914
3915 } // dyld3
3916
3917