]> git.saurik.com Git - apple/ld64.git/blob - src/other/machochecker.cpp
4841c7ca0c4ed47c7a4b812f0c566c63b8073f73
[apple/ld64.git] / src / other / machochecker.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <sys/mman.h>
28 #include <stdarg.h>
29 #include <stdio.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 #include <errno.h>
33
34 #include <vector>
35 #include <set>
36 #include <ext/hash_set>
37
38 #include "MachOFileAbstraction.hpp"
39 #include "Architectures.hpp"
40
41
//
// Formats a printf-style message into a freshly allocated buffer and throws
// it as a `const char*`.  This function never returns.
//
// The buffer is not freed here.  If the message cannot be formatted
// (vasprintf reports failure, in which case the output pointer must not be
// trusted) a fixed string literal is thrown instead, so that a catcher never
// receives an indeterminate pointer.
//
__attribute__((noreturn))
void throwf(const char* format, ...)
{
    va_list list;
    char*   p = NULL;
    va_start(list, format);
    const int formatted = vasprintf(&p, format, list);
    va_end(list);

    if ( (formatted < 0) || (p == NULL) )
        throw "could not format error message";

    const char* t = p;
    throw t;
}
54
55 static uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end)
56 {
57 uint64_t result = 0;
58 int bit = 0;
59 do {
60 if (p == end)
61 throwf("malformed uleb128");
62
63 uint64_t slice = *p & 0x7f;
64
65 if (bit >= 64 || slice << bit >> bit != slice)
66 throwf("uleb128 too big");
67 else {
68 result |= (slice << bit);
69 bit += 7;
70 }
71 }
72 while (*p++ & 0x80);
73 return result;
74 }
75
76
//
// Decodes one signed LEB128 value starting at `p`.
//
// `p` is advanced past every byte consumed.  Throws "malformed sleb128" (as a
// `const char*`) if `end` is reached before the value terminates.
//
// The value is accumulated in an unsigned 64-bit integer so that every shift
// is well defined: each 7-bit payload is widened to 64 bits before it is
// shifted (shifting it as a plain `int` loses everything above bit 31), and
// no shift by 64 or more is ever performed.  Payload bits beyond bit 63 are
// dropped; unlike read_uleb128, no "too big" error is raised.
//
static int64_t read_sleb128(const uint8_t*& p, const uint8_t* end)
{
    uint64_t accum = 0;
    int      bit = 0;
    uint8_t  byte;
    do {
        if ( p == end )
            throw "malformed sleb128";
        byte = *p++;
        if ( bit < 64 )
            accum |= ((uint64_t)(byte & 0x7f)) << bit;
        bit += 7;
    } while ( byte & 0x80 );

    // sign extend negative numbers (nothing left to extend once 64 bits are filled)
    if ( ((byte & 0x40) != 0) && (bit < 64) )
        accum |= (~(uint64_t)0) << bit;

    return (int64_t)accum;
}
94
95
96 template <typename A>
97 class MachOChecker
98 {
99 public:
100 static bool validFile(const uint8_t* fileContent);
101 static MachOChecker<A>* make(const uint8_t* fileContent, uint32_t fileLength, const char* path)
102 { return new MachOChecker<A>(fileContent, fileLength, path); }
103 virtual ~MachOChecker() {}
104
105
106 private:
107 typedef typename A::P P;
108 typedef typename A::P::E E;
109 typedef typename A::P::uint_t pint_t;
110
111 class CStringEquals
112 {
113 public:
114 bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); }
115 };
116
117 typedef __gnu_cxx::hash_set<const char*, __gnu_cxx::hash<const char*>, CStringEquals> StringSet;
118
119 MachOChecker(const uint8_t* fileContent, uint32_t fileLength, const char* path);
120 void checkMachHeader();
121 void checkLoadCommands();
122 void checkSection(const macho_segment_command<P>* segCmd, const macho_section<P>* sect);
123 uint8_t loadCommandSizeMask();
124 void checkSymbolTable();
125 void checkInitTerms();
126 void checkIndirectSymbolTable();
127 void checkRelocations();
128 void checkExternalReloation(const macho_relocation_info<P>* reloc);
129 void checkLocalReloation(const macho_relocation_info<P>* reloc);
130 pint_t relocBase();
131 bool addressInWritableSegment(pint_t address);
132 bool hasTextRelocInRange(pint_t start, pint_t end);
133 pint_t segStartAddress(uint8_t segIndex);
134 bool addressIsRebaseSite(pint_t addr);
135 bool addressIsBindingSite(pint_t addr);
136 pint_t getInitialStackPointer(const macho_thread_command<P>*);
137 pint_t getEntryPoint(const macho_thread_command<P>*);
138
139
140
141 const char* fPath;
142 const macho_header<P>* fHeader;
143 uint32_t fLength;
144 const char* fStrings;
145 const char* fStringsEnd;
146 const macho_nlist<P>* fSymbols;
147 uint32_t fSymbolCount;
148 const macho_dysymtab_command<P>* fDynamicSymbolTable;
149 const uint32_t* fIndirectTable;
150 uint32_t fIndirectTableCount;
151 const macho_relocation_info<P>* fLocalRelocations;
152 uint32_t fLocalRelocationsCount;
153 const macho_relocation_info<P>* fExternalRelocations;
154 uint32_t fExternalRelocationsCount;
155 bool fWriteableSegmentWithAddrOver4G;
156 bool fSlidableImage;
157 const macho_segment_command<P>* fFirstSegment;
158 const macho_segment_command<P>* fFirstWritableSegment;
159 const macho_segment_command<P>* fTEXTSegment;
160 const macho_dyld_info_command<P>* fDyldInfo;
161 uint32_t fSectionCount;
162 std::vector<const macho_segment_command<P>*>fSegments;
163 };
164
165
166
167 template <>
168 bool MachOChecker<ppc>::validFile(const uint8_t* fileContent)
169 {
170 const macho_header<P>* header = (const macho_header<P>*)fileContent;
171 if ( header->magic() != MH_MAGIC )
172 return false;
173 if ( header->cputype() != CPU_TYPE_POWERPC )
174 return false;
175 switch (header->filetype()) {
176 case MH_EXECUTE:
177 case MH_DYLIB:
178 case MH_BUNDLE:
179 case MH_DYLINKER:
180 return true;
181 }
182 return false;
183 }
184
185 template <>
186 bool MachOChecker<ppc64>::validFile(const uint8_t* fileContent)
187 {
188 const macho_header<P>* header = (const macho_header<P>*)fileContent;
189 if ( header->magic() != MH_MAGIC_64 )
190 return false;
191 if ( header->cputype() != CPU_TYPE_POWERPC64 )
192 return false;
193 switch (header->filetype()) {
194 case MH_EXECUTE:
195 case MH_DYLIB:
196 case MH_BUNDLE:
197 case MH_DYLINKER:
198 return true;
199 }
200 return false;
201 }
202
203 template <>
204 bool MachOChecker<x86>::validFile(const uint8_t* fileContent)
205 {
206 const macho_header<P>* header = (const macho_header<P>*)fileContent;
207 if ( header->magic() != MH_MAGIC )
208 return false;
209 if ( header->cputype() != CPU_TYPE_I386 )
210 return false;
211 switch (header->filetype()) {
212 case MH_EXECUTE:
213 case MH_DYLIB:
214 case MH_BUNDLE:
215 case MH_DYLINKER:
216 return true;
217 }
218 return false;
219 }
220
221 template <>
222 bool MachOChecker<x86_64>::validFile(const uint8_t* fileContent)
223 {
224 const macho_header<P>* header = (const macho_header<P>*)fileContent;
225 if ( header->magic() != MH_MAGIC_64 )
226 return false;
227 if ( header->cputype() != CPU_TYPE_X86_64 )
228 return false;
229 switch (header->filetype()) {
230 case MH_EXECUTE:
231 case MH_DYLIB:
232 case MH_BUNDLE:
233 case MH_DYLINKER:
234 return true;
235 }
236 return false;
237 }
238
239 template <>
240 bool MachOChecker<arm>::validFile(const uint8_t* fileContent)
241 {
242 const macho_header<P>* header = (const macho_header<P>*)fileContent;
243 if ( header->magic() != MH_MAGIC )
244 return false;
245 if ( header->cputype() != CPU_TYPE_ARM )
246 return false;
247 switch (header->filetype()) {
248 case MH_EXECUTE:
249 case MH_DYLIB:
250 case MH_BUNDLE:
251 case MH_DYLINKER:
252 return true;
253 }
254 return false;
255 }
256
257 template <> uint8_t MachOChecker<ppc>::loadCommandSizeMask() { return 0x03; }
258 template <> uint8_t MachOChecker<ppc64>::loadCommandSizeMask() { return 0x07; }
259 template <> uint8_t MachOChecker<x86>::loadCommandSizeMask() { return 0x03; }
260 template <> uint8_t MachOChecker<x86_64>::loadCommandSizeMask() { return 0x07; }
261 template <> uint8_t MachOChecker<arm>::loadCommandSizeMask() { return 0x03; }
262
263
264 template <>
265 ppc::P::uint_t MachOChecker<ppc>::getInitialStackPointer(const macho_thread_command<ppc::P>* threadInfo)
266 {
267 return threadInfo->thread_register(3);
268 }
269
270 template <>
271 ppc64::P::uint_t MachOChecker<ppc64>::getInitialStackPointer(const macho_thread_command<ppc64::P>* threadInfo)
272 {
273 return threadInfo->thread_register(3);
274 }
275
276 template <>
277 x86::P::uint_t MachOChecker<x86>::getInitialStackPointer(const macho_thread_command<x86::P>* threadInfo)
278 {
279 return threadInfo->thread_register(7);
280 }
281
282 template <>
283 x86_64::P::uint_t MachOChecker<x86_64>::getInitialStackPointer(const macho_thread_command<x86_64::P>* threadInfo)
284 {
285 return threadInfo->thread_register(7);
286 }
287
288 template <>
289 arm::P::uint_t MachOChecker<arm>::getInitialStackPointer(const macho_thread_command<arm::P>* threadInfo)
290 {
291 return threadInfo->thread_register(13);
292 }
293
294
295
296
297
298 template <>
299 ppc::P::uint_t MachOChecker<ppc>::getEntryPoint(const macho_thread_command<ppc::P>* threadInfo)
300 {
301 return threadInfo->thread_register(0);
302 }
303
304 template <>
305 ppc64::P::uint_t MachOChecker<ppc64>::getEntryPoint(const macho_thread_command<ppc64::P>* threadInfo)
306 {
307 return threadInfo->thread_register(0);
308 }
309
310 template <>
311 x86::P::uint_t MachOChecker<x86>::getEntryPoint(const macho_thread_command<x86::P>* threadInfo)
312 {
313 return threadInfo->thread_register(10);
314 }
315
316 template <>
317 x86_64::P::uint_t MachOChecker<x86_64>::getEntryPoint(const macho_thread_command<x86_64::P>* threadInfo)
318 {
319 return threadInfo->thread_register(16);
320 }
321
322 template <>
323 arm::P::uint_t MachOChecker<arm>::getEntryPoint(const macho_thread_command<arm::P>* threadInfo)
324 {
325 return threadInfo->thread_register(15);
326 }
327
328
329 template <typename A>
330 MachOChecker<A>::MachOChecker(const uint8_t* fileContent, uint32_t fileLength, const char* path)
331 : fHeader(NULL), fLength(fileLength), fStrings(NULL), fSymbols(NULL), fSymbolCount(0), fDynamicSymbolTable(NULL), fIndirectTableCount(0),
332 fLocalRelocations(NULL), fLocalRelocationsCount(0), fExternalRelocations(NULL), fExternalRelocationsCount(0),
333 fWriteableSegmentWithAddrOver4G(false), fSlidableImage(false), fFirstSegment(NULL), fFirstWritableSegment(NULL),
334 fTEXTSegment(NULL), fDyldInfo(NULL), fSectionCount(0)
335 {
336 // sanity check
337 if ( ! validFile(fileContent) )
338 throw "not a mach-o file that can be checked";
339
340 fPath = strdup(path);
341 fHeader = (const macho_header<P>*)fileContent;
342
343 // sanity check header
344 checkMachHeader();
345
346 // check load commands
347 checkLoadCommands();
348
349 checkIndirectSymbolTable();
350
351 checkRelocations();
352
353 checkSymbolTable();
354
355 checkInitTerms();
356 }
357
358
359 template <typename A>
360 void MachOChecker<A>::checkMachHeader()
361 {
362 if ( (fHeader->sizeofcmds() + sizeof(macho_header<P>)) > fLength )
363 throw "sizeofcmds in mach_header is larger than file";
364
365 uint32_t flags = fHeader->flags();
366 const uint32_t invalidBits = MH_INCRLINK | MH_LAZY_INIT | 0xFE000000;
367 if ( flags & invalidBits )
368 throw "invalid bits in mach_header flags";
369 if ( (flags & MH_NO_REEXPORTED_DYLIBS) && (fHeader->filetype() != MH_DYLIB) )
370 throw "MH_NO_REEXPORTED_DYLIBS bit of mach_header flags only valid for dylibs";
371
372 switch ( fHeader->filetype() ) {
373 case MH_EXECUTE:
374 fSlidableImage = ( flags & MH_PIE );
375 break;
376 case MH_DYLIB:
377 case MH_BUNDLE:
378 fSlidableImage = true;
379 break;
380 default:
381 throw "not a mach-o file type supported by this tool";
382 }
383 }
384
385 template <typename A>
386 void MachOChecker<A>::checkLoadCommands()
387 {
388 // check that all load commands fit within the load command space file
389 const macho_encryption_info_command<P>* encryption_info = NULL;
390 const macho_thread_command<P>* threadInfo = NULL;
391 const uint8_t* const endOfFile = (uint8_t*)fHeader + fLength;
392 const uint8_t* const endOfLoadCommands = (uint8_t*)fHeader + sizeof(macho_header<P>) + fHeader->sizeofcmds();
393 const uint32_t cmd_count = fHeader->ncmds();
394 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
395 const macho_load_command<P>* cmd = cmds;
396 for (uint32_t i = 0; i < cmd_count; ++i) {
397 uint32_t size = cmd->cmdsize();
398 if ( (size & this->loadCommandSizeMask()) != 0 )
399 throwf("load command #%d has a unaligned size", i);
400 const uint8_t* endOfCmd = ((uint8_t*)cmd)+cmd->cmdsize();
401 if ( endOfCmd > endOfLoadCommands )
402 throwf("load command #%d extends beyond the end of the load commands", i);
403 if ( endOfCmd > endOfFile )
404 throwf("load command #%d extends beyond the end of the file", i);
405 switch ( cmd->cmd() ) {
406 case macho_segment_command<P>::CMD:
407 case LC_SYMTAB:
408 case LC_DYSYMTAB:
409 case LC_LOAD_DYLIB:
410 case LC_ID_DYLIB:
411 case LC_LOAD_DYLINKER:
412 case LC_ID_DYLINKER:
413 case macho_routines_command<P>::CMD:
414 case LC_SUB_FRAMEWORK:
415 case LC_SUB_CLIENT:
416 case LC_TWOLEVEL_HINTS:
417 case LC_PREBIND_CKSUM:
418 case LC_LOAD_WEAK_DYLIB:
419 case LC_LAZY_LOAD_DYLIB:
420 case LC_UUID:
421 case LC_REEXPORT_DYLIB:
422 case LC_SEGMENT_SPLIT_INFO:
423 case LC_CODE_SIGNATURE:
424 case LC_LOAD_UPWARD_DYLIB:
425 case LC_VERSION_MIN_MACOSX:
426 case LC_VERSION_MIN_IPHONEOS:
427 case LC_FUNCTION_STARTS:
428 case LC_RPATH:
429 break;
430 case LC_DYLD_INFO:
431 case LC_DYLD_INFO_ONLY:
432 fDyldInfo = (macho_dyld_info_command<P>*)cmd;
433 break;
434 case LC_ENCRYPTION_INFO:
435 encryption_info = (macho_encryption_info_command<P>*)cmd;
436 break;
437 case LC_SUB_UMBRELLA:
438 case LC_SUB_LIBRARY:
439 if ( fHeader->flags() & MH_NO_REEXPORTED_DYLIBS )
440 throw "MH_NO_REEXPORTED_DYLIBS bit of mach_header flags should not be set in an image with LC_SUB_LIBRARY or LC_SUB_UMBRELLA";
441 break;
442 case LC_UNIXTHREAD:
443 if ( fHeader->filetype() != MH_EXECUTE )
444 throw "LC_UNIXTHREAD can only be used in MH_EXECUTE file types";
445 threadInfo = (macho_thread_command<P>*)cmd;
446 break;
447 default:
448 throwf("load command #%d is an unknown kind 0x%X", i, cmd->cmd());
449 }
450 cmd = (const macho_load_command<P>*)endOfCmd;
451 }
452
453 // check segments
454 cmd = cmds;
455 std::vector<std::pair<pint_t, pint_t> > segmentAddressRanges;
456 std::vector<std::pair<pint_t, pint_t> > segmentFileOffsetRanges;
457 const macho_segment_command<P>* linkEditSegment = NULL;
458 const macho_segment_command<P>* stackSegment = NULL;
459 for (uint32_t i = 0; i < cmd_count; ++i) {
460 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
461 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
462 fSegments.push_back(segCmd);
463 if ( segCmd->cmdsize() != (sizeof(macho_segment_command<P>) + segCmd->nsects() * sizeof(macho_section_content<P>)) )
464 throw "invalid segment load command size";
465
466 // see if this overlaps another segment address range
467 uint64_t startAddr = segCmd->vmaddr();
468 uint64_t endAddr = startAddr + segCmd->vmsize();
469 for (typename std::vector<std::pair<pint_t, pint_t> >::iterator it = segmentAddressRanges.begin(); it != segmentAddressRanges.end(); ++it) {
470 if ( it->first < startAddr ) {
471 if ( it->second > startAddr )
472 throw "overlapping segment vm addresses";
473 }
474 else if ( it->first > startAddr ) {
475 if ( it->first < endAddr )
476 throw "overlapping segment vm addresses";
477 }
478 else {
479 throw "overlapping segment vm addresses";
480 }
481 segmentAddressRanges.push_back(std::make_pair<pint_t, pint_t>(startAddr, endAddr));
482 }
483 // see if this overlaps another segment file offset range
484 uint64_t startOffset = segCmd->fileoff();
485 uint64_t endOffset = startOffset + segCmd->filesize();
486 for (typename std::vector<std::pair<pint_t, pint_t> >::iterator it = segmentFileOffsetRanges.begin(); it != segmentFileOffsetRanges.end(); ++it) {
487 if ( it->first < startOffset ) {
488 if ( it->second > startOffset )
489 throw "overlapping segment file data";
490 }
491 else if ( it->first > startOffset ) {
492 if ( it->first < endOffset )
493 throw "overlapping segment file data";
494 }
495 else {
496 throw "overlapping segment file data";
497 }
498 segmentFileOffsetRanges.push_back(std::make_pair<pint_t, pint_t>(startOffset, endOffset));
499 // check is within file bounds
500 if ( (startOffset > fLength) || (endOffset > fLength) )
501 throw "segment file data is past end of file";
502 }
503 // verify it fits in file
504 if ( startOffset > fLength )
505 throw "segment fileoff does not fit in file";
506 if ( endOffset > fLength )
507 throw "segment fileoff+filesize does not fit in file";
508
509 // record special segments
510 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 )
511 linkEditSegment = segCmd;
512 else if ( strcmp(segCmd->segname(), "__UNIXSTACK") == 0 )
513 stackSegment = segCmd;
514
515 // cache interesting segments
516 if ( fFirstSegment == NULL )
517 fFirstSegment = segCmd;
518 if ( (fTEXTSegment == NULL) && (strcmp(segCmd->segname(), "__TEXT") == 0) )
519 fTEXTSegment = segCmd;
520 if ( (segCmd->initprot() & VM_PROT_WRITE) != 0 ) {
521 if ( fFirstWritableSegment == NULL )
522 fFirstWritableSegment = segCmd;
523 if ( segCmd->vmaddr() > 0x100000000ULL )
524 fWriteableSegmentWithAddrOver4G = true;
525 }
526
527 // check section ranges
528 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
529 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
530 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
531 // check all non-zero sized sections are within segment
532 if ( sect->addr() < startAddr )
533 throwf("section %s vm address not within segment", sect->sectname());
534 if ( (sect->addr()+sect->size()) > endAddr )
535 throwf("section %s vm address not within segment", sect->sectname());
536 if ( ((sect->flags() & SECTION_TYPE) != S_ZEROFILL)
537 && ((sect->flags() & SECTION_TYPE) != S_THREAD_LOCAL_ZEROFILL)
538 && (segCmd->filesize() != 0)
539 && (sect->size() != 0) ) {
540 if ( sect->offset() < startOffset )
541 throwf("section %s file offset not within segment", sect->sectname());
542 if ( (sect->offset()+sect->size()) > endOffset )
543 throwf("section %s file offset not within segment", sect->sectname());
544 }
545 checkSection(segCmd, sect);
546 ++fSectionCount;
547 }
548 }
549 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
550 }
551
552 // verify there was a LINKEDIT segment
553 if ( linkEditSegment == NULL )
554 throw "no __LINKEDIT segment";
555
556 // verify there was an executable __TEXT segment and load commands are in it
557 if ( fTEXTSegment == NULL )
558 throw "no __TEXT segment";
559 if ( fTEXTSegment->initprot() != (VM_PROT_READ|VM_PROT_EXECUTE) )
560 throw "__TEXT segment does not have r-x init permissions";
561 //if ( fTEXTSegment->maxprot() != (VM_PROT_READ|VM_PROT_EXECUTE|VM_PROT_WRITE) )
562 // throw "__TEXT segment does not have rwx max permissions";
563 if ( fTEXTSegment->fileoff() != 0 )
564 throw "__TEXT segment does not start at mach_header";
565 if ( fTEXTSegment->filesize() < (sizeof(macho_header<P>)+fHeader->sizeofcmds()) )
566 throw "__TEXT segment smaller than load commands";
567
568 // verify if custom stack used, that stack is in __UNIXSTACK segment
569 if ( threadInfo != NULL ) {
570 pint_t initialSP = getInitialStackPointer(threadInfo);
571 if ( initialSP != 0 ) {
572 if ( stackSegment == NULL )
573 throw "LC_UNIXTHREAD specifics custom initial stack pointer, but no __UNIXSTACK segment";
574 if ( (initialSP < stackSegment->vmaddr()) || (initialSP > (stackSegment->vmaddr()+stackSegment->vmsize())) )
575 throw "LC_UNIXTHREAD specifics custom initial stack pointer which does not point into __UNIXSTACK segment";
576 }
577 }
578
579 // verify __UNIXSTACK is zero fill
580 if ( stackSegment != NULL ) {
581 if ( (stackSegment->filesize() != 0) || (stackSegment->fileoff() != 0) )
582 throw "__UNIXSTACK is not a zero-fill segment";
583 if ( stackSegment->vmsize() < 4096 )
584 throw "__UNIXSTACK segment is too small";
585 }
586
587 // verify entry point is in __TEXT segment
588 if ( threadInfo != NULL ) {
589 pint_t initialPC = getEntryPoint(threadInfo);
590 if ( (initialPC < fTEXTSegment->vmaddr()) || (initialPC >= (fTEXTSegment->vmaddr()+fTEXTSegment->vmsize())) )
591 throwf("entry point 0x%0llX is outside __TEXT segment", (long long)initialPC);
592 }
593
594
595 // checks for executables
596 bool isStaticExecutable = false;
597 if ( fHeader->filetype() == MH_EXECUTE ) {
598 isStaticExecutable = true;
599 cmd = cmds;
600 for (uint32_t i = 0; i < cmd_count; ++i) {
601 switch ( cmd->cmd() ) {
602 case LC_LOAD_DYLINKER:
603 // the existence of a dyld load command makes a executable dynamic
604 isStaticExecutable = false;
605 break;
606 }
607 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
608 }
609 if ( isStaticExecutable ) {
610 if ( fHeader->flags() != MH_NOUNDEFS )
611 throw "invalid bits in mach_header flags for static executable";
612 }
613 }
614
615 // verify encryption info
616 if ( encryption_info != NULL ) {
617 if ( fHeader->filetype() != MH_EXECUTE )
618 throw "LC_ENCRYPTION_INFO load command is only legal in main executables";
619 if ( encryption_info->cryptoff() < (sizeof(macho_header<P>) + fHeader->sizeofcmds()) )
620 throw "LC_ENCRYPTION_INFO load command has cryptoff covers some load commands";
621 if ( (encryption_info->cryptoff() % 4096) != 0 )
622 throw "LC_ENCRYPTION_INFO load command has cryptoff which is not page aligned";
623 if ( (encryption_info->cryptsize() % 4096) != 0 )
624 throw "LC_ENCRYPTION_INFO load command has cryptsize which is not page sized";
625 for (typename std::vector<std::pair<pint_t, pint_t> >::iterator it = segmentFileOffsetRanges.begin();
626 it != segmentFileOffsetRanges.end(); ++it) {
627 if ( (it->first <= encryption_info->cryptoff()) && (encryption_info->cryptoff() < it->second) ) {
628 if ( (encryption_info->cryptoff() + encryption_info->cryptsize()) > it->second )
629 throw "LC_ENCRYPTION_INFO load command is not contained within one segment";
630 }
631 }
632 }
633
634 // check LC_SYMTAB, LC_DYSYMTAB, and LC_SEGMENT_SPLIT_INFO
635 cmd = cmds;
636 bool foundDynamicSymTab = false;
637 for (uint32_t i = 0; i < cmd_count; ++i) {
638 switch ( cmd->cmd() ) {
639 case LC_SYMTAB:
640 {
641 const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
642 fSymbolCount = symtab->nsyms();
643 fSymbols = (const macho_nlist<P>*)((char*)fHeader + symtab->symoff());
644 if ( symtab->symoff() < linkEditSegment->fileoff() )
645 throw "symbol table not in __LINKEDIT";
646 if ( (symtab->symoff() + fSymbolCount*sizeof(macho_nlist<P>*)) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
647 throw "symbol table end not in __LINKEDIT";
648 if ( (symtab->symoff() % sizeof(pint_t)) != 0 )
649 throw "symbol table start not pointer aligned";
650 fStrings = (char*)fHeader + symtab->stroff();
651 fStringsEnd = fStrings + symtab->strsize();
652 if ( symtab->stroff() < linkEditSegment->fileoff() )
653 throw "string pool not in __LINKEDIT";
654 if ( (symtab->stroff()+symtab->strsize()) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
655 throw "string pool extends beyond __LINKEDIT";
656 if ( (symtab->stroff() % 4) != 0 ) // work around until rdar://problem/4737991 is fixed
657 throw "string pool start not pointer aligned";
658 if ( (symtab->strsize() % sizeof(pint_t)) != 0 )
659 throw "string pool size not a multiple of pointer size";
660 }
661 break;
662 case LC_DYSYMTAB:
663 {
664 if ( isStaticExecutable )
665 throw "LC_DYSYMTAB should not be used in static executable";
666 foundDynamicSymTab = true;
667 fDynamicSymbolTable = (macho_dysymtab_command<P>*)cmd;
668 fIndirectTable = (uint32_t*)((char*)fHeader + fDynamicSymbolTable->indirectsymoff());
669 fIndirectTableCount = fDynamicSymbolTable->nindirectsyms();
670 if ( fIndirectTableCount != 0 ) {
671 if ( fDynamicSymbolTable->indirectsymoff() < linkEditSegment->fileoff() )
672 throw "indirect symbol table not in __LINKEDIT";
673 if ( (fDynamicSymbolTable->indirectsymoff()+fIndirectTableCount*8) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
674 throw "indirect symbol table not in __LINKEDIT";
675 if ( (fDynamicSymbolTable->indirectsymoff() % sizeof(pint_t)) != 0 )
676 throw "indirect symbol table not pointer aligned";
677 }
678 fLocalRelocationsCount = fDynamicSymbolTable->nlocrel();
679 if ( fLocalRelocationsCount != 0 ) {
680 fLocalRelocations = (const macho_relocation_info<P>*)((char*)fHeader + fDynamicSymbolTable->locreloff());
681 if ( fDynamicSymbolTable->locreloff() < linkEditSegment->fileoff() )
682 throw "local relocations not in __LINKEDIT";
683 if ( (fDynamicSymbolTable->locreloff()+fLocalRelocationsCount*sizeof(macho_relocation_info<P>)) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
684 throw "local relocations not in __LINKEDIT";
685 if ( (fDynamicSymbolTable->locreloff() % sizeof(pint_t)) != 0 )
686 throw "local relocations table not pointer aligned";
687 }
688 fExternalRelocationsCount = fDynamicSymbolTable->nextrel();
689 if ( fExternalRelocationsCount != 0 ) {
690 fExternalRelocations = (const macho_relocation_info<P>*)((char*)fHeader + fDynamicSymbolTable->extreloff());
691 if ( fDynamicSymbolTable->extreloff() < linkEditSegment->fileoff() )
692 throw "external relocations not in __LINKEDIT";
693 if ( (fDynamicSymbolTable->extreloff()+fExternalRelocationsCount*sizeof(macho_relocation_info<P>)) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
694 throw "external relocations not in __LINKEDIT";
695 if ( (fDynamicSymbolTable->extreloff() % sizeof(pint_t)) != 0 )
696 throw "external relocations table not pointer aligned";
697 }
698 }
699 break;
700 case LC_SEGMENT_SPLIT_INFO:
701 {
702 if ( isStaticExecutable )
703 throw "LC_SEGMENT_SPLIT_INFO should not be used in static executable";
704 const macho_linkedit_data_command<P>* info = (macho_linkedit_data_command<P>*)cmd;
705 if ( info->dataoff() < linkEditSegment->fileoff() )
706 throw "split seg info not in __LINKEDIT";
707 if ( (info->dataoff()+info->datasize()) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
708 throw "split seg info not in __LINKEDIT";
709 if ( (info->dataoff() % sizeof(pint_t)) != 0 )
710 throw "split seg info table not pointer aligned";
711 if ( (info->datasize() % sizeof(pint_t)) != 0 )
712 throw "split seg info size not a multiple of pointer size";
713 }
714 break;
715 case LC_FUNCTION_STARTS:
716 {
717 const macho_linkedit_data_command<P>* info = (macho_linkedit_data_command<P>*)cmd;
718 if ( info->dataoff() < linkEditSegment->fileoff() )
719 throw "function starts data not in __LINKEDIT";
720 if ( (info->dataoff()+info->datasize()) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
721 throw "function starts data not in __LINKEDIT";
722 if ( (info->dataoff() % sizeof(pint_t)) != 0 )
723 throw "function starts data table not pointer aligned";
724 if ( (info->datasize() % sizeof(pint_t)) != 0 )
725 throw "function starts data size not a multiple of pointer size";
726 }
727 break;
728 }
729 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
730 }
731 if ( !isStaticExecutable && !foundDynamicSymTab )
732 throw "missing dynamic symbol table";
733 if ( fStrings == NULL )
734 throw "missing symbol table";
735
736 }
737
738 template <typename A>
739 void MachOChecker<A>::checkSection(const macho_segment_command<P>* segCmd, const macho_section<P>* sect)
740 {
741 uint8_t sectionType = (sect->flags() & SECTION_TYPE);
742 if ( sectionType == S_ZEROFILL ) {
743 if ( sect->offset() != 0 )
744 throwf("section offset should be zero for zero-fill section %s", sect->sectname());
745 }
746
747 // check section's segment name matches segment
748 // if ( strncmp(sect->segname(), segCmd->segname(), 16) != 0 )
749 // throwf("section %s in segment %s has wrong segment name", sect->sectname(), segCmd->segname());
750
751 // more section tests here
752 }
753
754
755
756
757 template <typename A>
758 void MachOChecker<A>::checkIndirectSymbolTable()
759 {
760 // static executables don't have indirect symbol table
761 if ( fDynamicSymbolTable == NULL )
762 return;
763 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
764 const uint32_t cmd_count = fHeader->ncmds();
765 const macho_load_command<P>* cmd = cmds;
766 for (uint32_t i = 0; i < cmd_count; ++i) {
767 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
768 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
769 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
770 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
771 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
772 // make sure all magic sections that use indirect symbol table fit within it
773 uint32_t start = 0;
774 uint32_t elementSize = 0;
775 switch ( sect->flags() & SECTION_TYPE ) {
776 case S_SYMBOL_STUBS:
777 elementSize = sect->reserved2();
778 start = sect->reserved1();
779 break;
780 case S_LAZY_SYMBOL_POINTERS:
781 case S_NON_LAZY_SYMBOL_POINTERS:
782 elementSize = sizeof(pint_t);
783 start = sect->reserved1();
784 break;
785 }
786 if ( elementSize != 0 ) {
787 uint32_t count = sect->size() / elementSize;
788 if ( (count*elementSize) != sect->size() )
789 throwf("%s section size is not an even multiple of element size", sect->sectname());
790 if ( (start+count) > fIndirectTableCount )
791 throwf("%s section references beyond end of indirect symbol table (%d > %d)", sect->sectname(), start+count, fIndirectTableCount );
792 }
793 }
794 }
795 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
796 }
797 }
798
799
800
801
802 template <typename A>
803 void MachOChecker<A>::checkSymbolTable()
804 {
805 // verify no duplicate external symbol names
806 if ( fDynamicSymbolTable != NULL ) {
807 StringSet externalNames;
808 const macho_nlist<P>* const exportedStart = &fSymbols[fDynamicSymbolTable->iextdefsym()];
809 const macho_nlist<P>* const exportedEnd = &exportedStart[fDynamicSymbolTable->nextdefsym()];
810 int i = fDynamicSymbolTable->iextdefsym();
811 for(const macho_nlist<P>* p = exportedStart; p < exportedEnd; ++p, ++i) {
812 const char* symName = &fStrings[p->n_strx()];
813 if ( symName > fStringsEnd )
814 throw "string index out of range";
815 //fprintf(stderr, "sym[%d] = %s\n", i, symName);
816 if ( externalNames.find(symName) != externalNames.end() )
817 throwf("duplicate external symbol: %s", symName);
818 if ( (p->n_type() & N_EXT) == 0 )
819 throwf("non-external symbol in external symbol range: %s", symName);
820 // don't add N_INDR to externalNames because there is likely an undefine with same name
821 if ( (p->n_type() & N_INDR) == 0 )
822 externalNames.insert(symName);
823 }
824 // verify no undefines with same name as an external symbol
825 const macho_nlist<P>* const undefinesStart = &fSymbols[fDynamicSymbolTable->iundefsym()];
826 const macho_nlist<P>* const undefinesEnd = &undefinesStart[fDynamicSymbolTable->nundefsym()];
827 for(const macho_nlist<P>* p = undefinesStart; p < undefinesEnd; ++p) {
828 const char* symName = &fStrings[p->n_strx()];
829 if ( symName > fStringsEnd )
830 throw "string index out of range";
831 if ( externalNames.find(symName) != externalNames.end() )
832 throwf("undefine with same name as external symbol: %s", symName);
833 }
834 // verify all N_SECT values are valid
835 for(const macho_nlist<P>* p = fSymbols; p < &fSymbols[fSymbolCount]; ++p) {
836 uint8_t type = p->n_type();
837 if ( ((type & N_STAB) == 0) && ((type & N_TYPE) == N_SECT) ) {
838 if ( p->n_sect() > fSectionCount ) {
839 throwf("symbol '%s' has n_sect=%d which is too large", &fStrings[p->n_strx()], p->n_sect());
840 }
841 }
842 }
843 }
844 }
845
846
847 template <typename A>
848 void MachOChecker<A>::checkInitTerms()
849 {
850 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
851 const uint32_t cmd_count = fHeader->ncmds();
852 const macho_load_command<P>* cmd = cmds;
853 for (uint32_t i = 0; i < cmd_count; ++i) {
854 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
855 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
856 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
857 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
858 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
859 // make sure all magic sections that use indirect symbol table fit within it
860 uint32_t count;
861 pint_t* arrayStart;
862 pint_t* arrayEnd;
863 const char* kind = "initializer";
864 switch ( sect->flags() & SECTION_TYPE ) {
865 case S_MOD_TERM_FUNC_POINTERS:
866 kind = "terminator";
867 // fall through
868 case S_MOD_INIT_FUNC_POINTERS:
869 count = sect->size() / sizeof(pint_t);
870 if ( (count*sizeof(pint_t)) != sect->size() )
871 throwf("%s section size is not an even multiple of element size", sect->sectname());
872 if ( (sect->addr() % sizeof(pint_t)) != 0 )
873 throwf("%s section size is not pointer size aligned", sect->sectname());
874 // check each pointer in array points within TEXT
875 arrayStart = (pint_t*)((char*)fHeader + sect->offset());
876 arrayEnd = (pint_t*)((char*)fHeader + sect->offset() + sect->size());
877 for (pint_t* p=arrayStart; p < arrayEnd; ++p) {
878 pint_t pointer = P::getP(*p);
879 if ( (pointer < fTEXTSegment->vmaddr()) || (pointer >= (fTEXTSegment->vmaddr()+fTEXTSegment->vmsize())) )
880 throwf("%s 0x%08llX points outside __TEXT segment", kind, (long long)pointer);
881 }
882 // check each pointer in array will be rebased and not bound
883 if ( fSlidableImage ) {
884 pint_t sectionBeginAddr = sect->addr();
885 pint_t sectionEndddr = sect->addr() + sect->size();
886 for(pint_t addr = sectionBeginAddr; addr < sectionEndddr; addr += sizeof(pint_t)) {
887 if ( addressIsBindingSite(addr) )
888 throwf("%s at 0x%0llX has binding to external symbol", kind, (long long)addr);
889 if ( ! addressIsRebaseSite(addr) )
890 throwf("%s at 0x%0llX is not rebased", kind, (long long)addr);
891 }
892 }
893 break;
894 }
895 }
896 }
897 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
898 }
899
900 }
901
902
903 template <>
904 ppc::P::uint_t MachOChecker<ppc>::relocBase()
905 {
906 if ( fHeader->flags() & MH_SPLIT_SEGS )
907 return fFirstWritableSegment->vmaddr();
908 else
909 return fFirstSegment->vmaddr();
910 }
911
912 template <>
913 ppc64::P::uint_t MachOChecker<ppc64>::relocBase()
914 {
915 if ( fWriteableSegmentWithAddrOver4G )
916 return fFirstWritableSegment->vmaddr();
917 else
918 return fFirstSegment->vmaddr();
919 }
920
921 template <>
922 x86::P::uint_t MachOChecker<x86>::relocBase()
923 {
924 if ( fHeader->flags() & MH_SPLIT_SEGS )
925 return fFirstWritableSegment->vmaddr();
926 else
927 return fFirstSegment->vmaddr();
928 }
929
930 template <>
931 x86_64::P::uint_t MachOChecker<x86_64>::relocBase()
932 {
933 // check for split-seg
934 return fFirstWritableSegment->vmaddr();
935 }
936
937 template <>
938 arm::P::uint_t MachOChecker<arm>::relocBase()
939 {
940 if ( fHeader->flags() & MH_SPLIT_SEGS )
941 return fFirstWritableSegment->vmaddr();
942 else
943 return fFirstSegment->vmaddr();
944 }
945
946
947 template <typename A>
948 bool MachOChecker<A>::addressInWritableSegment(pint_t address)
949 {
950 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
951 const uint32_t cmd_count = fHeader->ncmds();
952 const macho_load_command<P>* cmd = cmds;
953 for (uint32_t i = 0; i < cmd_count; ++i) {
954 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
955 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
956 if ( (address >= segCmd->vmaddr()) && (address < segCmd->vmaddr()+segCmd->vmsize()) ) {
957 // if segment is writable, we are fine
958 if ( (segCmd->initprot() & VM_PROT_WRITE) != 0 )
959 return true;
960 // could be a text reloc, make sure section bit is set
961 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
962 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
963 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
964 if ( (sect->addr() <= address) && (address < (sect->addr()+sect->size())) ) {
965 // found section for this address, if has relocs we are fine
966 return ( (sect->flags() & (S_ATTR_EXT_RELOC|S_ATTR_LOC_RELOC)) != 0 );
967 }
968 }
969 }
970 }
971 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
972 }
973 return false;
974 }
975
976
977 template <>
978 void MachOChecker<ppc>::checkExternalReloation(const macho_relocation_info<P>* reloc)
979 {
980 if ( reloc->r_length() != 2 )
981 throw "bad external relocation length";
982 if ( reloc->r_type() != GENERIC_RELOC_VANILLA )
983 throw "unknown external relocation type";
984 if ( reloc->r_pcrel() != 0 )
985 throw "bad external relocation pc_rel";
986 if ( reloc->r_extern() == 0 )
987 throw "local relocation found with external relocations";
988 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
989 throw "external relocation address not in writable segment";
990 // FIX: check r_symbol
991 }
992
993 template <>
994 void MachOChecker<ppc64>::checkExternalReloation(const macho_relocation_info<P>* reloc)
995 {
996 if ( reloc->r_length() != 3 )
997 throw "bad external relocation length";
998 if ( reloc->r_type() != GENERIC_RELOC_VANILLA )
999 throw "unknown external relocation type";
1000 if ( reloc->r_pcrel() != 0 )
1001 throw "bad external relocation pc_rel";
1002 if ( reloc->r_extern() == 0 )
1003 throw "local relocation found with external relocations";
1004 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1005 throw "external relocation address not in writable segment";
1006 // FIX: check r_symbol
1007 }
1008
1009 template <>
1010 void MachOChecker<x86>::checkExternalReloation(const macho_relocation_info<P>* reloc)
1011 {
1012 if ( reloc->r_length() != 2 )
1013 throw "bad external relocation length";
1014 if ( reloc->r_type() != GENERIC_RELOC_VANILLA )
1015 throw "unknown external relocation type";
1016 if ( reloc->r_pcrel() != 0 )
1017 throw "bad external relocation pc_rel";
1018 if ( reloc->r_extern() == 0 )
1019 throw "local relocation found with external relocations";
1020 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1021 throw "external relocation address not in writable segment";
1022 // FIX: check r_symbol
1023 }
1024
1025
1026 template <>
1027 void MachOChecker<x86_64>::checkExternalReloation(const macho_relocation_info<P>* reloc)
1028 {
1029 if ( reloc->r_length() != 3 )
1030 throw "bad external relocation length";
1031 if ( reloc->r_type() != X86_64_RELOC_UNSIGNED )
1032 throw "unknown external relocation type";
1033 if ( reloc->r_pcrel() != 0 )
1034 throw "bad external relocation pc_rel";
1035 if ( reloc->r_extern() == 0 )
1036 throw "local relocation found with external relocations";
1037 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1038 throw "exernal relocation address not in writable segment";
1039 // FIX: check r_symbol
1040 }
1041
1042 template <>
1043 void MachOChecker<arm>::checkExternalReloation(const macho_relocation_info<P>* reloc)
1044 {
1045 if ( reloc->r_length() != 2 )
1046 throw "bad external relocation length";
1047 if ( reloc->r_type() != ARM_RELOC_VANILLA )
1048 throw "unknown external relocation type";
1049 if ( reloc->r_pcrel() != 0 )
1050 throw "bad external relocation pc_rel";
1051 if ( reloc->r_extern() == 0 )
1052 throw "local relocation found with external relocations";
1053 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1054 throw "external relocation address not in writable segment";
1055 // FIX: check r_symbol
1056 }
1057
1058
1059 template <>
1060 void MachOChecker<ppc>::checkLocalReloation(const macho_relocation_info<P>* reloc)
1061 {
1062 if ( reloc->r_address() & R_SCATTERED ) {
1063 // scattered
1064 const macho_scattered_relocation_info<P>* sreloc = (const macho_scattered_relocation_info<P>*)reloc;
1065 // FIX
1066
1067 }
1068 else {
1069 // ignore pair relocs
1070 if ( reloc->r_type() == PPC_RELOC_PAIR )
1071 return;
1072 // FIX
1073 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1074 throwf("local relocation address 0x%08X not in writable segment", reloc->r_address());
1075 }
1076 }
1077
1078
1079 template <>
1080 void MachOChecker<ppc64>::checkLocalReloation(const macho_relocation_info<P>* reloc)
1081 {
1082 if ( reloc->r_length() != 3 )
1083 throw "bad local relocation length";
1084 if ( reloc->r_type() != GENERIC_RELOC_VANILLA )
1085 throw "unknown local relocation type";
1086 if ( reloc->r_pcrel() != 0 )
1087 throw "bad local relocation pc_rel";
1088 if ( reloc->r_extern() != 0 )
1089 throw "external relocation found with local relocations";
1090 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1091 throw "local relocation address not in writable segment";
1092 }
1093
1094 template <>
1095 void MachOChecker<x86>::checkLocalReloation(const macho_relocation_info<P>* reloc)
1096 {
1097 // FIX
1098 }
1099
1100 template <>
1101 void MachOChecker<x86_64>::checkLocalReloation(const macho_relocation_info<P>* reloc)
1102 {
1103 if ( reloc->r_length() != 3 )
1104 throw "bad local relocation length";
1105 if ( reloc->r_type() != X86_64_RELOC_UNSIGNED )
1106 throw "unknown local relocation type";
1107 if ( reloc->r_pcrel() != 0 )
1108 throw "bad local relocation pc_rel";
1109 if ( reloc->r_extern() != 0 )
1110 throw "external relocation found with local relocations";
1111 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1112 throw "local relocation address not in writable segment";
1113 }
1114
1115 template <>
1116 void MachOChecker<arm>::checkLocalReloation(const macho_relocation_info<P>* reloc)
1117 {
1118 if ( reloc->r_address() & R_SCATTERED ) {
1119 // scattered
1120 const macho_scattered_relocation_info<P>* sreloc = (const macho_scattered_relocation_info<P>*)reloc;
1121 if ( sreloc->r_length() != 2 )
1122 throw "bad local scattered relocation length";
1123 if ( sreloc->r_type() != ARM_RELOC_PB_LA_PTR )
1124 throw "bad local scattered relocation type";
1125 }
1126 else {
1127 if ( reloc->r_length() != 2 )
1128 throw "bad local relocation length";
1129 if ( reloc->r_extern() != 0 )
1130 throw "external relocation found with local relocations";
1131 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1132 throw "local relocation address not in writable segment";
1133 }
1134 }
1135
1136 template <typename A>
1137 void MachOChecker<A>::checkRelocations()
1138 {
1139 // external relocations should be sorted to minimize dyld symbol lookups
1140 // therefore every reloc with the same r_symbolnum value should be contiguous
1141 std::set<uint32_t> previouslySeenSymbolIndexes;
1142 uint32_t lastSymbolIndex = 0xFFFFFFFF;
1143 const macho_relocation_info<P>* const externRelocsEnd = &fExternalRelocations[fExternalRelocationsCount];
1144 for (const macho_relocation_info<P>* reloc = fExternalRelocations; reloc < externRelocsEnd; ++reloc) {
1145 this->checkExternalReloation(reloc);
1146 if ( reloc->r_symbolnum() != lastSymbolIndex ) {
1147 if ( previouslySeenSymbolIndexes.count(reloc->r_symbolnum()) != 0 )
1148 throw "external relocations not sorted";
1149 previouslySeenSymbolIndexes.insert(lastSymbolIndex);
1150 lastSymbolIndex = reloc->r_symbolnum();
1151 }
1152 }
1153
1154 const macho_relocation_info<P>* const localRelocsEnd = &fLocalRelocations[fLocalRelocationsCount];
1155 for (const macho_relocation_info<P>* reloc = fLocalRelocations; reloc < localRelocsEnd; ++reloc) {
1156 this->checkLocalReloation(reloc);
1157 }
1158
1159 // verify any section with S_ATTR_LOC_RELOC bits set actually has text relocs
1160 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
1161 const uint32_t cmd_count = fHeader->ncmds();
1162 const macho_load_command<P>* cmd = cmds;
1163 for (uint32_t i = 0; i < cmd_count; ++i) {
1164 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1165 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
1166 // if segment is writable, we are fine
1167 if ( (segCmd->initprot() & VM_PROT_WRITE) != 0 )
1168 continue;
1169 // look at sections that have text reloc bit set
1170 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
1171 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
1172 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
1173 if ( (sect->flags() & S_ATTR_LOC_RELOC) != 0 ) {
1174 if ( ! hasTextRelocInRange(sect->addr(), sect->addr()+sect->size()) ) {
1175 throwf("section %s has attribute set that it has relocs, but it has none", sect->sectname());
1176 }
1177 }
1178 }
1179 }
1180 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
1181 }
1182 }
1183
1184 template <typename A>
1185 typename A::P::uint_t MachOChecker<A>::segStartAddress(uint8_t segIndex)
1186 {
1187 if ( segIndex > fSegments.size() )
1188 throw "segment index out of range";
1189 return fSegments[segIndex]->vmaddr();
1190 }
1191
1192 template <typename A>
1193 bool MachOChecker<A>::hasTextRelocInRange(pint_t rangeStart, pint_t rangeEnd)
1194 {
1195 // look at local relocs
1196 const macho_relocation_info<P>* const localRelocsEnd = &fLocalRelocations[fLocalRelocationsCount];
1197 for (const macho_relocation_info<P>* reloc = fLocalRelocations; reloc < localRelocsEnd; ++reloc) {
1198 pint_t relocAddress = reloc->r_address() + this->relocBase();
1199 if ( (rangeStart <= relocAddress) && (relocAddress < rangeEnd) )
1200 return true;
1201 }
1202 // look rebase info
1203 if ( fDyldInfo != NULL ) {
1204 const uint8_t* p = (uint8_t*)fHeader + fDyldInfo->rebase_off();
1205 const uint8_t* end = &p[fDyldInfo->rebase_size()];
1206
1207 uint8_t type = 0;
1208 uint64_t segOffset = 0;
1209 uint32_t count;
1210 uint32_t skip;
1211 int segIndex;
1212 pint_t segStartAddr = 0;
1213 pint_t addr;
1214 bool done = false;
1215 while ( !done && (p < end) ) {
1216 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
1217 uint8_t opcode = *p & REBASE_OPCODE_MASK;
1218 ++p;
1219 switch (opcode) {
1220 case REBASE_OPCODE_DONE:
1221 done = true;
1222 break;
1223 case REBASE_OPCODE_SET_TYPE_IMM:
1224 type = immediate;
1225 break;
1226 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1227 segIndex = immediate;
1228 segStartAddr = segStartAddress(segIndex);
1229 segOffset = read_uleb128(p, end);
1230 break;
1231 case REBASE_OPCODE_ADD_ADDR_ULEB:
1232 segOffset += read_uleb128(p, end);
1233 break;
1234 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
1235 segOffset += immediate*sizeof(pint_t);
1236 break;
1237 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
1238 for (int i=0; i < immediate; ++i) {
1239 addr = segStartAddr+segOffset;
1240 if ( (rangeStart <= addr) && (addr < rangeEnd) )
1241 return true;
1242 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1243 segOffset += sizeof(pint_t);
1244 }
1245 break;
1246 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
1247 count = read_uleb128(p, end);
1248 for (uint32_t i=0; i < count; ++i) {
1249 addr = segStartAddr+segOffset;
1250 if ( (rangeStart <= addr) && (addr < rangeEnd) )
1251 return true;
1252 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1253 segOffset += sizeof(pint_t);
1254 }
1255 break;
1256 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
1257 addr = segStartAddr+segOffset;
1258 if ( (rangeStart <= addr) && (addr < rangeEnd) )
1259 return true;
1260 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1261 segOffset += read_uleb128(p, end) + sizeof(pint_t);
1262 break;
1263 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
1264 count = read_uleb128(p, end);
1265 skip = read_uleb128(p, end);
1266 for (uint32_t i=0; i < count; ++i) {
1267 addr = segStartAddr+segOffset;
1268 if ( (rangeStart <= addr) && (addr < rangeEnd) )
1269 return true;
1270 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1271 segOffset += skip + sizeof(pint_t);
1272 }
1273 break;
1274 default:
1275 throwf("bad rebase opcode %d", *p);
1276 }
1277 }
1278 }
1279 }
1280
1281 template <typename A>
1282 bool MachOChecker<A>::addressIsRebaseSite(pint_t targetAddr)
1283 {
1284 // look at local relocs
1285 const macho_relocation_info<P>* const localRelocsEnd = &fLocalRelocations[fLocalRelocationsCount];
1286 for (const macho_relocation_info<P>* reloc = fLocalRelocations; reloc < localRelocsEnd; ++reloc) {
1287 pint_t relocAddress = reloc->r_address() + this->relocBase();
1288 if ( relocAddress == targetAddr )
1289 return true;
1290 }
1291 // look rebase info
1292 if ( fDyldInfo != NULL ) {
1293 const uint8_t* p = (uint8_t*)fHeader + fDyldInfo->rebase_off();
1294 const uint8_t* end = &p[fDyldInfo->rebase_size()];
1295
1296 uint8_t type = 0;
1297 uint64_t segOffset = 0;
1298 uint32_t count;
1299 uint32_t skip;
1300 int segIndex;
1301 pint_t segStartAddr = 0;
1302 pint_t addr;
1303 bool done = false;
1304 while ( !done && (p < end) ) {
1305 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
1306 uint8_t opcode = *p & REBASE_OPCODE_MASK;
1307 ++p;
1308 switch (opcode) {
1309 case REBASE_OPCODE_DONE:
1310 done = true;
1311 break;
1312 case REBASE_OPCODE_SET_TYPE_IMM:
1313 type = immediate;
1314 break;
1315 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1316 segIndex = immediate;
1317 segStartAddr = segStartAddress(segIndex);
1318 segOffset = read_uleb128(p, end);
1319 break;
1320 case REBASE_OPCODE_ADD_ADDR_ULEB:
1321 segOffset += read_uleb128(p, end);
1322 break;
1323 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
1324 segOffset += immediate*sizeof(pint_t);
1325 break;
1326 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
1327 for (int i=0; i < immediate; ++i) {
1328 addr = segStartAddr+segOffset;
1329 if ( addr == targetAddr )
1330 return true;
1331 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1332 segOffset += sizeof(pint_t);
1333 }
1334 break;
1335 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
1336 count = read_uleb128(p, end);
1337 for (uint32_t i=0; i < count; ++i) {
1338 addr = segStartAddr+segOffset;
1339 if ( addr == targetAddr )
1340 return true;
1341 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1342 segOffset += sizeof(pint_t);
1343 }
1344 break;
1345 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
1346 addr = segStartAddr+segOffset;
1347 if ( addr == targetAddr )
1348 return true;
1349 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1350 segOffset += read_uleb128(p, end) + sizeof(pint_t);
1351 break;
1352 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
1353 count = read_uleb128(p, end);
1354 skip = read_uleb128(p, end);
1355 for (uint32_t i=0; i < count; ++i) {
1356 addr = segStartAddr+segOffset;
1357 if ( addr == targetAddr )
1358 return true;
1359 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1360 segOffset += skip + sizeof(pint_t);
1361 }
1362 break;
1363 default:
1364 throwf("bad rebase opcode %d", *p);
1365 }
1366 }
1367 }
1368 return false;
1369 }
1370
1371
1372 template <typename A>
1373 bool MachOChecker<A>::addressIsBindingSite(pint_t targetAddr)
1374 {
1375 // look at external relocs
1376 const macho_relocation_info<P>* const externRelocsEnd = &fExternalRelocations[fExternalRelocationsCount];
1377 for (const macho_relocation_info<P>* reloc = fExternalRelocations; reloc < externRelocsEnd; ++reloc) {
1378 pint_t relocAddress = reloc->r_address() + this->relocBase();
1379 if ( relocAddress == targetAddr )
1380 return true;
1381 }
1382 // look bind info
1383 if ( fDyldInfo != NULL ) {
1384 const uint8_t* p = (uint8_t*)fHeader + fDyldInfo->bind_off();
1385 const uint8_t* end = &p[fDyldInfo->bind_size()];
1386
1387 uint8_t type = 0;
1388 uint64_t segOffset = 0;
1389 uint32_t count;
1390 uint32_t skip;
1391 uint8_t flags;
1392 const char* symbolName = NULL;
1393 int libraryOrdinal = 0;
1394 int segIndex;
1395 int64_t addend = 0;
1396 pint_t segStartAddr = 0;
1397 pint_t addr;
1398 bool done = false;
1399 while ( !done && (p < end) ) {
1400 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1401 uint8_t opcode = *p & BIND_OPCODE_MASK;
1402 ++p;
1403 switch (opcode) {
1404 case BIND_OPCODE_DONE:
1405 done = true;
1406 break;
1407 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1408 libraryOrdinal = immediate;
1409 break;
1410 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1411 libraryOrdinal = read_uleb128(p, end);
1412 break;
1413 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1414 // the special ordinals are negative numbers
1415 if ( immediate == 0 )
1416 libraryOrdinal = 0;
1417 else {
1418 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1419 libraryOrdinal = signExtended;
1420 }
1421 break;
1422 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1423 symbolName = (char*)p;
1424 while (*p != '\0')
1425 ++p;
1426 ++p;
1427 break;
1428 case BIND_OPCODE_SET_TYPE_IMM:
1429 type = immediate;
1430 break;
1431 case BIND_OPCODE_SET_ADDEND_SLEB:
1432 addend = read_sleb128(p, end);
1433 break;
1434 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1435 segIndex = immediate;
1436 segStartAddr = segStartAddress(segIndex);
1437 segOffset = read_uleb128(p, end);
1438 break;
1439 case BIND_OPCODE_ADD_ADDR_ULEB:
1440 segOffset += read_uleb128(p, end);
1441 break;
1442 case BIND_OPCODE_DO_BIND:
1443 if ( (segStartAddr+segOffset) == targetAddr )
1444 return true;
1445 segOffset += sizeof(pint_t);
1446 break;
1447 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1448 if ( (segStartAddr+segOffset) == targetAddr )
1449 return true;
1450 segOffset += read_uleb128(p, end) + sizeof(pint_t);
1451 break;
1452 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1453 if ( (segStartAddr+segOffset) == targetAddr )
1454 return true;
1455 segOffset += immediate*sizeof(pint_t) + sizeof(pint_t);
1456 break;
1457 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1458 count = read_uleb128(p, end);
1459 skip = read_uleb128(p, end);
1460 for (uint32_t i=0; i < count; ++i) {
1461 if ( (segStartAddr+segOffset) == targetAddr )
1462 return true;
1463 segOffset += skip + sizeof(pint_t);
1464 }
1465 break;
1466 default:
1467 throwf("bad bind opcode %d", *p);
1468 }
1469 }
1470 }
1471 return false;
1472 }
1473
1474
1475 static void check(const char* path)
1476 {
1477 struct stat stat_buf;
1478
1479 try {
1480 int fd = ::open(path, O_RDONLY, 0);
1481 if ( fd == -1 )
1482 throw "cannot open file";
1483 if ( ::fstat(fd, &stat_buf) != 0 )
1484 throwf("fstat(%s) failed, errno=%d\n", path, errno);
1485 uint32_t length = stat_buf.st_size;
1486 uint8_t* p = (uint8_t*)::mmap(NULL, stat_buf.st_size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0);
1487 if ( p == ((uint8_t*)(-1)) )
1488 throw "cannot map file";
1489 ::close(fd);
1490 const mach_header* mh = (mach_header*)p;
1491 if ( mh->magic == OSSwapBigToHostInt32(FAT_MAGIC) ) {
1492 const struct fat_header* fh = (struct fat_header*)p;
1493 const struct fat_arch* archs = (struct fat_arch*)(p + sizeof(struct fat_header));
1494 for (unsigned long i=0; i < OSSwapBigToHostInt32(fh->nfat_arch); ++i) {
1495 size_t offset = OSSwapBigToHostInt32(archs[i].offset);
1496 size_t size = OSSwapBigToHostInt32(archs[i].size);
1497 unsigned int cputype = OSSwapBigToHostInt32(archs[i].cputype);
1498
1499 switch(cputype) {
1500 case CPU_TYPE_POWERPC:
1501 if ( MachOChecker<ppc>::validFile(p + offset) )
1502 MachOChecker<ppc>::make(p + offset, size, path);
1503 else
1504 throw "in universal file, ppc slice does not contain ppc mach-o";
1505 break;
1506 case CPU_TYPE_I386:
1507 if ( MachOChecker<x86>::validFile(p + offset) )
1508 MachOChecker<x86>::make(p + offset, size, path);
1509 else
1510 throw "in universal file, i386 slice does not contain i386 mach-o";
1511 break;
1512 case CPU_TYPE_POWERPC64:
1513 if ( MachOChecker<ppc64>::validFile(p + offset) )
1514 MachOChecker<ppc64>::make(p + offset, size, path);
1515 else
1516 throw "in universal file, ppc64 slice does not contain ppc64 mach-o";
1517 break;
1518 case CPU_TYPE_X86_64:
1519 if ( MachOChecker<x86_64>::validFile(p + offset) )
1520 MachOChecker<x86_64>::make(p + offset, size, path);
1521 else
1522 throw "in universal file, x86_64 slice does not contain x86_64 mach-o";
1523 break;
1524 case CPU_TYPE_ARM:
1525 if ( MachOChecker<arm>::validFile(p + offset) )
1526 MachOChecker<arm>::make(p + offset, size, path);
1527 else
1528 throw "in universal file, arm slice does not contain arm mach-o";
1529 break;
1530 default:
1531 throwf("in universal file, unknown architecture slice 0x%x\n", cputype);
1532 }
1533 }
1534 }
1535 else if ( MachOChecker<x86>::validFile(p) ) {
1536 MachOChecker<x86>::make(p, length, path);
1537 }
1538 else if ( MachOChecker<ppc>::validFile(p) ) {
1539 MachOChecker<ppc>::make(p, length, path);
1540 }
1541 else if ( MachOChecker<ppc64>::validFile(p) ) {
1542 MachOChecker<ppc64>::make(p, length, path);
1543 }
1544 else if ( MachOChecker<x86_64>::validFile(p) ) {
1545 MachOChecker<x86_64>::make(p, length, path);
1546 }
1547 else if ( MachOChecker<arm>::validFile(p) ) {
1548 MachOChecker<arm>::make(p, length, path);
1549 }
1550 else {
1551 throw "not a known file type";
1552 }
1553 }
1554 catch (const char* msg) {
1555 throwf("%s in %s", msg, path);
1556 }
1557 }
1558
1559
1560 int main(int argc, const char* argv[])
1561 {
1562 bool progress = false;
1563 int result = 0;
1564 for(int i=1; i < argc; ++i) {
1565 const char* arg = argv[i];
1566 if ( arg[0] == '-' ) {
1567 if ( strcmp(arg, "-progress") == 0 ) {
1568 progress = true;
1569 }
1570 else {
1571 throwf("unknown option: %s\n", arg);
1572 }
1573 }
1574 else {
1575 bool success = true;
1576 try {
1577 check(arg);
1578 }
1579 catch (const char* msg) {
1580 fprintf(stderr, "machocheck failed: %s %s\n", arg, msg);
1581 result = 1;
1582 success = false;
1583 }
1584 if ( success && progress )
1585 printf("ok: %s\n", arg);
1586 }
1587 }
1588
1589 return result;
1590 }
1591
1592
1593