]> git.saurik.com Git - apple/ld64.git/blame_incremental - src/other/machochecker.cpp
ld64-133.3.tar.gz
[apple/ld64.git] / src / other / machochecker.cpp
... / ...
CommitLineData
1/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25#include <sys/types.h>
26#include <sys/stat.h>
27#include <sys/mman.h>
28#include <stdarg.h>
29#include <stdio.h>
30#include <fcntl.h>
31#include <unistd.h>
32#include <errno.h>
33
34#include <vector>
35#include <set>
36#include <ext/hash_set>
37
38#include "MachOFileAbstraction.hpp"
39#include "Architectures.hpp"
40
41
//
// Formats a printf-style message and throws it as a `const char*`.
//
// The formatted text is heap allocated by vasprintf() and is intentionally
// never freed here: ownership passes to whoever catches the exception.
// If formatting fails, vasprintf() leaves the output pointer unspecified, so
// the raw (unformatted) `format` string is thrown instead of a garbage pointer.
//
__attribute__((noreturn))
void throwf(const char* format, ...)
{
	va_list	list;
	char*	p = NULL;
	va_start(list, format);
	const int length = vasprintf(&p, format, list);
	va_end(list);

	// vasprintf() returns -1 on failure; never throw an indeterminate pointer
	if ( (length < 0) || (p == NULL) )
		throw format;

	const char*	t = p;
	throw t;
}
54
55static uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end)
56{
57 uint64_t result = 0;
58 int bit = 0;
59 do {
60 if (p == end)
61 throwf("malformed uleb128");
62
63 uint64_t slice = *p & 0x7f;
64
65 if (bit >= 64 || slice << bit >> bit != slice)
66 throwf("uleb128 too big");
67 else {
68 result |= (slice << bit);
69 bit += 7;
70 }
71 }
72 while (*p++ & 0x80);
73 return result;
74}
75
76
77static int64_t read_sleb128(const uint8_t*& p, const uint8_t* end)
78{
79 int64_t result = 0;
80 int bit = 0;
81 uint8_t byte;
82 do {
83 if (p == end)
84 throwf("malformed sleb128");
85 byte = *p++;
86 result |= ((byte & 0x7f) << bit);
87 bit += 7;
88 } while (byte & 0x80);
89 // sign extend negative numbers
90 if ( (byte & 0x40) != 0 )
91 result |= (-1LL) << bit;
92 return result;
93}
94
95
96template <typename A>
97class MachOChecker
98{
99public:
100 static bool validFile(const uint8_t* fileContent);
101 static MachOChecker<A>* make(const uint8_t* fileContent, uint32_t fileLength, const char* path)
102 { return new MachOChecker<A>(fileContent, fileLength, path); }
103 virtual ~MachOChecker() {}
104
105
106private:
107 typedef typename A::P P;
108 typedef typename A::P::E E;
109 typedef typename A::P::uint_t pint_t;
110
111 class CStringEquals
112 {
113 public:
114 bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); }
115 };
116
117 typedef __gnu_cxx::hash_set<const char*, __gnu_cxx::hash<const char*>, CStringEquals> StringSet;
118
119 MachOChecker(const uint8_t* fileContent, uint32_t fileLength, const char* path);
120 void checkMachHeader();
121 void checkLoadCommands();
122 void checkSection(const macho_segment_command<P>* segCmd, const macho_section<P>* sect);
123 uint8_t loadCommandSizeMask();
124 void checkSymbolTable();
125 void checkInitTerms();
126 void checkIndirectSymbolTable();
127 void checkRelocations();
128 void checkExternalReloation(const macho_relocation_info<P>* reloc);
129 void checkLocalReloation(const macho_relocation_info<P>* reloc);
130 pint_t relocBase();
131 bool addressInWritableSegment(pint_t address);
132 bool hasTextRelocInRange(pint_t start, pint_t end);
133 pint_t segStartAddress(uint8_t segIndex);
134 bool addressIsRebaseSite(pint_t addr);
135 bool addressIsBindingSite(pint_t addr);
136 pint_t getInitialStackPointer(const macho_thread_command<P>*);
137 pint_t getEntryPoint(const macho_thread_command<P>*);
138
139
140
141 const char* fPath;
142 const macho_header<P>* fHeader;
143 uint32_t fLength;
144 const char* fStrings;
145 const char* fStringsEnd;
146 const macho_nlist<P>* fSymbols;
147 uint32_t fSymbolCount;
148 const macho_dysymtab_command<P>* fDynamicSymbolTable;
149 const uint32_t* fIndirectTable;
150 uint32_t fIndirectTableCount;
151 const macho_relocation_info<P>* fLocalRelocations;
152 uint32_t fLocalRelocationsCount;
153 const macho_relocation_info<P>* fExternalRelocations;
154 uint32_t fExternalRelocationsCount;
155 bool fWriteableSegmentWithAddrOver4G;
156 bool fSlidableImage;
157 const macho_segment_command<P>* fFirstSegment;
158 const macho_segment_command<P>* fFirstWritableSegment;
159 const macho_segment_command<P>* fTEXTSegment;
160 const macho_dyld_info_command<P>* fDyldInfo;
161 uint32_t fSectionCount;
162 std::vector<const macho_segment_command<P>*>fSegments;
163};
164
165
166
167template <>
168bool MachOChecker<ppc>::validFile(const uint8_t* fileContent)
169{
170 const macho_header<P>* header = (const macho_header<P>*)fileContent;
171 if ( header->magic() != MH_MAGIC )
172 return false;
173 if ( header->cputype() != CPU_TYPE_POWERPC )
174 return false;
175 switch (header->filetype()) {
176 case MH_EXECUTE:
177 case MH_DYLIB:
178 case MH_BUNDLE:
179 case MH_DYLINKER:
180 return true;
181 }
182 return false;
183}
184
185template <>
186bool MachOChecker<ppc64>::validFile(const uint8_t* fileContent)
187{
188 const macho_header<P>* header = (const macho_header<P>*)fileContent;
189 if ( header->magic() != MH_MAGIC_64 )
190 return false;
191 if ( header->cputype() != CPU_TYPE_POWERPC64 )
192 return false;
193 switch (header->filetype()) {
194 case MH_EXECUTE:
195 case MH_DYLIB:
196 case MH_BUNDLE:
197 case MH_DYLINKER:
198 return true;
199 }
200 return false;
201}
202
203template <>
204bool MachOChecker<x86>::validFile(const uint8_t* fileContent)
205{
206 const macho_header<P>* header = (const macho_header<P>*)fileContent;
207 if ( header->magic() != MH_MAGIC )
208 return false;
209 if ( header->cputype() != CPU_TYPE_I386 )
210 return false;
211 switch (header->filetype()) {
212 case MH_EXECUTE:
213 case MH_DYLIB:
214 case MH_BUNDLE:
215 case MH_DYLINKER:
216 return true;
217 }
218 return false;
219}
220
221template <>
222bool MachOChecker<x86_64>::validFile(const uint8_t* fileContent)
223{
224 const macho_header<P>* header = (const macho_header<P>*)fileContent;
225 if ( header->magic() != MH_MAGIC_64 )
226 return false;
227 if ( header->cputype() != CPU_TYPE_X86_64 )
228 return false;
229 switch (header->filetype()) {
230 case MH_EXECUTE:
231 case MH_DYLIB:
232 case MH_BUNDLE:
233 case MH_DYLINKER:
234 return true;
235 }
236 return false;
237}
238
239template <>
240bool MachOChecker<arm>::validFile(const uint8_t* fileContent)
241{
242 const macho_header<P>* header = (const macho_header<P>*)fileContent;
243 if ( header->magic() != MH_MAGIC )
244 return false;
245 if ( header->cputype() != CPU_TYPE_ARM )
246 return false;
247 switch (header->filetype()) {
248 case MH_EXECUTE:
249 case MH_DYLIB:
250 case MH_BUNDLE:
251 case MH_DYLINKER:
252 return true;
253 }
254 return false;
255}
256
257template <> uint8_t MachOChecker<ppc>::loadCommandSizeMask() { return 0x03; }
258template <> uint8_t MachOChecker<ppc64>::loadCommandSizeMask() { return 0x07; }
259template <> uint8_t MachOChecker<x86>::loadCommandSizeMask() { return 0x03; }
260template <> uint8_t MachOChecker<x86_64>::loadCommandSizeMask() { return 0x07; }
261template <> uint8_t MachOChecker<arm>::loadCommandSizeMask() { return 0x03; }
262
263
264template <>
265ppc::P::uint_t MachOChecker<ppc>::getInitialStackPointer(const macho_thread_command<ppc::P>* threadInfo)
266{
267 return threadInfo->thread_register(3);
268}
269
270template <>
271ppc64::P::uint_t MachOChecker<ppc64>::getInitialStackPointer(const macho_thread_command<ppc64::P>* threadInfo)
272{
273 return threadInfo->thread_register(3);
274}
275
276template <>
277x86::P::uint_t MachOChecker<x86>::getInitialStackPointer(const macho_thread_command<x86::P>* threadInfo)
278{
279 return threadInfo->thread_register(7);
280}
281
282template <>
283x86_64::P::uint_t MachOChecker<x86_64>::getInitialStackPointer(const macho_thread_command<x86_64::P>* threadInfo)
284{
285 return threadInfo->thread_register(7);
286}
287
288template <>
289arm::P::uint_t MachOChecker<arm>::getInitialStackPointer(const macho_thread_command<arm::P>* threadInfo)
290{
291 return threadInfo->thread_register(13);
292}
293
294
295
296
297
298template <>
299ppc::P::uint_t MachOChecker<ppc>::getEntryPoint(const macho_thread_command<ppc::P>* threadInfo)
300{
301 return threadInfo->thread_register(0);
302}
303
304template <>
305ppc64::P::uint_t MachOChecker<ppc64>::getEntryPoint(const macho_thread_command<ppc64::P>* threadInfo)
306{
307 return threadInfo->thread_register(0);
308}
309
310template <>
311x86::P::uint_t MachOChecker<x86>::getEntryPoint(const macho_thread_command<x86::P>* threadInfo)
312{
313 return threadInfo->thread_register(10);
314}
315
316template <>
317x86_64::P::uint_t MachOChecker<x86_64>::getEntryPoint(const macho_thread_command<x86_64::P>* threadInfo)
318{
319 return threadInfo->thread_register(16);
320}
321
322template <>
323arm::P::uint_t MachOChecker<arm>::getEntryPoint(const macho_thread_command<arm::P>* threadInfo)
324{
325 return threadInfo->thread_register(15);
326}
327
328
329template <typename A>
330MachOChecker<A>::MachOChecker(const uint8_t* fileContent, uint32_t fileLength, const char* path)
331 : fHeader(NULL), fLength(fileLength), fStrings(NULL), fSymbols(NULL), fSymbolCount(0), fDynamicSymbolTable(NULL), fIndirectTableCount(0),
332 fLocalRelocations(NULL), fLocalRelocationsCount(0), fExternalRelocations(NULL), fExternalRelocationsCount(0),
333 fWriteableSegmentWithAddrOver4G(false), fSlidableImage(false), fFirstSegment(NULL), fFirstWritableSegment(NULL),
334 fTEXTSegment(NULL), fDyldInfo(NULL), fSectionCount(0)
335{
336 // sanity check
337 if ( ! validFile(fileContent) )
338 throw "not a mach-o file that can be checked";
339
340 fPath = strdup(path);
341 fHeader = (const macho_header<P>*)fileContent;
342
343 // sanity check header
344 checkMachHeader();
345
346 // check load commands
347 checkLoadCommands();
348
349 checkIndirectSymbolTable();
350
351 checkRelocations();
352
353 checkSymbolTable();
354
355 checkInitTerms();
356}
357
358
359template <typename A>
360void MachOChecker<A>::checkMachHeader()
361{
362 if ( (fHeader->sizeofcmds() + sizeof(macho_header<P>)) > fLength )
363 throw "sizeofcmds in mach_header is larger than file";
364
365 uint32_t flags = fHeader->flags();
366 const uint32_t invalidBits = MH_INCRLINK | MH_LAZY_INIT | 0xFE000000;
367 if ( flags & invalidBits )
368 throw "invalid bits in mach_header flags";
369 if ( (flags & MH_NO_REEXPORTED_DYLIBS) && (fHeader->filetype() != MH_DYLIB) )
370 throw "MH_NO_REEXPORTED_DYLIBS bit of mach_header flags only valid for dylibs";
371
372 switch ( fHeader->filetype() ) {
373 case MH_EXECUTE:
374 fSlidableImage = ( flags & MH_PIE );
375 break;
376 case MH_DYLIB:
377 case MH_BUNDLE:
378 fSlidableImage = true;
379 break;
380 default:
381 throw "not a mach-o file type supported by this tool";
382 }
383}
384
385template <typename A>
386void MachOChecker<A>::checkLoadCommands()
387{
388 // check that all load commands fit within the load command space file
389 const macho_encryption_info_command<P>* encryption_info = NULL;
390 const macho_thread_command<P>* threadInfo = NULL;
391 const macho_entry_point_command<P>* entryPoint = NULL;
392 const uint8_t* const endOfFile = (uint8_t*)fHeader + fLength;
393 const uint8_t* const endOfLoadCommands = (uint8_t*)fHeader + sizeof(macho_header<P>) + fHeader->sizeofcmds();
394 const uint32_t cmd_count = fHeader->ncmds();
395 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
396 const macho_load_command<P>* cmd = cmds;
397 for (uint32_t i = 0; i < cmd_count; ++i) {
398 uint32_t size = cmd->cmdsize();
399 if ( (size & this->loadCommandSizeMask()) != 0 )
400 throwf("load command #%d has a unaligned size", i);
401 const uint8_t* endOfCmd = ((uint8_t*)cmd)+cmd->cmdsize();
402 if ( endOfCmd > endOfLoadCommands )
403 throwf("load command #%d extends beyond the end of the load commands", i);
404 if ( endOfCmd > endOfFile )
405 throwf("load command #%d extends beyond the end of the file", i);
406 switch ( cmd->cmd() ) {
407 case macho_segment_command<P>::CMD:
408 case LC_SYMTAB:
409 case LC_DYSYMTAB:
410 case LC_LOAD_DYLIB:
411 case LC_ID_DYLIB:
412 case LC_LOAD_DYLINKER:
413 case LC_ID_DYLINKER:
414 case macho_routines_command<P>::CMD:
415 case LC_SUB_FRAMEWORK:
416 case LC_SUB_CLIENT:
417 case LC_TWOLEVEL_HINTS:
418 case LC_PREBIND_CKSUM:
419 case LC_LOAD_WEAK_DYLIB:
420 case LC_LAZY_LOAD_DYLIB:
421 case LC_UUID:
422 case LC_REEXPORT_DYLIB:
423 case LC_SEGMENT_SPLIT_INFO:
424 case LC_CODE_SIGNATURE:
425 case LC_LOAD_UPWARD_DYLIB:
426 case LC_VERSION_MIN_MACOSX:
427 case LC_VERSION_MIN_IPHONEOS:
428 case LC_RPATH:
429 case LC_FUNCTION_STARTS:
430 case LC_DYLD_ENVIRONMENT:
431 case LC_DATA_IN_CODE:
432 case LC_DYLIB_CODE_SIGN_DRS:
433 case LC_SOURCE_VERSION:
434 break;
435 case LC_DYLD_INFO:
436 case LC_DYLD_INFO_ONLY:
437 fDyldInfo = (macho_dyld_info_command<P>*)cmd;
438 break;
439 case LC_ENCRYPTION_INFO:
440 encryption_info = (macho_encryption_info_command<P>*)cmd;
441 break;
442 case LC_SUB_UMBRELLA:
443 case LC_SUB_LIBRARY:
444 if ( fHeader->flags() & MH_NO_REEXPORTED_DYLIBS )
445 throw "MH_NO_REEXPORTED_DYLIBS bit of mach_header flags should not be set in an image with LC_SUB_LIBRARY or LC_SUB_UMBRELLA";
446 break;
447 case LC_MAIN:
448 if ( fHeader->filetype() != MH_EXECUTE )
449 throw "LC_MAIN can only be used in MH_EXECUTE file types";
450 entryPoint = (macho_entry_point_command<P>*)cmd;
451 break;
452 case LC_UNIXTHREAD:
453 if ( fHeader->filetype() != MH_EXECUTE )
454 throw "LC_UNIXTHREAD can only be used in MH_EXECUTE file types";
455 threadInfo = (macho_thread_command<P>*)cmd;
456 break;
457 default:
458 throwf("load command #%d is an unknown kind 0x%X", i, cmd->cmd());
459 }
460 cmd = (const macho_load_command<P>*)endOfCmd;
461 }
462
463 // check segments
464 cmd = cmds;
465 std::vector<std::pair<pint_t, pint_t> > segmentAddressRanges;
466 std::vector<std::pair<pint_t, pint_t> > segmentFileOffsetRanges;
467 const macho_segment_command<P>* linkEditSegment = NULL;
468 const macho_segment_command<P>* stackSegment = NULL;
469 for (uint32_t i = 0; i < cmd_count; ++i) {
470 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
471 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
472 fSegments.push_back(segCmd);
473 if ( segCmd->cmdsize() != (sizeof(macho_segment_command<P>) + segCmd->nsects() * sizeof(macho_section_content<P>)) )
474 throw "invalid segment load command size";
475
476 // see if this overlaps another segment address range
477 uint64_t startAddr = segCmd->vmaddr();
478 uint64_t endAddr = startAddr + segCmd->vmsize();
479 for (typename std::vector<std::pair<pint_t, pint_t> >::iterator it = segmentAddressRanges.begin(); it != segmentAddressRanges.end(); ++it) {
480 if ( it->first < startAddr ) {
481 if ( it->second > startAddr )
482 throw "overlapping segment vm addresses";
483 }
484 else if ( it->first > startAddr ) {
485 if ( it->first < endAddr )
486 throw "overlapping segment vm addresses";
487 }
488 else {
489 throw "overlapping segment vm addresses";
490 }
491 segmentAddressRanges.push_back(std::make_pair<pint_t, pint_t>(startAddr, endAddr));
492 }
493 // see if this overlaps another segment file offset range
494 uint64_t startOffset = segCmd->fileoff();
495 uint64_t endOffset = startOffset + segCmd->filesize();
496 for (typename std::vector<std::pair<pint_t, pint_t> >::iterator it = segmentFileOffsetRanges.begin(); it != segmentFileOffsetRanges.end(); ++it) {
497 if ( it->first < startOffset ) {
498 if ( it->second > startOffset )
499 throw "overlapping segment file data";
500 }
501 else if ( it->first > startOffset ) {
502 if ( it->first < endOffset )
503 throw "overlapping segment file data";
504 }
505 else {
506 throw "overlapping segment file data";
507 }
508 segmentFileOffsetRanges.push_back(std::make_pair<pint_t, pint_t>(startOffset, endOffset));
509 // check is within file bounds
510 if ( (startOffset > fLength) || (endOffset > fLength) )
511 throw "segment file data is past end of file";
512 }
513 // verify it fits in file
514 if ( startOffset > fLength )
515 throw "segment fileoff does not fit in file";
516 if ( endOffset > fLength )
517 throw "segment fileoff+filesize does not fit in file";
518
519 // record special segments
520 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 )
521 linkEditSegment = segCmd;
522 else if ( strcmp(segCmd->segname(), "__UNIXSTACK") == 0 )
523 stackSegment = segCmd;
524
525 // cache interesting segments
526 if ( fFirstSegment == NULL )
527 fFirstSegment = segCmd;
528 if ( (fTEXTSegment == NULL) && (strcmp(segCmd->segname(), "__TEXT") == 0) )
529 fTEXTSegment = segCmd;
530 if ( (segCmd->initprot() & VM_PROT_WRITE) != 0 ) {
531 if ( fFirstWritableSegment == NULL )
532 fFirstWritableSegment = segCmd;
533 if ( segCmd->vmaddr() > 0x100000000ULL )
534 fWriteableSegmentWithAddrOver4G = true;
535 }
536
537 // check section ranges
538 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
539 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
540 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
541 // check all non-zero sized sections are within segment
542 if ( sect->addr() < startAddr )
543 throwf("section %s vm address not within segment", sect->sectname());
544 if ( (sect->addr()+sect->size()) > endAddr )
545 throwf("section %s vm address not within segment", sect->sectname());
546 if ( ((sect->flags() & SECTION_TYPE) != S_ZEROFILL)
547 && ((sect->flags() & SECTION_TYPE) != S_THREAD_LOCAL_ZEROFILL)
548 && (segCmd->filesize() != 0)
549 && (sect->size() != 0) ) {
550 if ( sect->offset() < startOffset )
551 throwf("section %s file offset not within segment", sect->sectname());
552 if ( (sect->offset()+sect->size()) > endOffset )
553 throwf("section %s file offset not within segment", sect->sectname());
554 }
555 checkSection(segCmd, sect);
556 ++fSectionCount;
557 }
558 }
559 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
560 }
561
562 // verify there was a LINKEDIT segment
563 if ( linkEditSegment == NULL )
564 throw "no __LINKEDIT segment";
565
566 // verify there was an executable __TEXT segment and load commands are in it
567 if ( fTEXTSegment == NULL )
568 throw "no __TEXT segment";
569 if ( fTEXTSegment->initprot() != (VM_PROT_READ|VM_PROT_EXECUTE) )
570 throw "__TEXT segment does not have r-x init permissions";
571 //if ( fTEXTSegment->maxprot() != (VM_PROT_READ|VM_PROT_EXECUTE|VM_PROT_WRITE) )
572 // throw "__TEXT segment does not have rwx max permissions";
573 if ( fTEXTSegment->fileoff() != 0 )
574 throw "__TEXT segment does not start at mach_header";
575 if ( fTEXTSegment->filesize() < (sizeof(macho_header<P>)+fHeader->sizeofcmds()) )
576 throw "__TEXT segment smaller than load commands";
577
578 // verify if custom stack used, that stack is in __UNIXSTACK segment
579 if ( threadInfo != NULL ) {
580 pint_t initialSP = getInitialStackPointer(threadInfo);
581 if ( initialSP != 0 ) {
582 if ( stackSegment == NULL )
583 throw "LC_UNIXTHREAD specifics custom initial stack pointer, but no __UNIXSTACK segment";
584 if ( (initialSP < stackSegment->vmaddr()) || (initialSP > (stackSegment->vmaddr()+stackSegment->vmsize())) )
585 throw "LC_UNIXTHREAD specifics custom initial stack pointer which does not point into __UNIXSTACK segment";
586 }
587 }
588
589 // verify __UNIXSTACK is zero fill
590 if ( stackSegment != NULL ) {
591 if ( (stackSegment->filesize() != 0) || (stackSegment->fileoff() != 0) )
592 throw "__UNIXSTACK is not a zero-fill segment";
593 if ( stackSegment->vmsize() < 4096 )
594 throw "__UNIXSTACK segment is too small";
595 }
596
597 // verify entry point is in __TEXT segment
598 if ( threadInfo != NULL ) {
599 pint_t initialPC = getEntryPoint(threadInfo);
600 if ( (initialPC < fTEXTSegment->vmaddr()) || (initialPC >= (fTEXTSegment->vmaddr()+fTEXTSegment->vmsize())) )
601 throwf("entry point 0x%0llX is outside __TEXT segment", (long long)initialPC);
602 }
603 else if ( entryPoint != NULL ) {
604 pint_t initialOffset = entryPoint->entryoff();
605 if ( (initialOffset < fTEXTSegment->fileoff()) || (initialOffset >= (fTEXTSegment->fileoff()+fTEXTSegment->filesize())) )
606 throwf("entry point 0x%0llX is outside __TEXT segment", (long long)initialOffset);
607 }
608
609 // checks for executables
610 bool isStaticExecutable = false;
611 if ( fHeader->filetype() == MH_EXECUTE ) {
612 isStaticExecutable = true;
613 cmd = cmds;
614 for (uint32_t i = 0; i < cmd_count; ++i) {
615 switch ( cmd->cmd() ) {
616 case LC_LOAD_DYLINKER:
617 // the existence of a dyld load command makes a executable dynamic
618 isStaticExecutable = false;
619 break;
620 }
621 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
622 }
623 if ( isStaticExecutable ) {
624 if ( (fHeader->flags() != MH_NOUNDEFS) && (fHeader->flags() != (MH_NOUNDEFS|MH_PIE)) )
625 throw "invalid bits in mach_header flags for static executable";
626 }
627 }
628
629 // verify encryption info
630 if ( encryption_info != NULL ) {
631 if ( fHeader->filetype() != MH_EXECUTE )
632 throw "LC_ENCRYPTION_INFO load command is only legal in main executables";
633 if ( encryption_info->cryptoff() < (sizeof(macho_header<P>) + fHeader->sizeofcmds()) )
634 throw "LC_ENCRYPTION_INFO load command has cryptoff covers some load commands";
635 if ( (encryption_info->cryptoff() % 4096) != 0 )
636 throw "LC_ENCRYPTION_INFO load command has cryptoff which is not page aligned";
637 if ( (encryption_info->cryptsize() % 4096) != 0 )
638 throw "LC_ENCRYPTION_INFO load command has cryptsize which is not page sized";
639 for (typename std::vector<std::pair<pint_t, pint_t> >::iterator it = segmentFileOffsetRanges.begin();
640 it != segmentFileOffsetRanges.end(); ++it) {
641 if ( (it->first <= encryption_info->cryptoff()) && (encryption_info->cryptoff() < it->second) ) {
642 if ( (encryption_info->cryptoff() + encryption_info->cryptsize()) > it->second )
643 throw "LC_ENCRYPTION_INFO load command is not contained within one segment";
644 }
645 }
646 }
647
648 // check LC_SYMTAB, LC_DYSYMTAB, and LC_SEGMENT_SPLIT_INFO
649 cmd = cmds;
650 bool foundDynamicSymTab = false;
651 for (uint32_t i = 0; i < cmd_count; ++i) {
652 switch ( cmd->cmd() ) {
653 case LC_SYMTAB:
654 {
655 const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
656 fSymbolCount = symtab->nsyms();
657 fSymbols = (const macho_nlist<P>*)((char*)fHeader + symtab->symoff());
658 if ( symtab->symoff() < linkEditSegment->fileoff() )
659 throw "symbol table not in __LINKEDIT";
660 if ( (symtab->symoff() + fSymbolCount*sizeof(macho_nlist<P>*)) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
661 throw "symbol table end not in __LINKEDIT";
662 if ( (symtab->symoff() % sizeof(pint_t)) != 0 )
663 throw "symbol table start not pointer aligned";
664 fStrings = (char*)fHeader + symtab->stroff();
665 fStringsEnd = fStrings + symtab->strsize();
666 if ( symtab->stroff() < linkEditSegment->fileoff() )
667 throw "string pool not in __LINKEDIT";
668 if ( (symtab->stroff()+symtab->strsize()) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
669 throw "string pool extends beyond __LINKEDIT";
670 if ( (symtab->stroff() % 4) != 0 ) // work around until rdar://problem/4737991 is fixed
671 throw "string pool start not pointer aligned";
672 if ( (symtab->strsize() % sizeof(pint_t)) != 0 )
673 throw "string pool size not a multiple of pointer size";
674 }
675 break;
676 case LC_DYSYMTAB:
677 {
678 if ( isStaticExecutable &&! fSlidableImage )
679 throw "LC_DYSYMTAB should not be used in static executable";
680 foundDynamicSymTab = true;
681 fDynamicSymbolTable = (macho_dysymtab_command<P>*)cmd;
682 fIndirectTable = (uint32_t*)((char*)fHeader + fDynamicSymbolTable->indirectsymoff());
683 fIndirectTableCount = fDynamicSymbolTable->nindirectsyms();
684 if ( fIndirectTableCount != 0 ) {
685 if ( fDynamicSymbolTable->indirectsymoff() < linkEditSegment->fileoff() )
686 throw "indirect symbol table not in __LINKEDIT";
687 if ( (fDynamicSymbolTable->indirectsymoff()+fIndirectTableCount*8) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
688 throw "indirect symbol table not in __LINKEDIT";
689 if ( (fDynamicSymbolTable->indirectsymoff() % sizeof(pint_t)) != 0 )
690 throw "indirect symbol table not pointer aligned";
691 }
692 fLocalRelocationsCount = fDynamicSymbolTable->nlocrel();
693 if ( fLocalRelocationsCount != 0 ) {
694 fLocalRelocations = (const macho_relocation_info<P>*)((char*)fHeader + fDynamicSymbolTable->locreloff());
695 if ( fDynamicSymbolTable->locreloff() < linkEditSegment->fileoff() )
696 throw "local relocations not in __LINKEDIT";
697 if ( (fDynamicSymbolTable->locreloff()+fLocalRelocationsCount*sizeof(macho_relocation_info<P>)) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
698 throw "local relocations not in __LINKEDIT";
699 if ( (fDynamicSymbolTable->locreloff() % sizeof(pint_t)) != 0 )
700 throw "local relocations table not pointer aligned";
701 }
702 fExternalRelocationsCount = fDynamicSymbolTable->nextrel();
703 if ( fExternalRelocationsCount != 0 ) {
704 fExternalRelocations = (const macho_relocation_info<P>*)((char*)fHeader + fDynamicSymbolTable->extreloff());
705 if ( fDynamicSymbolTable->extreloff() < linkEditSegment->fileoff() )
706 throw "external relocations not in __LINKEDIT";
707 if ( (fDynamicSymbolTable->extreloff()+fExternalRelocationsCount*sizeof(macho_relocation_info<P>)) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
708 throw "external relocations not in __LINKEDIT";
709 if ( (fDynamicSymbolTable->extreloff() % sizeof(pint_t)) != 0 )
710 throw "external relocations table not pointer aligned";
711 }
712 }
713 break;
714 case LC_SEGMENT_SPLIT_INFO:
715 {
716 if ( isStaticExecutable )
717 throw "LC_SEGMENT_SPLIT_INFO should not be used in static executable";
718 const macho_linkedit_data_command<P>* info = (macho_linkedit_data_command<P>*)cmd;
719 if ( info->dataoff() < linkEditSegment->fileoff() )
720 throw "split seg info not in __LINKEDIT";
721 if ( (info->dataoff()+info->datasize()) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
722 throw "split seg info not in __LINKEDIT";
723 if ( (info->dataoff() % sizeof(pint_t)) != 0 )
724 throw "split seg info table not pointer aligned";
725 if ( (info->datasize() % sizeof(pint_t)) != 0 )
726 throw "split seg info size not a multiple of pointer size";
727 }
728 break;
729 case LC_FUNCTION_STARTS:
730 {
731 const macho_linkedit_data_command<P>* info = (macho_linkedit_data_command<P>*)cmd;
732 if ( info->dataoff() < linkEditSegment->fileoff() )
733 throw "function starts data not in __LINKEDIT";
734 if ( (info->dataoff()+info->datasize()) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
735 throw "function starts data not in __LINKEDIT";
736 if ( (info->dataoff() % sizeof(pint_t)) != 0 )
737 throw "function starts data table not pointer aligned";
738 if ( (info->datasize() % sizeof(pint_t)) != 0 )
739 throw "function starts data size not a multiple of pointer size";
740 }
741 break;
742 case LC_DATA_IN_CODE:
743 {
744 const macho_linkedit_data_command<P>* info = (macho_linkedit_data_command<P>*)cmd;
745 if ( info->dataoff() < linkEditSegment->fileoff() )
746 throw "data-in-code data not in __LINKEDIT";
747 if ( (info->dataoff()+info->datasize()) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
748 throw "data-in-code data not in __LINKEDIT";
749 if ( (info->dataoff() % sizeof(pint_t)) != 0 )
750 throw "data-in-code data table not pointer aligned";
751 if ( (info->datasize() % sizeof(pint_t)) != 0 )
752 throw "data-in-code data size not a multiple of pointer size";
753 }
754 break;
755 case LC_DYLIB_CODE_SIGN_DRS:
756 {
757 const macho_linkedit_data_command<P>* info = (macho_linkedit_data_command<P>*)cmd;
758 if ( info->dataoff() < linkEditSegment->fileoff() )
759 throw "dependent dylib DR data not in __LINKEDIT";
760 if ( (info->dataoff()+info->datasize()) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
761 throw "dependent dylib DR data not in __LINKEDIT";
762 if ( (info->dataoff() % sizeof(pint_t)) != 0 )
763 throw "dependent dylib DR data table not pointer aligned";
764 if ( (info->datasize() % sizeof(pint_t)) != 0 )
765 throw "dependent dylib DR data size not a multiple of pointer size";
766 }
767 break;
768 }
769 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
770 }
771 if ( !isStaticExecutable && !foundDynamicSymTab )
772 throw "missing dynamic symbol table";
773 if ( fStrings == NULL )
774 throw "missing symbol table";
775
776}
777
778template <typename A>
779void MachOChecker<A>::checkSection(const macho_segment_command<P>* segCmd, const macho_section<P>* sect)
780{
781 uint8_t sectionType = (sect->flags() & SECTION_TYPE);
782 if ( sectionType == S_ZEROFILL ) {
783 if ( sect->offset() != 0 )
784 throwf("section offset should be zero for zero-fill section %s", sect->sectname());
785 }
786
787 // check section's segment name matches segment
788// if ( strncmp(sect->segname(), segCmd->segname(), 16) != 0 )
789// throwf("section %s in segment %s has wrong segment name", sect->sectname(), segCmd->segname());
790
791 // more section tests here
792}
793
794
795
796
797template <typename A>
798void MachOChecker<A>::checkIndirectSymbolTable()
799{
800 // static executables don't have indirect symbol table
801 if ( fDynamicSymbolTable == NULL )
802 return;
803 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
804 const uint32_t cmd_count = fHeader->ncmds();
805 const macho_load_command<P>* cmd = cmds;
806 for (uint32_t i = 0; i < cmd_count; ++i) {
807 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
808 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
809 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
810 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
811 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
812 // make sure all magic sections that use indirect symbol table fit within it
813 uint32_t start = 0;
814 uint32_t elementSize = 0;
815 switch ( sect->flags() & SECTION_TYPE ) {
816 case S_SYMBOL_STUBS:
817 elementSize = sect->reserved2();
818 start = sect->reserved1();
819 break;
820 case S_LAZY_SYMBOL_POINTERS:
821 case S_NON_LAZY_SYMBOL_POINTERS:
822 elementSize = sizeof(pint_t);
823 start = sect->reserved1();
824 break;
825 }
826 if ( elementSize != 0 ) {
827 uint32_t count = sect->size() / elementSize;
828 if ( (count*elementSize) != sect->size() )
829 throwf("%s section size is not an even multiple of element size", sect->sectname());
830 if ( (start+count) > fIndirectTableCount )
831 throwf("%s section references beyond end of indirect symbol table (%d > %d)", sect->sectname(), start+count, fIndirectTableCount );
832 }
833 }
834 }
835 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
836 }
837}
838
839
840
841
842template <typename A>
843void MachOChecker<A>::checkSymbolTable()
844{
845 // verify no duplicate external symbol names
846 if ( fDynamicSymbolTable != NULL ) {
847 StringSet externalNames;
848 const macho_nlist<P>* const exportedStart = &fSymbols[fDynamicSymbolTable->iextdefsym()];
849 const macho_nlist<P>* const exportedEnd = &exportedStart[fDynamicSymbolTable->nextdefsym()];
850 int i = fDynamicSymbolTable->iextdefsym();
851 for(const macho_nlist<P>* p = exportedStart; p < exportedEnd; ++p, ++i) {
852 const char* symName = &fStrings[p->n_strx()];
853 if ( symName > fStringsEnd )
854 throw "string index out of range";
855 //fprintf(stderr, "sym[%d] = %s\n", i, symName);
856 if ( externalNames.find(symName) != externalNames.end() )
857 throwf("duplicate external symbol: %s", symName);
858 if ( (p->n_type() & N_EXT) == 0 )
859 throwf("non-external symbol in external symbol range: %s", symName);
860 // don't add N_INDR to externalNames because there is likely an undefine with same name
861 if ( (p->n_type() & N_INDR) == 0 )
862 externalNames.insert(symName);
863 }
864 // verify no undefines with same name as an external symbol
865 const macho_nlist<P>* const undefinesStart = &fSymbols[fDynamicSymbolTable->iundefsym()];
866 const macho_nlist<P>* const undefinesEnd = &undefinesStart[fDynamicSymbolTable->nundefsym()];
867 for(const macho_nlist<P>* p = undefinesStart; p < undefinesEnd; ++p) {
868 const char* symName = &fStrings[p->n_strx()];
869 if ( symName > fStringsEnd )
870 throw "string index out of range";
871 if ( externalNames.find(symName) != externalNames.end() )
872 throwf("undefine with same name as external symbol: %s", symName);
873 }
874 // verify all N_SECT values are valid
875 for(const macho_nlist<P>* p = fSymbols; p < &fSymbols[fSymbolCount]; ++p) {
876 uint8_t type = p->n_type();
877 if ( ((type & N_STAB) == 0) && ((type & N_TYPE) == N_SECT) ) {
878 if ( p->n_sect() > fSectionCount ) {
879 throwf("symbol '%s' has n_sect=%d which is too large", &fStrings[p->n_strx()], p->n_sect());
880 }
881 }
882 }
883 }
884}
885
886
887template <typename A>
888void MachOChecker<A>::checkInitTerms()
889{
890 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
891 const uint32_t cmd_count = fHeader->ncmds();
892 const macho_load_command<P>* cmd = cmds;
893 for (uint32_t i = 0; i < cmd_count; ++i) {
894 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
895 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
896 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
897 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
898 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
899 // make sure all magic sections that use indirect symbol table fit within it
900 uint32_t count;
901 pint_t* arrayStart;
902 pint_t* arrayEnd;
903 const char* kind = "initializer";
904 switch ( sect->flags() & SECTION_TYPE ) {
905 case S_MOD_TERM_FUNC_POINTERS:
906 kind = "terminator";
907 // fall through
908 case S_MOD_INIT_FUNC_POINTERS:
909 count = sect->size() / sizeof(pint_t);
910 if ( (count*sizeof(pint_t)) != sect->size() )
911 throwf("%s section size is not an even multiple of element size", sect->sectname());
912 if ( (sect->addr() % sizeof(pint_t)) != 0 )
913 throwf("%s section size is not pointer size aligned", sect->sectname());
914 // check each pointer in array points within TEXT
915 arrayStart = (pint_t*)((char*)fHeader + sect->offset());
916 arrayEnd = (pint_t*)((char*)fHeader + sect->offset() + sect->size());
917 for (pint_t* p=arrayStart; p < arrayEnd; ++p) {
918 pint_t pointer = P::getP(*p);
919 if ( (pointer < fTEXTSegment->vmaddr()) || (pointer >= (fTEXTSegment->vmaddr()+fTEXTSegment->vmsize())) )
920 throwf("%s 0x%08llX points outside __TEXT segment", kind, (long long)pointer);
921 }
922 // check each pointer in array will be rebased and not bound
923 if ( fSlidableImage ) {
924 pint_t sectionBeginAddr = sect->addr();
925 pint_t sectionEndddr = sect->addr() + sect->size();
926 for(pint_t addr = sectionBeginAddr; addr < sectionEndddr; addr += sizeof(pint_t)) {
927 if ( addressIsBindingSite(addr) )
928 throwf("%s at 0x%0llX has binding to external symbol", kind, (long long)addr);
929 if ( ! addressIsRebaseSite(addr) )
930 throwf("%s at 0x%0llX is not rebased", kind, (long long)addr);
931 }
932 }
933 break;
934 }
935 }
936 }
937 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
938 }
939
940}
941
942
943template <>
944ppc::P::uint_t MachOChecker<ppc>::relocBase()
945{
946 if ( fHeader->flags() & MH_SPLIT_SEGS )
947 return fFirstWritableSegment->vmaddr();
948 else
949 return fFirstSegment->vmaddr();
950}
951
952template <>
953ppc64::P::uint_t MachOChecker<ppc64>::relocBase()
954{
955 if ( fWriteableSegmentWithAddrOver4G )
956 return fFirstWritableSegment->vmaddr();
957 else
958 return fFirstSegment->vmaddr();
959}
960
961template <>
962x86::P::uint_t MachOChecker<x86>::relocBase()
963{
964 if ( fHeader->flags() & MH_SPLIT_SEGS )
965 return fFirstWritableSegment->vmaddr();
966 else
967 return fFirstSegment->vmaddr();
968}
969
970template <>
971x86_64::P::uint_t MachOChecker<x86_64>::relocBase()
972{
973 // check for split-seg
974 return fFirstWritableSegment->vmaddr();
975}
976
977template <>
978arm::P::uint_t MachOChecker<arm>::relocBase()
979{
980 if ( fHeader->flags() & MH_SPLIT_SEGS )
981 return fFirstWritableSegment->vmaddr();
982 else
983 return fFirstSegment->vmaddr();
984}
985
986
987template <typename A>
988bool MachOChecker<A>::addressInWritableSegment(pint_t address)
989{
990 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
991 const uint32_t cmd_count = fHeader->ncmds();
992 const macho_load_command<P>* cmd = cmds;
993 for (uint32_t i = 0; i < cmd_count; ++i) {
994 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
995 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
996 if ( (address >= segCmd->vmaddr()) && (address < segCmd->vmaddr()+segCmd->vmsize()) ) {
997 // if segment is writable, we are fine
998 if ( (segCmd->initprot() & VM_PROT_WRITE) != 0 )
999 return true;
1000 // could be a text reloc, make sure section bit is set
1001 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
1002 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
1003 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
1004 if ( (sect->addr() <= address) && (address < (sect->addr()+sect->size())) ) {
1005 // found section for this address, if has relocs we are fine
1006 return ( (sect->flags() & (S_ATTR_EXT_RELOC|S_ATTR_LOC_RELOC)) != 0 );
1007 }
1008 }
1009 }
1010 }
1011 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
1012 }
1013 return false;
1014}
1015
1016
1017template <>
1018void MachOChecker<ppc>::checkExternalReloation(const macho_relocation_info<P>* reloc)
1019{
1020 if ( reloc->r_length() != 2 )
1021 throw "bad external relocation length";
1022 if ( reloc->r_type() != GENERIC_RELOC_VANILLA )
1023 throw "unknown external relocation type";
1024 if ( reloc->r_pcrel() != 0 )
1025 throw "bad external relocation pc_rel";
1026 if ( reloc->r_extern() == 0 )
1027 throw "local relocation found with external relocations";
1028 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1029 throw "external relocation address not in writable segment";
1030 // FIX: check r_symbol
1031}
1032
1033template <>
1034void MachOChecker<ppc64>::checkExternalReloation(const macho_relocation_info<P>* reloc)
1035{
1036 if ( reloc->r_length() != 3 )
1037 throw "bad external relocation length";
1038 if ( reloc->r_type() != GENERIC_RELOC_VANILLA )
1039 throw "unknown external relocation type";
1040 if ( reloc->r_pcrel() != 0 )
1041 throw "bad external relocation pc_rel";
1042 if ( reloc->r_extern() == 0 )
1043 throw "local relocation found with external relocations";
1044 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1045 throw "external relocation address not in writable segment";
1046 // FIX: check r_symbol
1047}
1048
1049template <>
1050void MachOChecker<x86>::checkExternalReloation(const macho_relocation_info<P>* reloc)
1051{
1052 if ( reloc->r_length() != 2 )
1053 throw "bad external relocation length";
1054 if ( reloc->r_type() != GENERIC_RELOC_VANILLA )
1055 throw "unknown external relocation type";
1056 if ( reloc->r_pcrel() != 0 )
1057 throw "bad external relocation pc_rel";
1058 if ( reloc->r_extern() == 0 )
1059 throw "local relocation found with external relocations";
1060 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1061 throw "external relocation address not in writable segment";
1062 // FIX: check r_symbol
1063}
1064
1065
1066template <>
1067void MachOChecker<x86_64>::checkExternalReloation(const macho_relocation_info<P>* reloc)
1068{
1069 if ( reloc->r_length() != 3 )
1070 throw "bad external relocation length";
1071 if ( reloc->r_type() != X86_64_RELOC_UNSIGNED )
1072 throw "unknown external relocation type";
1073 if ( reloc->r_pcrel() != 0 )
1074 throw "bad external relocation pc_rel";
1075 if ( reloc->r_extern() == 0 )
1076 throw "local relocation found with external relocations";
1077 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1078 throw "exernal relocation address not in writable segment";
1079 // FIX: check r_symbol
1080}
1081
#if SUPPORT_ARCH_arm_any
// arm: an external relocation must be a 4-byte (r_length 2), non pc-relative,
// extern ARM_RELOC_VANILLA whose address is in a writable location.
// Throws (const char*) describing the first rule violated.
template <>
void MachOChecker<arm>::checkExternalReloation(const macho_relocation_info<P>* reloc)
{
	const char* problem = NULL;
	if ( reloc->r_length() != 2 )
		problem = "bad external relocation length";
	else if ( reloc->r_type() != ARM_RELOC_VANILLA )
		problem = "unknown external relocation type";
	else if ( reloc->r_pcrel() != 0 )
		problem = "bad external relocation pc_rel";
	else if ( reloc->r_extern() == 0 )
		problem = "local relocation found with external relocations";
	else if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
		problem = "external relocation address not in writable segment";
	if ( problem != NULL )
		throw problem;
	// FIX: check r_symbol
}
#endif
1099
1100
1101template <>
1102void MachOChecker<ppc>::checkLocalReloation(const macho_relocation_info<P>* reloc)
1103{
1104 if ( reloc->r_address() & R_SCATTERED ) {
1105 // scattered
1106 const macho_scattered_relocation_info<P>* sreloc = (const macho_scattered_relocation_info<P>*)reloc;
1107 // FIX
1108
1109 }
1110 else {
1111 // FIX
1112 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1113 throwf("local relocation address 0x%08X not in writable segment", reloc->r_address());
1114 }
1115}
1116
1117
1118template <>
1119void MachOChecker<ppc64>::checkLocalReloation(const macho_relocation_info<P>* reloc)
1120{
1121 if ( reloc->r_length() != 3 )
1122 throw "bad local relocation length";
1123 if ( reloc->r_type() != GENERIC_RELOC_VANILLA )
1124 throw "unknown local relocation type";
1125 if ( reloc->r_pcrel() != 0 )
1126 throw "bad local relocation pc_rel";
1127 if ( reloc->r_extern() != 0 )
1128 throw "external relocation found with local relocations";
1129 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1130 throw "local relocation address not in writable segment";
1131}
1132
1133template <>
1134void MachOChecker<x86>::checkLocalReloation(const macho_relocation_info<P>* reloc)
1135{
1136 // FIX
1137}
1138
1139template <>
1140void MachOChecker<x86_64>::checkLocalReloation(const macho_relocation_info<P>* reloc)
1141{
1142 if ( reloc->r_length() != 3 )
1143 throw "bad local relocation length";
1144 if ( reloc->r_type() != X86_64_RELOC_UNSIGNED )
1145 throw "unknown local relocation type";
1146 if ( reloc->r_pcrel() != 0 )
1147 throw "bad local relocation pc_rel";
1148 if ( reloc->r_extern() != 0 )
1149 throw "external relocation found with local relocations";
1150 if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
1151 throw "local relocation address not in writable segment";
1152}
1153
#if SUPPORT_ARCH_arm_any
// arm: checks one local relocation.
//   scattered:     must be 4 bytes (r_length 2) and of type ARM_RELOC_PB_LA_PTR
//   non-scattered: must be 4 bytes, non-extern, and address a writable location
// Throws (const char*) describing the first rule violated.
template <>
void MachOChecker<arm>::checkLocalReloation(const macho_relocation_info<P>* reloc)
{
	const char* problem = NULL;
	const bool isScattered = ( (reloc->r_address() & R_SCATTERED) != 0 );
	if ( isScattered ) {
		// reinterpret the entry using the scattered layout
		const macho_scattered_relocation_info<P>* sreloc = (const macho_scattered_relocation_info<P>*)reloc;
		if ( sreloc->r_length() != 2 )
			problem = "bad local scattered relocation length";
		else if ( sreloc->r_type() != ARM_RELOC_PB_LA_PTR )
			problem = "bad local scattered relocation type";
	}
	else {
		if ( reloc->r_length() != 2 )
			problem = "bad local relocation length";
		else if ( reloc->r_extern() != 0 )
			problem = "external relocation found with local relocations";
		else if ( ! this->addressInWritableSegment(reloc->r_address() + this->relocBase()) )
			problem = "local relocation address not in writable segment";
	}
	if ( problem != NULL )
		throw problem;
}
#endif
1176
1177template <typename A>
1178void MachOChecker<A>::checkRelocations()
1179{
1180 // external relocations should be sorted to minimize dyld symbol lookups
1181 // therefore every reloc with the same r_symbolnum value should be contiguous
1182 std::set<uint32_t> previouslySeenSymbolIndexes;
1183 uint32_t lastSymbolIndex = 0xFFFFFFFF;
1184 const macho_relocation_info<P>* const externRelocsEnd = &fExternalRelocations[fExternalRelocationsCount];
1185 for (const macho_relocation_info<P>* reloc = fExternalRelocations; reloc < externRelocsEnd; ++reloc) {
1186 this->checkExternalReloation(reloc);
1187 if ( reloc->r_symbolnum() != lastSymbolIndex ) {
1188 if ( previouslySeenSymbolIndexes.count(reloc->r_symbolnum()) != 0 )
1189 throw "external relocations not sorted";
1190 previouslySeenSymbolIndexes.insert(lastSymbolIndex);
1191 lastSymbolIndex = reloc->r_symbolnum();
1192 }
1193 }
1194
1195 const macho_relocation_info<P>* const localRelocsEnd = &fLocalRelocations[fLocalRelocationsCount];
1196 for (const macho_relocation_info<P>* reloc = fLocalRelocations; reloc < localRelocsEnd; ++reloc) {
1197 this->checkLocalReloation(reloc);
1198 }
1199
1200 // verify any section with S_ATTR_LOC_RELOC bits set actually has text relocs
1201 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
1202 const uint32_t cmd_count = fHeader->ncmds();
1203 const macho_load_command<P>* cmd = cmds;
1204 for (uint32_t i = 0; i < cmd_count; ++i) {
1205 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1206 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
1207 // if segment is writable, we are fine
1208 if ( (segCmd->initprot() & VM_PROT_WRITE) != 0 )
1209 continue;
1210 // look at sections that have text reloc bit set
1211 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
1212 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
1213 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
1214 if ( (sect->flags() & S_ATTR_LOC_RELOC) != 0 ) {
1215 if ( ! hasTextRelocInRange(sect->addr(), sect->addr()+sect->size()) ) {
1216 throwf("section %s has attribute set that it has relocs, but it has none", sect->sectname());
1217 }
1218 }
1219 }
1220 }
1221 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
1222 }
1223}
1224
1225template <typename A>
1226typename A::P::uint_t MachOChecker<A>::segStartAddress(uint8_t segIndex)
1227{
1228 if ( segIndex > fSegments.size() )
1229 throw "segment index out of range";
1230 return fSegments[segIndex]->vmaddr();
1231}
1232
1233template <typename A>
1234bool MachOChecker<A>::hasTextRelocInRange(pint_t rangeStart, pint_t rangeEnd)
1235{
1236 // look at local relocs
1237 const macho_relocation_info<P>* const localRelocsEnd = &fLocalRelocations[fLocalRelocationsCount];
1238 for (const macho_relocation_info<P>* reloc = fLocalRelocations; reloc < localRelocsEnd; ++reloc) {
1239 pint_t relocAddress = reloc->r_address() + this->relocBase();
1240 if ( (rangeStart <= relocAddress) && (relocAddress < rangeEnd) )
1241 return true;
1242 }
1243 // look rebase info
1244 if ( fDyldInfo != NULL ) {
1245 const uint8_t* p = (uint8_t*)fHeader + fDyldInfo->rebase_off();
1246 const uint8_t* end = &p[fDyldInfo->rebase_size()];
1247
1248 uint8_t type = 0;
1249 uint64_t segOffset = 0;
1250 uint32_t count;
1251 uint32_t skip;
1252 int segIndex;
1253 pint_t segStartAddr = 0;
1254 pint_t addr;
1255 bool done = false;
1256 while ( !done && (p < end) ) {
1257 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
1258 uint8_t opcode = *p & REBASE_OPCODE_MASK;
1259 ++p;
1260 switch (opcode) {
1261 case REBASE_OPCODE_DONE:
1262 done = true;
1263 break;
1264 case REBASE_OPCODE_SET_TYPE_IMM:
1265 type = immediate;
1266 break;
1267 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1268 segIndex = immediate;
1269 segStartAddr = segStartAddress(segIndex);
1270 segOffset = read_uleb128(p, end);
1271 break;
1272 case REBASE_OPCODE_ADD_ADDR_ULEB:
1273 segOffset += read_uleb128(p, end);
1274 break;
1275 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
1276 segOffset += immediate*sizeof(pint_t);
1277 break;
1278 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
1279 for (int i=0; i < immediate; ++i) {
1280 addr = segStartAddr+segOffset;
1281 if ( (rangeStart <= addr) && (addr < rangeEnd) )
1282 return true;
1283 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1284 segOffset += sizeof(pint_t);
1285 }
1286 break;
1287 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
1288 count = read_uleb128(p, end);
1289 for (uint32_t i=0; i < count; ++i) {
1290 addr = segStartAddr+segOffset;
1291 if ( (rangeStart <= addr) && (addr < rangeEnd) )
1292 return true;
1293 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1294 segOffset += sizeof(pint_t);
1295 }
1296 break;
1297 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
1298 addr = segStartAddr+segOffset;
1299 if ( (rangeStart <= addr) && (addr < rangeEnd) )
1300 return true;
1301 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1302 segOffset += read_uleb128(p, end) + sizeof(pint_t);
1303 break;
1304 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
1305 count = read_uleb128(p, end);
1306 skip = read_uleb128(p, end);
1307 for (uint32_t i=0; i < count; ++i) {
1308 addr = segStartAddr+segOffset;
1309 if ( (rangeStart <= addr) && (addr < rangeEnd) )
1310 return true;
1311 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1312 segOffset += skip + sizeof(pint_t);
1313 }
1314 break;
1315 default:
1316 throwf("bad rebase opcode %d", *p);
1317 }
1318 }
1319 }
1320 return false;
1321}
1322
1323template <typename A>
1324bool MachOChecker<A>::addressIsRebaseSite(pint_t targetAddr)
1325{
1326 // look at local relocs
1327 const macho_relocation_info<P>* const localRelocsEnd = &fLocalRelocations[fLocalRelocationsCount];
1328 for (const macho_relocation_info<P>* reloc = fLocalRelocations; reloc < localRelocsEnd; ++reloc) {
1329 pint_t relocAddress = reloc->r_address() + this->relocBase();
1330 if ( relocAddress == targetAddr )
1331 return true;
1332 }
1333 // look rebase info
1334 if ( fDyldInfo != NULL ) {
1335 const uint8_t* p = (uint8_t*)fHeader + fDyldInfo->rebase_off();
1336 const uint8_t* end = &p[fDyldInfo->rebase_size()];
1337
1338 uint8_t type = 0;
1339 uint64_t segOffset = 0;
1340 uint32_t count;
1341 uint32_t skip;
1342 int segIndex;
1343 pint_t segStartAddr = 0;
1344 pint_t addr;
1345 bool done = false;
1346 while ( !done && (p < end) ) {
1347 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
1348 uint8_t opcode = *p & REBASE_OPCODE_MASK;
1349 ++p;
1350 switch (opcode) {
1351 case REBASE_OPCODE_DONE:
1352 done = true;
1353 break;
1354 case REBASE_OPCODE_SET_TYPE_IMM:
1355 type = immediate;
1356 break;
1357 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1358 segIndex = immediate;
1359 segStartAddr = segStartAddress(segIndex);
1360 segOffset = read_uleb128(p, end);
1361 break;
1362 case REBASE_OPCODE_ADD_ADDR_ULEB:
1363 segOffset += read_uleb128(p, end);
1364 break;
1365 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
1366 segOffset += immediate*sizeof(pint_t);
1367 break;
1368 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
1369 for (int i=0; i < immediate; ++i) {
1370 addr = segStartAddr+segOffset;
1371 if ( addr == targetAddr )
1372 return true;
1373 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1374 segOffset += sizeof(pint_t);
1375 }
1376 break;
1377 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
1378 count = read_uleb128(p, end);
1379 for (uint32_t i=0; i < count; ++i) {
1380 addr = segStartAddr+segOffset;
1381 if ( addr == targetAddr )
1382 return true;
1383 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1384 segOffset += sizeof(pint_t);
1385 }
1386 break;
1387 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
1388 addr = segStartAddr+segOffset;
1389 if ( addr == targetAddr )
1390 return true;
1391 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1392 segOffset += read_uleb128(p, end) + sizeof(pint_t);
1393 break;
1394 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
1395 count = read_uleb128(p, end);
1396 skip = read_uleb128(p, end);
1397 for (uint32_t i=0; i < count; ++i) {
1398 addr = segStartAddr+segOffset;
1399 if ( addr == targetAddr )
1400 return true;
1401 //printf("%-7s %-16s 0x%08llX %s\n", segName, sectionName(segIndex, segStartAddr+segOffset), segStartAddr+segOffset, typeName);
1402 segOffset += skip + sizeof(pint_t);
1403 }
1404 break;
1405 default:
1406 throwf("bad rebase opcode %d", *p);
1407 }
1408 }
1409 }
1410 return false;
1411}
1412
1413
1414template <typename A>
1415bool MachOChecker<A>::addressIsBindingSite(pint_t targetAddr)
1416{
1417 // look at external relocs
1418 const macho_relocation_info<P>* const externRelocsEnd = &fExternalRelocations[fExternalRelocationsCount];
1419 for (const macho_relocation_info<P>* reloc = fExternalRelocations; reloc < externRelocsEnd; ++reloc) {
1420 pint_t relocAddress = reloc->r_address() + this->relocBase();
1421 if ( relocAddress == targetAddr )
1422 return true;
1423 }
1424 // look bind info
1425 if ( fDyldInfo != NULL ) {
1426 const uint8_t* p = (uint8_t*)fHeader + fDyldInfo->bind_off();
1427 const uint8_t* end = &p[fDyldInfo->bind_size()];
1428
1429 uint8_t type = 0;
1430 uint64_t segOffset = 0;
1431 uint32_t count;
1432 uint32_t skip;
1433 uint8_t flags;
1434 const char* symbolName = NULL;
1435 int libraryOrdinal = 0;
1436 int segIndex;
1437 int64_t addend = 0;
1438 pint_t segStartAddr = 0;
1439 pint_t addr;
1440 bool done = false;
1441 while ( !done && (p < end) ) {
1442 uint8_t immediate = *p & BIND_IMMEDIATE_MASK;
1443 uint8_t opcode = *p & BIND_OPCODE_MASK;
1444 ++p;
1445 switch (opcode) {
1446 case BIND_OPCODE_DONE:
1447 done = true;
1448 break;
1449 case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
1450 libraryOrdinal = immediate;
1451 break;
1452 case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
1453 libraryOrdinal = read_uleb128(p, end);
1454 break;
1455 case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
1456 // the special ordinals are negative numbers
1457 if ( immediate == 0 )
1458 libraryOrdinal = 0;
1459 else {
1460 int8_t signExtended = BIND_OPCODE_MASK | immediate;
1461 libraryOrdinal = signExtended;
1462 }
1463 break;
1464 case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
1465 symbolName = (char*)p;
1466 while (*p != '\0')
1467 ++p;
1468 ++p;
1469 break;
1470 case BIND_OPCODE_SET_TYPE_IMM:
1471 type = immediate;
1472 break;
1473 case BIND_OPCODE_SET_ADDEND_SLEB:
1474 addend = read_sleb128(p, end);
1475 break;
1476 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
1477 segIndex = immediate;
1478 segStartAddr = segStartAddress(segIndex);
1479 segOffset = read_uleb128(p, end);
1480 break;
1481 case BIND_OPCODE_ADD_ADDR_ULEB:
1482 segOffset += read_uleb128(p, end);
1483 break;
1484 case BIND_OPCODE_DO_BIND:
1485 if ( (segStartAddr+segOffset) == targetAddr )
1486 return true;
1487 segOffset += sizeof(pint_t);
1488 break;
1489 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
1490 if ( (segStartAddr+segOffset) == targetAddr )
1491 return true;
1492 segOffset += read_uleb128(p, end) + sizeof(pint_t);
1493 break;
1494 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
1495 if ( (segStartAddr+segOffset) == targetAddr )
1496 return true;
1497 segOffset += immediate*sizeof(pint_t) + sizeof(pint_t);
1498 break;
1499 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
1500 count = read_uleb128(p, end);
1501 skip = read_uleb128(p, end);
1502 for (uint32_t i=0; i < count; ++i) {
1503 if ( (segStartAddr+segOffset) == targetAddr )
1504 return true;
1505 segOffset += skip + sizeof(pint_t);
1506 }
1507 break;
1508 default:
1509 throwf("bad bind opcode %d", *p);
1510 }
1511 }
1512 }
1513 return false;
1514}
1515
1516
1517static void check(const char* path)
1518{
1519 struct stat stat_buf;
1520
1521 try {
1522 int fd = ::open(path, O_RDONLY, 0);
1523 if ( fd == -1 )
1524 throw "cannot open file";
1525 if ( ::fstat(fd, &stat_buf) != 0 )
1526 throwf("fstat(%s) failed, errno=%d\n", path, errno);
1527 uint32_t length = stat_buf.st_size;
1528 uint8_t* p = (uint8_t*)::mmap(NULL, stat_buf.st_size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0);
1529 if ( p == ((uint8_t*)(-1)) )
1530 throw "cannot map file";
1531 ::close(fd);
1532 const mach_header* mh = (mach_header*)p;
1533 if ( mh->magic == OSSwapBigToHostInt32(FAT_MAGIC) ) {
1534 const struct fat_header* fh = (struct fat_header*)p;
1535 const struct fat_arch* archs = (struct fat_arch*)(p + sizeof(struct fat_header));
1536 for (unsigned long i=0; i < OSSwapBigToHostInt32(fh->nfat_arch); ++i) {
1537 size_t offset = OSSwapBigToHostInt32(archs[i].offset);
1538 size_t size = OSSwapBigToHostInt32(archs[i].size);
1539 unsigned int cputype = OSSwapBigToHostInt32(archs[i].cputype);
1540
1541 switch(cputype) {
1542 case CPU_TYPE_POWERPC:
1543 if ( MachOChecker<ppc>::validFile(p + offset) )
1544 MachOChecker<ppc>::make(p + offset, size, path);
1545 else
1546 throw "in universal file, ppc slice does not contain ppc mach-o";
1547 break;
1548 case CPU_TYPE_I386:
1549 if ( MachOChecker<x86>::validFile(p + offset) )
1550 MachOChecker<x86>::make(p + offset, size, path);
1551 else
1552 throw "in universal file, i386 slice does not contain i386 mach-o";
1553 break;
1554 case CPU_TYPE_POWERPC64:
1555 if ( MachOChecker<ppc64>::validFile(p + offset) )
1556 MachOChecker<ppc64>::make(p + offset, size, path);
1557 else
1558 throw "in universal file, ppc64 slice does not contain ppc64 mach-o";
1559 break;
1560 case CPU_TYPE_X86_64:
1561 if ( MachOChecker<x86_64>::validFile(p + offset) )
1562 MachOChecker<x86_64>::make(p + offset, size, path);
1563 else
1564 throw "in universal file, x86_64 slice does not contain x86_64 mach-o";
1565 break;
1566#if SUPPORT_ARCH_arm_any
1567 case CPU_TYPE_ARM:
1568 if ( MachOChecker<arm>::validFile(p + offset) )
1569 MachOChecker<arm>::make(p + offset, size, path);
1570 else
1571 throw "in universal file, arm slice does not contain arm mach-o";
1572 break;
1573#endif
1574 default:
1575 throwf("in universal file, unknown architecture slice 0x%x\n", cputype);
1576 }
1577 }
1578 }
1579 else if ( MachOChecker<x86>::validFile(p) ) {
1580 MachOChecker<x86>::make(p, length, path);
1581 }
1582 else if ( MachOChecker<ppc>::validFile(p) ) {
1583 MachOChecker<ppc>::make(p, length, path);
1584 }
1585 else if ( MachOChecker<ppc64>::validFile(p) ) {
1586 MachOChecker<ppc64>::make(p, length, path);
1587 }
1588 else if ( MachOChecker<x86_64>::validFile(p) ) {
1589 MachOChecker<x86_64>::make(p, length, path);
1590 }
1591#if SUPPORT_ARCH_arm_any
1592 else if ( MachOChecker<arm>::validFile(p) ) {
1593 MachOChecker<arm>::make(p, length, path);
1594 }
1595#endif
1596 else {
1597 throw "not a known file type";
1598 }
1599 }
1600 catch (const char* msg) {
1601 throwf("%s in %s", msg, path);
1602 }
1603}
1604
1605
1606int main(int argc, const char* argv[])
1607{
1608 bool progress = false;
1609 int result = 0;
1610 for(int i=1; i < argc; ++i) {
1611 const char* arg = argv[i];
1612 if ( arg[0] == '-' ) {
1613 if ( strcmp(arg, "-progress") == 0 ) {
1614 progress = true;
1615 }
1616 else {
1617 throwf("unknown option: %s\n", arg);
1618 }
1619 }
1620 else {
1621 bool success = true;
1622 try {
1623 check(arg);
1624 }
1625 catch (const char* msg) {
1626 fprintf(stderr, "machocheck failed: %s %s\n", arg, msg);
1627 result = 1;
1628 success = false;
1629 }
1630 if ( success && progress )
1631 printf("ok: %s\n", arg);
1632 }
1633 }
1634
1635 return result;
1636}
1637
1638
1639