]> git.saurik.com Git - apple/ld64.git/blame - src/machochecker.cpp
ld64-47.2.tar.gz
[apple/ld64.git] / src / machochecker.cpp
CommitLineData
d696c285
A
1/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25#include <sys/types.h>
26#include <sys/stat.h>
27#include <sys/mman.h>
28#include <stdarg.h>
29#include <stdio.h>
30#include <fcntl.h>
31#include <unistd.h>
32#include <mach-o/loader.h>
33#include <mach-o/fat.h>
34#include <mach-o/stab.h>
35
36#include <vector>
37
38#include "MachOFileAbstraction.hpp"
39#include "Architectures.hpp"
40
41
//
// Formats a printf-style message into a heap allocated string and throws it
// as a `const char*`.  Never returns.
//
// The formatted string is intentionally never freed: ownership passes to
// whoever catches the exception, and callers in this file only print it.
// If the message cannot be formatted (vasprintf reports failure), a fixed
// string literal is thrown instead so the handler never sees an
// indeterminate pointer.
//
__attribute__((noreturn))
void throwf(const char* format, ...)
{
	va_list	list;
	char*	p = NULL;
	va_start(list, format);
	const int formatted = vasprintf(&p, format, list);
	va_end(list);

	// on failure vasprintf returns -1 and the contents of p are not usable
	if ( (formatted < 0) || (p == NULL) ) {
		const char* fallback = "unable to format error message";
		throw fallback;
	}

	const char*	t = p;
	throw t;
}
54
55
56template <typename A>
57class MachOChecker
58{
59public:
60 static bool validFile(const uint8_t* fileContent);
61 static MachOChecker<A>* make(const uint8_t* fileContent, uint32_t fileLength, const char* path)
62 { return new MachOChecker<A>(fileContent, fileLength, path); }
63 virtual ~MachOChecker() {}
64
65
66private:
67 typedef typename A::P P;
68 typedef typename A::P::E E;
69 typedef typename A::P::uint_t pint_t;
70
71 MachOChecker(const uint8_t* fileContent, uint32_t fileLength, const char* path);
72 void checkMachHeader();
73 void checkLoadCommands();
74 void checkSection(const macho_segment_command<P>* segCmd, const macho_section<P>* sect);
75 uint8_t loadCommandSizeMask();
76 void checkIndirectSymbolTable();
77
78 const char* fPath;
79 const macho_header<P>* fHeader;
80 uint32_t fLength;
81 const char* fStrings;
82 const char* fStringsEnd;
83 const macho_nlist<P>* fSymbols;
84 uint32_t fSymbolCount;
85 const uint32_t* fIndirectTable;
86 uint32_t fIndirectTableCount;
87
88};
89
90
91
92template <>
93bool MachOChecker<ppc>::validFile(const uint8_t* fileContent)
94{
95 const macho_header<P>* header = (const macho_header<P>*)fileContent;
96 if ( header->magic() != MH_MAGIC )
97 return false;
98 if ( header->cputype() != CPU_TYPE_POWERPC )
99 return false;
100 switch (header->filetype()) {
101 case MH_EXECUTE:
102 case MH_DYLIB:
103 case MH_BUNDLE:
104 case MH_DYLINKER:
105 return true;
106 }
107 return false;
108}
109
110template <>
111bool MachOChecker<ppc64>::validFile(const uint8_t* fileContent)
112{
113 const macho_header<P>* header = (const macho_header<P>*)fileContent;
114 if ( header->magic() != MH_MAGIC_64 )
115 return false;
116 if ( header->cputype() != CPU_TYPE_POWERPC64 )
117 return false;
118 switch (header->filetype()) {
119 case MH_EXECUTE:
120 case MH_DYLIB:
121 case MH_BUNDLE:
122 case MH_DYLINKER:
123 return true;
124 }
125 return false;
126}
127
128template <>
129bool MachOChecker<x86>::validFile(const uint8_t* fileContent)
130{
131 const macho_header<P>* header = (const macho_header<P>*)fileContent;
132 if ( header->magic() != MH_MAGIC )
133 return false;
134 if ( header->cputype() != CPU_TYPE_I386 )
135 return false;
136 switch (header->filetype()) {
137 case MH_EXECUTE:
138 case MH_DYLIB:
139 case MH_BUNDLE:
140 case MH_DYLINKER:
141 return true;
142 }
143 return false;
144}
145
146
147
148template <> uint8_t MachOChecker<ppc>::loadCommandSizeMask() { return 0x03; }
149template <> uint8_t MachOChecker<ppc64>::loadCommandSizeMask() { return 0x07; }
150template <> uint8_t MachOChecker<x86>::loadCommandSizeMask() { return 0x03; }
151
152
153template <typename A>
154MachOChecker<A>::MachOChecker(const uint8_t* fileContent, uint32_t fileLength, const char* path)
155 : fHeader(NULL), fLength(fileLength), fStrings(NULL), fSymbols(NULL), fSymbolCount(0), fIndirectTableCount(0)
156{
157 // sanity check
158 if ( ! validFile(fileContent) )
159 throw "not a mach-o file that can be checked";
160
161 fPath = strdup(path);
162 fHeader = (const macho_header<P>*)fileContent;
163
164 // sanity check header
165 checkMachHeader();
166
167 // check load commands
168 checkLoadCommands();
169
170 checkIndirectSymbolTable();
171
172}
173
174
175template <typename A>
176void MachOChecker<A>::checkMachHeader()
177{
178 if ( (fHeader->sizeofcmds() + sizeof(macho_header<P>)) > fLength )
179 throw "sizeofcmds in mach_header is larger than file";
180
181 uint32_t flags = fHeader->flags();
182 uint32_t invalidBits = MH_INCRLINK | MH_LAZY_INIT | 0xFFFC0000;
183 if ( flags & invalidBits )
184 throw "invalid bits in mach_header flags";
185
186}
187
188template <typename A>
189void MachOChecker<A>::checkLoadCommands()
190{
191 // check that all load commands fit within the load command space file
192 const uint8_t* const endOfFile = (uint8_t*)fHeader + fLength;
193 const uint8_t* const endOfLoadCommands = (uint8_t*)fHeader + sizeof(macho_header<P>) + fHeader->sizeofcmds();
194 const uint32_t cmd_count = fHeader->ncmds();
195 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
196 const macho_load_command<P>* cmd = cmds;
197 for (uint32_t i = 0; i < cmd_count; ++i) {
198 uint32_t size = cmd->cmdsize();
199 if ( (size & this->loadCommandSizeMask()) != 0 )
200 throwf("load command #%d has a unaligned size", i);
201 const uint8_t* endOfCmd = ((uint8_t*)cmd)+cmd->cmdsize();
202 if ( endOfCmd > endOfLoadCommands )
203 throwf("load command #%d extends beyond the end of the load commands", i);
204 if ( endOfCmd > endOfFile )
205 throwf("load command #%d extends beyond the end of the file", i);
206 switch ( cmd->cmd() ) {
207 case macho_segment_command<P>::CMD:
208 case LC_SYMTAB:
209 case LC_UNIXTHREAD:
210 case LC_DYSYMTAB:
211 case LC_LOAD_DYLIB:
212 case LC_ID_DYLIB:
213 case LC_LOAD_DYLINKER:
214 case LC_ID_DYLINKER:
215 case macho_routines_command<P>::CMD:
216 case LC_SUB_FRAMEWORK:
217 case LC_SUB_UMBRELLA:
218 case LC_SUB_CLIENT:
219 case LC_TWOLEVEL_HINTS:
220 case LC_PREBIND_CKSUM:
221 case LC_LOAD_WEAK_DYLIB:
222 case LC_UUID:
223 break;
224 default:
225 throwf("load command #%d is an unknown kind 0x%X", i, cmd->cmd());
226 }
227 cmd = (const macho_load_command<P>*)endOfCmd;
228 }
229
230 // check segments
231 cmd = cmds;
232 std::vector<std::pair<pint_t, pint_t> > segmentAddressRanges;
233 std::vector<std::pair<pint_t, pint_t> > segmentFileOffsetRanges;
234 const macho_segment_command<P>* linkEditSegment = NULL;
235 for (uint32_t i = 0; i < cmd_count; ++i) {
236 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
237 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
238 if ( segCmd->cmdsize() != (sizeof(macho_segment_command<P>) + segCmd->nsects() * sizeof(macho_section_content<P>)) )
239 throw "invalid segment load command size";
240
241 // see if this overlaps another segment address range
242 uint64_t startAddr = segCmd->vmaddr();
243 uint64_t endAddr = startAddr + segCmd->vmsize();
244 for (typename std::vector<std::pair<pint_t, pint_t> >::iterator it = segmentAddressRanges.begin(); it != segmentAddressRanges.end(); ++it) {
245 if ( it->first < startAddr ) {
246 if ( it->second > startAddr )
247 throw "overlapping segment vm addresses";
248 }
249 else if ( it->first > startAddr ) {
250 if ( it->first < endAddr )
251 throw "overlapping segment vm addresses";
252 }
253 else {
254 throw "overlapping segment vm addresses";
255 }
256 segmentAddressRanges.push_back(std::make_pair<pint_t, pint_t>(startAddr, endAddr));
257 }
258 // see if this overlaps another segment file offset range
259 uint64_t startOffset = segCmd->fileoff();
260 uint64_t endOffset = startOffset + segCmd->filesize();
261 for (typename std::vector<std::pair<pint_t, pint_t> >::iterator it = segmentFileOffsetRanges.begin(); it != segmentFileOffsetRanges.end(); ++it) {
262 if ( it->first < startOffset ) {
263 if ( it->second > startOffset )
264 throw "overlapping segment file data";
265 }
266 else if ( it->first > startOffset ) {
267 if ( it->first < endOffset )
268 throw "overlapping segment file data";
269 }
270 else {
271 throw "overlapping segment file data";
272 }
273 segmentFileOffsetRanges.push_back(std::make_pair<pint_t, pint_t>(startOffset, endOffset));
274 // check is within file bounds
275 if ( (startOffset > fLength) || (endOffset > fLength) )
276 throw "segment file data is past end of file";
277 }
278 // verify it fits in file
279 if ( startOffset > fLength )
280 throw "segment fileoff does not fit in file";
281 if ( endOffset > fLength )
282 throw "segment fileoff+filesize does not fit in file";
283
284 // keep LINKEDIT segment
285 if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 )
286 linkEditSegment = segCmd;
287
288 // check section ranges
289 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
290 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
291 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
292 // check all sections are within segment
293 if ( sect->addr() < startAddr )
294 throwf("section %s vm address not within segment", sect->sectname());
295 if ( (sect->addr()+sect->size()) > endAddr )
296 throwf("section %s vm address not within segment", sect->sectname());
297 if ( (sect->flags() &SECTION_TYPE) != S_ZEROFILL ) {
298 if ( sect->offset() < startOffset )
299 throwf("section %s file offset not within segment", sect->sectname());
300 if ( (sect->offset()+sect->size()) > endOffset )
301 throwf("section %s file offset not within segment", sect->sectname());
302 }
303 checkSection(segCmd, sect);
304 }
305 }
306 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
307 }
308
309 // verify there was a LINKEDIT segment
310 if ( linkEditSegment == NULL )
311 throw "no __LINKEDIT segment";
312
313 // checks for executables
314 bool isStaticExecutable = false;
315 if ( fHeader->filetype() == MH_EXECUTE ) {
316 isStaticExecutable = true;
317 cmd = cmds;
318 for (uint32_t i = 0; i < cmd_count; ++i) {
319 switch ( cmd->cmd() ) {
320 case LC_LOAD_DYLINKER:
321 // the existence of a dyld load command makes a executable dynamic
322 isStaticExecutable = false;
323 break;
324 }
325 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
326 }
327 if ( isStaticExecutable ) {
328 if ( fHeader->flags() != MH_NOUNDEFS )
329 throw "invalid bits in mach_header flags for static executable";
330 }
331 }
332
333 // check LC_SYMTAB and LC_DYSYMTAB
334 cmd = cmds;
335 bool foundDynamicSymTab = false;
336 for (uint32_t i = 0; i < cmd_count; ++i) {
337 switch ( cmd->cmd() ) {
338 case LC_SYMTAB:
339 {
340 const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
341 fSymbolCount = symtab->nsyms();
342 fSymbols = (const macho_nlist<P>*)((char*)fHeader + symtab->symoff());
343 if ( symtab->symoff() < linkEditSegment->fileoff() )
344 throw "symbol table not in __LINKEDIT";
345 if ( (symtab->symoff() + fSymbolCount*sizeof(macho_nlist<P>*)) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
346 throw "symbol table end not in __LINKEDIT";
347 fStrings = (char*)fHeader + symtab->stroff();
348 fStringsEnd = fStrings + symtab->strsize();
349 if ( symtab->stroff() < linkEditSegment->fileoff() )
350 throw "string pool not in __LINKEDIT";
351 if ( (symtab->stroff()+symtab->strsize()) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
352 throw "string pool extends beyond __LINKEDIT";
353 }
354 break;
355 case LC_DYSYMTAB:
356 {
357 if ( isStaticExecutable )
358 throw "LC_DYSYMTAB should not be used in static executable";
359 foundDynamicSymTab = true;
360 const macho_dysymtab_command<P>* dsymtab = (struct macho_dysymtab_command<P>*)cmd;
361 fIndirectTable = (uint32_t*)((char*)fHeader + dsymtab->indirectsymoff());
362 fIndirectTableCount = dsymtab->nindirectsyms();
363 if ( dsymtab->indirectsymoff() < linkEditSegment->fileoff() )
364 throw "indirect symbol table not in __LINKEDIT";
365 if ( (dsymtab->indirectsymoff()+fIndirectTableCount*8) > (linkEditSegment->fileoff()+linkEditSegment->filesize()) )
366 throw "indirect symbol table not in __LINKEDIT";
367 }
368 break;
369 }
370 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
371 }
372 if ( !isStaticExecutable && !foundDynamicSymTab )
373 throw "missing dynamic symbol table";
374 if ( fStrings == NULL )
375 throw "missing symbol table";
376
377
378
379}
380
381template <typename A>
382void MachOChecker<A>::checkSection(const macho_segment_command<P>* segCmd, const macho_section<P>* sect)
383{
384 uint8_t sectionType = (sect->flags() & SECTION_TYPE);
385 if ( sectionType == S_ZEROFILL ) {
386 if ( sect->offset() != 0 )
387 throwf("section offset should be zero for zero-fill section %s", sect->sectname());
388 }
389
390 // more section tests here
391}
392
393template <typename A>
394void MachOChecker<A>::checkIndirectSymbolTable()
395{
396 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
397 const uint32_t cmd_count = fHeader->ncmds();
398 const macho_load_command<P>* cmd = cmds;
399 for (uint32_t i = 0; i < cmd_count; ++i) {
400 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
401 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
402 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
403 const macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
404 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
405 // make sure all magic sections that use indirect symbol table fit within it
406 uint32_t start = 0;
407 uint32_t elementSize = 0;
408 switch ( sect->flags() & SECTION_TYPE ) {
409 case S_SYMBOL_STUBS:
410 elementSize = sect->reserved2();
411 start = sect->reserved1();
412 break;
413 case S_LAZY_SYMBOL_POINTERS:
414 case S_NON_LAZY_SYMBOL_POINTERS:
415 elementSize = sizeof(pint_t);
416 start = sect->reserved1();
417 break;
418 }
419 if ( elementSize != 0 ) {
420 uint32_t count = sect->size() / elementSize;
421 if ( (count*elementSize) != sect->size() )
422 throwf("%s section size is not an even multiple of element size", sect->sectname());
423 if ( (start+count) > fIndirectTableCount )
424 throwf("%s section references beyond end of indirect symbol table (%d > %d)", sect->sectname(), start+count, fIndirectTableCount );
425 }
426 }
427 }
428 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
429 }
430}
431
432
433static void check(const char* path)
434{
435 struct stat stat_buf;
436
437 try {
438 int fd = ::open(path, O_RDONLY, 0);
439 if ( fd == -1 )
440 throw "cannot open file";
441 ::fstat(fd, &stat_buf);
442 uint32_t length = stat_buf.st_size;
443 uint8_t* p = (uint8_t*)::mmap(NULL, stat_buf.st_size, PROT_READ, MAP_FILE, fd, 0);
444 ::close(fd);
445 const mach_header* mh = (mach_header*)p;
446 if ( mh->magic == OSSwapBigToHostInt32(FAT_MAGIC) ) {
447 const struct fat_header* fh = (struct fat_header*)p;
448 const struct fat_arch* archs = (struct fat_arch*)(p + sizeof(struct fat_header));
449 for (unsigned long i=0; i < fh->nfat_arch; ++i) {
450 if ( archs[i].cputype == CPU_TYPE_POWERPC ) {
451 if ( MachOChecker<ppc>::validFile(p + archs[i].offset) )
452 MachOChecker<ppc>::make(p + archs[i].offset, archs[i].size, path);
453 else
454 throw "in universal file, ppc slice does not contain ppc mach-o";
455 }
456 else if ( archs[i].cputype == CPU_TYPE_I386 ) {
457 if ( MachOChecker<x86>::validFile(p + archs[i].offset) )
458 MachOChecker<x86>::make(p + archs[i].offset, archs[i].size, path);
459 else
460 throw "in universal file, i386 slice does not contain i386 mach-o";
461 }
462 else if ( archs[i].cputype == CPU_TYPE_POWERPC64 ) {
463 if ( MachOChecker<ppc64>::validFile(p + archs[i].offset) )
464 MachOChecker<ppc64>::make(p + archs[i].offset, archs[i].size, path);
465 else
466 throw "in universal file, ppc64 slice does not contain ppc64 mach-o";
467 }
468 else {
469 throw "in universal file, unknown architecture slice";
470 }
471 }
472 }
473 else if ( MachOChecker<x86>::validFile(p) ) {
474 MachOChecker<x86>::make(p, length, path);
475 }
476 else if ( MachOChecker<ppc>::validFile(p) ) {
477 MachOChecker<ppc>::make(p, length, path);
478 }
479 else if ( MachOChecker<ppc64>::validFile(p) ) {
480 MachOChecker<ppc64>::make(p, length, path);
481 }
482 else {
483 throw "not a known file type";
484 }
485 }
486 catch (const char* msg) {
487 throwf("%s in %s", msg, path);
488 }
489}
490
491
492int main(int argc, const char* argv[])
493{
494 try {
495 for(int i=1; i < argc; ++i) {
496 const char* arg = argv[i];
497 if ( arg[0] == '-' ) {
498 if ( strcmp(arg, "-no_content") == 0 ) {
499
500 }
501 else {
502 throwf("unknown option: %s\n", arg);
503 }
504 }
505 else {
506 check(arg);
507 }
508 }
509 }
510 catch (const char* msg) {
511 fprintf(stderr, "machocheck failed: %s\n", msg);
512 return 1;
513 }
514
515 return 0;
516}
517
518
519