]> git.saurik.com Git - apple/ld64.git/blame - src/MachOReaderRelocatable.hpp
ld64-47.2.tar.gz
[apple/ld64.git] / src / MachOReaderRelocatable.hpp
CommitLineData
d696c285
A
1/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25#ifndef __OBJECT_FILE_MACH_O__
26#define __OBJECT_FILE_MACH_O__
27
28#include <stdint.h>
29#include <math.h>
30#include <unistd.h>
31#include <sys/param.h>
32#include <mach-o/ppc/reloc.h>
33#include <mach-o/stab.h>
34#ifndef S_ATTR_DEBUG
35 #define S_ATTR_DEBUG 0x02000000
36#endif
37
38#include <vector>
39#include <set>
40#include <algorithm>
41
42#include "MachOFileAbstraction.hpp"
43#include "Architectures.hpp"
44#include "ObjectFile.h"
45#include "dwarf2.h"
46#include "debugline.h"
47
48
//
//
//	To implement architecture xxx, you must write template specializations for the following methods:
//			Reader<xxx>::validFile()
//			Reader<xxx>::validSectionType()
//			Reader<xxx>::addRelocReference()
//			Reference<xxx>::getDescription()
//
//
58
59
60
61extern __attribute__((noreturn)) void throwf(const char* format, ...);
62
63namespace mach_o {
64namespace relocatable {
65
66
67
68// forward reference
69template <typename A> class Reader;
70template <typename A> class SymbolAtomSorter;
71
72struct AtomAndOffset
73{
74 AtomAndOffset(ObjectFile::Atom* a=NULL) : atom(a), offset(0) {}
75 AtomAndOffset(ObjectFile::Atom* a, uint32_t off) : atom(a), offset(off) {}
76 ObjectFile::Atom* atom;
77 uint32_t offset;
78};
79
80
81template <typename A>
82class Reference : public ObjectFile::Reference
83{
84public:
85 typedef typename A::P P;
86 typedef typename A::P::uint_t pint_t;
87 typedef typename A::ReferenceKinds Kinds;
88
89 Reference(Kinds kind, const AtomAndOffset& at, const AtomAndOffset& toTarget);
90 Reference(Kinds kind, const AtomAndOffset& at, const AtomAndOffset& fromTarget, const AtomAndOffset& toTarget);
91 Reference(Kinds kind, const AtomAndOffset& at, const char* toName, uint32_t toOffset);
92
93 virtual ~Reference() {}
94
95
96 virtual bool isTargetUnbound() const { return ( fToTarget.atom == NULL ); }
97 virtual bool isFromTargetUnbound() const { return ( fFromTarget.atom == NULL ); }
98 virtual uint8_t getKind() const { return (uint8_t)fKind; }
99 virtual uint64_t getFixUpOffset() const { return fFixUpOffsetInSrc; }
100 virtual const char* getTargetName() const { return (fToTargetName != NULL) ? fToTargetName : fToTarget.atom->getName(); }
101 virtual ObjectFile::Atom& getTarget() const { return *fToTarget.atom; }
102 virtual uint64_t getTargetOffset() const { return fToTarget.offset; }
103 virtual bool hasFromTarget() const { return ( (fFromTarget.atom != NULL) || (fFromTargetName != NULL) ); }
104 virtual ObjectFile::Atom& getFromTarget() const { return *fFromTarget.atom; }
105 virtual const char* getFromTargetName() const { return (fFromTargetName != NULL) ? fFromTargetName : fFromTarget.atom->getName(); }
106 virtual void setTarget(ObjectFile::Atom& target, uint64_t offset) { fToTarget.atom = &target; fToTarget.offset = offset; }
107 virtual void setToTargetOffset(uint64_t offset) { fToTarget.offset = offset; }
108 virtual void setFromTarget(ObjectFile::Atom& target) { fFromTarget.atom = &target; }
109 virtual void setFromTargetName(const char* name) { fFromTargetName = name; }
110 virtual void setFromTargetOffset(uint64_t offset) { fFromTarget.offset = offset; }
111 virtual const char* getDescription() const;
112 virtual uint64_t getFromTargetOffset() const { return fFromTarget.offset; }
113
114
115private:
116 pint_t fFixUpOffsetInSrc;
117 AtomAndOffset fToTarget;
118 AtomAndOffset fFromTarget;
119 const char* fToTargetName;
120 const char* fFromTargetName;
121 Kinds fKind;
122};
123
124
125template <typename A>
126Reference<A>::Reference(Kinds kind, const AtomAndOffset& at, const AtomAndOffset& toTarget)
127 : fFixUpOffsetInSrc(at.offset), fToTarget(toTarget), fToTargetName(NULL), fFromTargetName(NULL),
128 fKind(kind)
129{
130 // make reference a by-name where needed
131 if ( (kind != A::kNoFixUp) && (kind != A::kFollowOn) && (toTarget.atom->getScope() != ObjectFile::Atom::scopeTranslationUnit) ) {
132 fToTargetName = toTarget.atom->getName();
133 //fprintf(stderr, "Reference(): changing to by-name %p %s, target scope=%d\n", toTarget.atom, fToTargetName, toTarget.atom->getScope());
134 fToTarget.atom = NULL;
135 }
136 ((class BaseAtom*)at.atom)->addReference(this);
137 //fprintf(stderr, "Reference(): %p fToTarget<%s, %08X>\n", this, (fToTarget.atom != NULL) ? fToTarget.atom->getDisplayName() : fToTargetName , fToTarget.offset);
138}
139
140template <typename A>
141Reference<A>::Reference(Kinds kind, const AtomAndOffset& at, const AtomAndOffset& fromTarget, const AtomAndOffset& toTarget)
142 : fFixUpOffsetInSrc(at.offset), fToTarget(toTarget), fFromTarget(fromTarget),
143 fToTargetName(NULL), fFromTargetName(NULL), fKind(kind)
144{
145 // make reference a by-name where needed
146 if ( (kind != A::kNoFixUp) && (kind != A::kFollowOn)
147 && (toTarget.atom->getScope() != ObjectFile::Atom::scopeTranslationUnit)
148 && (toTarget.atom != at.atom) ) {
149 fToTargetName = toTarget.atom->getName();
150 fToTarget.atom = NULL;
151 }
152 ((class BaseAtom*)at.atom)->addReference(this);
153 //fprintf(stderr, "Reference(): %p kind=%d, fToTarget<%s, %08X>, fromTarget<%s, %08X>\n", this, kind,
154 // this->getTargetName(), fToTarget.offset, this->getFromTargetName(), fromTarget.offset);
155}
156
157template <typename A>
158Reference<A>::Reference(Kinds kind, const AtomAndOffset& at, const char* toName, uint32_t toOffset)
159 : fFixUpOffsetInSrc(at.offset),
160 fToTargetName(toName), fFromTargetName(NULL), fKind(kind)
161{
162 fToTarget.offset = toOffset;
163 ((class BaseAtom*)at.atom)->addReference(this);
164}
165
166
167template <typename A>
168class Segment : public ObjectFile::Segment
169{
170public:
171 Segment(const macho_section<typename A::P>* sect);
172 virtual const char* getName() const { return fSection->segname(); }
173 virtual bool isContentReadable() const { return true; }
174 virtual bool isContentWritable() const { return fWritable; }
175 virtual bool isContentExecutable() const { return fExecutable; }
176private:
177 const macho_section<typename A::P>* fSection;
178 bool fWritable;
179 bool fExecutable;
180};
181
182template <typename A>
183Segment<A>::Segment(const macho_section<typename A::P>* sect)
184 : fSection(sect), fWritable(false), fExecutable(false)
185{
186 if ( strcmp(fSection->segname(), "__DATA") == 0 ) {
187 fWritable = true;
188 }
189 else if ( strcmp(fSection->segname(), "__OBJC") == 0 ) {
190 fWritable = true;
191 }
192 else if ( strcmp(fSection->segname(), "__TEXT") == 0 ) {
193 fExecutable = true;
194 }
195 else if ( strcmp(fSection->segname(), "__IMPORT") == 0 ) {
196 fWritable = true;
197 fExecutable = true;
198 }
199}
200
201
202class DataSegment : public ObjectFile::Segment
203{
204public:
205 virtual const char* getName() const { return "__DATA"; }
206 virtual bool isContentReadable() const { return true; }
207 virtual bool isContentWritable() const { return true; }
208 virtual bool isContentExecutable() const { return false; }
209
210 static DataSegment fgSingleton;
211};
212
213DataSegment DataSegment::fgSingleton;
214
215
216class BaseAtom : public ObjectFile::Atom
217{
218public:
219 BaseAtom() : fStabsStartIndex(0), fStabsCount(0) {}
220
221 virtual void setSize(uint64_t size) = 0;
222 virtual void addReference(ObjectFile::Reference* ref) = 0;
223 virtual void addLineInfo(const ObjectFile::LineInfo& info) = 0;
224 virtual void alignAtLeast(uint8_t align) = 0;
225
226 uint32_t fStabsStartIndex;
227 uint32_t fStabsCount;
228};
229
230
231//
232// A SymbolAtom represents a chunk of a mach-o object file that has a symbol table entry
233// pointing to it. A C function or global variable is represented by one of these atoms.
234//
235//
236template <typename A>
237class SymbolAtom : public BaseAtom
238{
239public:
240 virtual ObjectFile::Reader* getFile() const { return &fOwner; }
241 virtual bool getTranslationUnitSource(const char** dir, const char** name) const
242 { return fOwner.getTranslationUnitSource(dir, name); }
243 virtual const char* getName() const { return &fOwner.fStrings[fSymbol->n_strx()]; }
244 virtual const char* getDisplayName() const { return getName(); }
245 virtual ObjectFile::Atom::Scope getScope() const { return fScope; }
246 virtual ObjectFile::Atom::DefinitionKind getDefinitionKind() const { return ((fSymbol->n_desc() & N_WEAK_DEF) != 0)
247 ? ObjectFile::Atom::kWeakDefinition : ObjectFile::Atom::kRegularDefinition; }
248 virtual SymbolTableInclusion getSymbolTableInclusion() const { return ((fSymbol->n_desc() & REFERENCED_DYNAMICALLY) != 0)
249 ? ObjectFile::Atom::kSymbolTableInAndNeverStrip : ObjectFile::Atom::kSymbolTableIn; }
250 virtual bool isZeroFill() const { return ((fSection->flags() & SECTION_TYPE) == S_ZEROFILL); }
251 virtual uint64_t getSize() const { return fSize; }
252 virtual std::vector<ObjectFile::Reference*>& getReferences() const { return (std::vector<ObjectFile::Reference*>&)(fReferences); }
253 virtual bool mustRemainInSection() const { return true; }
254 virtual const char* getSectionName() const;
255 virtual Segment<A>& getSegment() const { return *fSegment; }
256 virtual bool requiresFollowOnAtom() const;
257 virtual ObjectFile::Atom& getFollowOnAtom() const;
258 virtual std::vector<ObjectFile::LineInfo>* getLineInfo() const { return (std::vector<ObjectFile::LineInfo>*)&fLineInfo; }
259 virtual uint8_t getAlignment() const { return fAlignment; }
260 virtual void copyRawContent(uint8_t buffer[]) const;
261 virtual void setScope(ObjectFile::Atom::Scope newScope) { fScope = newScope; }
262 virtual void setSize(uint64_t size);
263 virtual void addReference(ObjectFile::Reference* ref) { fReferences.insert(fReferences.begin(), (Reference<A>*)ref); }
264 virtual void addLineInfo(const ObjectFile::LineInfo& info) { fLineInfo.push_back(info); }
265 virtual void alignAtLeast(uint8_t align) { fAlignment = std::max(align, fAlignment); }
266
267protected:
268 typedef typename A::P P;
269 typedef typename A::P::E E;
270 typedef typename A::P::uint_t pint_t;
271 typedef typename A::ReferenceKinds Kinds;
272 typedef typename std::vector<Reference<A>*> ReferenceVector;
273 typedef typename ReferenceVector::iterator ReferenceVectorIterator; // seems to help C++ parser
274 typedef typename ReferenceVector::const_iterator ReferenceVectorConstIterator; // seems to help C++ parser
275 friend class Reader<A>;
276 friend class SymbolAtomSorter<A>;
277
278 SymbolAtom(Reader<A>&, const macho_nlist<P>*, const macho_section<P>*);
279 virtual ~SymbolAtom() {}
280
281 Reader<A>& fOwner;
282 const macho_nlist<P>* fSymbol;
283 pint_t fAddress;
284 pint_t fSize;
285 const macho_section<P>* fSection;
286 Segment<A>* fSegment;
287 ReferenceVector fReferences;
288 std::vector<ObjectFile::LineInfo> fLineInfo;
289 ObjectFile::Atom::Scope fScope;
290 uint8_t fAlignment;
291};
292
293
294template <typename A>
295SymbolAtom<A>::SymbolAtom(Reader<A>& owner, const macho_nlist<P>* symbol, const macho_section<P>* section)
296 : fOwner(owner), fSymbol(symbol), fAddress(0), fSize(0), fSection(section), fSegment(NULL), fAlignment(0)
297{
298 uint8_t type = symbol->n_type();
299 if ( (type & N_EXT) == 0 )
300 fScope = ObjectFile::Atom::scopeTranslationUnit;
301 else if ( (type & N_PEXT) != 0 )
302 fScope = ObjectFile::Atom::scopeLinkageUnit;
303 else
304 fScope = ObjectFile::Atom::scopeGlobal;
305 if ( (type & N_TYPE) == N_SECT ) {
306 // real definition
307 fSegment = new Segment<A>(fSection);
308 fAddress = fSymbol->n_value();
309 if ( (fSymbol->n_desc() & N_NO_DEAD_STRIP) != 0 )
310 this->setDontDeadStrip();
311 }
312 else {
313 printf("unknown symbol type: %d\n", type);
314 }
315 //fprintf(stderr, "SymbolAtom(%p) %s fAddress=0x%X\n", this, this->getDisplayName(), (uint32_t)fAddress);
316 // support for .o files built with old ld64
317 if ( (fSymbol->n_desc() & N_WEAK_DEF) && (strcmp(fSection->sectname(),"__picsymbolstub1__TEXT") == 0) ) {
318 const char* name = this->getName();
319 const int nameLen = strlen(name);
320 if ( (nameLen > 6) && strcmp(&name[nameLen-5], "$stub") == 0 ) {
321 // switch symbol to point at name that does not have trailing $stub
322 char correctName[nameLen];
323 strncpy(correctName, name, nameLen-5);
324 correctName[nameLen-5] = '\0';
325 const macho_nlist<P>* symbolsStart = fOwner.fSymbols;
326 const macho_nlist<P>* symbolsEnd = &symbolsStart[fOwner.fSymbolCount];
327 for(const macho_nlist<P>* s = symbolsStart; s < symbolsEnd; ++s) {
328 if ( strcmp(&fOwner.fStrings[s->n_strx()], correctName) == 0 ) {
329 fSymbol = s;
330 break;
331 }
332 }
333 }
334 }
335 // support for labeled stubs
336 switch ( section->flags() & SECTION_TYPE ) {
337 case S_SYMBOL_STUBS:
338 setSize(section->reserved2());
339 break;
340 case S_LAZY_SYMBOL_POINTERS:
341 case S_NON_LAZY_SYMBOL_POINTERS:
342 setSize(sizeof(pint_t));
343 break;
344 case S_4BYTE_LITERALS:
345 setSize(4);
346 break;
347 case S_8BYTE_LITERALS:
348 setSize(8);
349 break;
350 case S_CSTRING_LITERALS:
351 setSize(strlen((char*)(fOwner.fHeader) + section->offset() + fAddress - section->addr()) + 1);
352 case S_REGULAR:
353 case S_ZEROFILL:
354 case S_COALESCED:
355 // size calculate later after next atom is found
356 break;
357 }
358}
359
360
361template <typename A>
362void SymbolAtom<A>::setSize(uint64_t size)
363{
364 fSize = size;
365
366 // Try to compute the alignment base on the address aligned at in object file and the size
367 uint8_t sizeAlign = __builtin_ctz(fSize);
368 uint8_t sizeAndSectAlign = std::min((uint8_t)fSection->align(), sizeAlign);
369 // If address is zero, can't figure out better alignment than section alignment and size
370 if ( fAddress == 0 )
371 fAlignment = sizeAndSectAlign;
372 else
373 fAlignment = std::min((uint8_t)__builtin_ctz(fAddress), sizeAndSectAlign);
374}
375
376
377template <typename A>
378const char* SymbolAtom<A>::getSectionName() const
379{
380 if ( strlen(fSection->sectname()) > 15 ) {
381 static char temp[18];
382 strncpy(temp, fSection->sectname(), 16);
383 temp[17] = '\0';
384 return temp;
385 }
386 return fSection->sectname();
387}
388
389template <typename A>
390bool SymbolAtom<A>::requiresFollowOnAtom() const
391{
392 // requires follow-on if built with old compiler and not the last atom
393 if ( (fOwner.fHeader->flags() & MH_SUBSECTIONS_VIA_SYMBOLS) == 0) {
394 for (ReferenceVectorConstIterator it=fReferences.begin(); it != fReferences.end(); it++) {
395 Reference<A>* ref = *it;
396 if ( ref->getKind() == A::kFollowOn )
397 return true;
398 }
399 }
400 return false;
401}
402
403template <typename A>
404ObjectFile::Atom& SymbolAtom<A>::getFollowOnAtom() const
405{
406 for (ReferenceVectorConstIterator it=fReferences.begin(); it != fReferences.end(); it++) {
407 Reference<A>* ref = *it;
408 if ( ref->getKind() == A::kFollowOn )
409 return ref->getTarget();
410 }
411 return *((ObjectFile::Atom*)NULL);
412}
413
414
415
416
417template <typename A>
418void SymbolAtom<A>::copyRawContent(uint8_t buffer[]) const
419{
420 // copy base bytes
421 if ( isZeroFill() )
422 bzero(buffer, fSize);
423 else {
424 uint32_t fileOffset = fSection->offset() - fSection->addr() + fAddress;
425 memcpy(buffer, (char*)(fOwner.fHeader)+fileOffset, fSize);
426 }
427}
428
429
430template <typename A>
431class SymbolAtomSorter
432{
433public:
434 SymbolAtomSorter(std::map<uint32_t, BaseAtom*>& map) : fMap(map) {}
435
436 typedef typename A::P::uint_t pint_t;
437
438 bool operator()(ObjectFile::Atom* left, ObjectFile::Atom* right)
439 {
440 pint_t leftAddr = ((SymbolAtom<A>*)left)->fAddress;
441 pint_t rightAddr = ((SymbolAtom<A>*)right)->fAddress;
442 if ( leftAddr == rightAddr ) {
443 // two atoms with same address, must have been a function with multiple labels
444 // make sure we sort these so the one with real content (in map) is last
445 std::map<uint32_t, BaseAtom*>::iterator pos = fMap.find(leftAddr);
446 if ( pos != fMap.end() ) {
447 return ( pos->second == right );
448 }
449 return false;
450 }
451 else {
452 return ( leftAddr < rightAddr );
453 }
454 }
455private:
456 std::map<uint32_t, BaseAtom*>& fMap;
457};
458
459
460//
461// A TentativeAtom represents a C "common" or "tentative" defintion of data.
462// For instance, "int foo;" is neither a declaration or a definition and
463// is represented by a TentativeAtom.
464//
465template <typename A>
466class TentativeAtom : public BaseAtom
467{
468public:
469 virtual ObjectFile::Reader* getFile() const { return &fOwner; }
470 virtual bool getTranslationUnitSource(const char** dir, const char** name) const
471 { return fOwner.getTranslationUnitSource(dir, name); }
472 virtual const char* getName() const { return &fOwner.fStrings[fSymbol->n_strx()]; }
473 virtual const char* getDisplayName() const { return getName(); }
474 virtual ObjectFile::Atom::Scope getScope() const { return fScope; }
475 virtual ObjectFile::Atom::DefinitionKind getDefinitionKind() const { return ObjectFile::Atom::kTentativeDefinition; }
476 virtual bool isZeroFill() const { return true; }
477 virtual SymbolTableInclusion getSymbolTableInclusion() const { return ((fSymbol->n_desc() & REFERENCED_DYNAMICALLY) != 0)
478 ? ObjectFile::Atom::kSymbolTableInAndNeverStrip : ObjectFile::Atom::kSymbolTableIn; }
479 virtual uint64_t getSize() const { return fSymbol->n_value(); }
480 virtual std::vector<ObjectFile::Reference*>& getReferences() const { return fgNoReferences; }
481 virtual bool mustRemainInSection() const { return true; }
482 virtual const char* getSectionName() const { return "__common"; }
483 virtual ObjectFile::Segment& getSegment() const { return DataSegment::fgSingleton; }
484 virtual bool requiresFollowOnAtom() const { return false; }
485 virtual ObjectFile::Atom& getFollowOnAtom() const { return *(ObjectFile::Atom*)NULL; }
486 virtual std::vector<ObjectFile::LineInfo>* getLineInfo() const { return NULL; }
487 virtual uint8_t getAlignment() const;
488 virtual void copyRawContent(uint8_t buffer[]) const;
489 virtual void setScope(ObjectFile::Atom::Scope newScope) { fScope = newScope; }
490 virtual void setSize(uint64_t size) { }
491 virtual void addReference(ObjectFile::Reference* ref) { throw "can't add references"; }
492 virtual void addLineInfo(const ObjectFile::LineInfo& info) { throw "can't add line info to tentative definition"; }
493 virtual void alignAtLeast(uint8_t align) { }
494
495protected:
496 typedef typename A::P P;
497 typedef typename A::P::E E;
498 typedef typename A::P::uint_t pint_t;
499 typedef typename A::ReferenceKinds Kinds;
500 friend class Reader<A>;
501
502 TentativeAtom(Reader<A>&, const macho_nlist<P>*);
503 virtual ~TentativeAtom() {}
504
505 Reader<A>& fOwner;
506 const macho_nlist<P>* fSymbol;
507 ObjectFile::Atom::Scope fScope;
508 static std::vector<ObjectFile::Reference*> fgNoReferences;
509};
510
511template <typename A>
512std::vector<ObjectFile::Reference*> TentativeAtom<A>::fgNoReferences;
513
514template <typename A>
515TentativeAtom<A>::TentativeAtom(Reader<A>& owner, const macho_nlist<P>* symbol)
516 : fOwner(owner), fSymbol(symbol)
517{
518 uint8_t type = symbol->n_type();
519 if ( (type & N_EXT) == 0 )
520 fScope = ObjectFile::Atom::scopeTranslationUnit;
521 else if ( (type & N_PEXT) != 0 )
522 fScope = ObjectFile::Atom::scopeLinkageUnit;
523 else
524 fScope = ObjectFile::Atom::scopeGlobal;
525 if ( ((type & N_TYPE) == N_UNDF) && (symbol->n_value() != 0) ) {
526 // tentative definition
527 }
528 else {
529 printf("unknown symbol type: %d\n", type);
530 }
531 //fprintf(stderr, "TentativeAtom(%p) %s\n", this, this->getDisplayName());
532}
533
534
535template <typename A>
536uint8_t TentativeAtom<A>::getAlignment() const
537{
538 // common symbols align to their size
539 // that is, a 4-byte common aligns to 4-bytes
540 // to be safe, odd size commons align to the next power-of-2 size
541 uint8_t alignment = (uint8_t)ceil(log2(this->getSize()));
542 // limit alignment of extremely large commons to 2^15 bytes (8-page)
543 if ( alignment < 15 )
544 return alignment;
545 else
546 return 15;
547}
548
549template <typename A>
550void TentativeAtom<A>::copyRawContent(uint8_t buffer[]) const
551{
552 bzero(buffer, getSize());
553}
554
555
556//
557// An AnonymousAtom represents compiler generated data that has no name.
558// For instance, a literal C-string or a 64-bit floating point constant
559// is represented by an AnonymousAtom.
560//
561template <typename A>
562class AnonymousAtom : public BaseAtom
563{
564public:
565 virtual ObjectFile::Reader* getFile() const { return &fOwner; }
566 virtual bool getTranslationUnitSource(const char** dir, const char** name) const { return false; }
567 virtual const char* getName() const { return fSynthesizedName; }
568 virtual const char* getDisplayName() const;
569 virtual ObjectFile::Atom::Scope getScope() const;
570 virtual ObjectFile::Atom::DefinitionKind getDefinitionKind() const;
571 virtual ObjectFile::Atom::SymbolTableInclusion getSymbolTableInclusion() const { return ObjectFile::Atom::kSymbolTableNotIn; }
572 virtual bool isZeroFill() const;
573 virtual uint64_t getSize() const { return fSize; }
574 virtual std::vector<ObjectFile::Reference*>& getReferences() const { return (std::vector<ObjectFile::Reference*>&)(fReferences); }
575 virtual bool mustRemainInSection() const { return true; }
576 virtual const char* getSectionName() const;
577 virtual Segment<A>& getSegment() const { return *fSegment; }
578 virtual bool requiresFollowOnAtom() const;
579 virtual ObjectFile::Atom& getFollowOnAtom() const;
580 virtual std::vector<ObjectFile::LineInfo>* getLineInfo() const { return NULL; }
581 virtual uint8_t getAlignment() const;
582 virtual void copyRawContent(uint8_t buffer[]) const;
583 virtual void setScope(ObjectFile::Atom::Scope newScope) { }
584 virtual void setSize(uint64_t size) { fSize = size; }
585 virtual void addReference(ObjectFile::Reference* ref) { fReferences.insert(fReferences.begin(), (Reference<A>*)ref); }
586 virtual void addLineInfo(const ObjectFile::LineInfo& info) { fprintf(stderr, "can't add line info to anonymous symbol %s\n", this->getDisplayName()); }
587 virtual void alignAtLeast(uint8_t align) { }
588 BaseAtom* redirectTo() { return fRedirect; }
589 bool isWeakImportStub() { return fWeakImportStub; }
590
591protected:
592 typedef typename A::P P;
593 typedef typename A::P::E E;
594 typedef typename A::P::uint_t pint_t;
595 typedef typename A::ReferenceKinds Kinds;
596 typedef typename std::vector<Reference<A>*> ReferenceVector;
597 typedef typename ReferenceVector::iterator ReferenceVectorIterator; // seems to help C++ parser
598 typedef typename ReferenceVector::const_iterator ReferenceVectorConstIterator; // seems to help C++ parser
599 friend class Reader<A>;
600
601 AnonymousAtom(Reader<A>&, const macho_section<P>*, uint32_t addr, uint32_t size);
602 virtual ~AnonymousAtom() {}
603
604 Reader<A>& fOwner;
605 const char* fSynthesizedName;
606 const macho_section<P>* fSection;
607 uint32_t fAddress;
608 uint32_t fSize;
609 Segment<A>* fSegment;
610 ReferenceVector fReferences;
611 BaseAtom* fRedirect;
612 bool fWeakImportStub;
613 bool fReallyNonLazyPointer; // HACK until compiler stops emitting anonymous non-lazy pointers
614};
615
616template <typename A>
617AnonymousAtom<A>::AnonymousAtom(Reader<A>& owner, const macho_section<P>* section, uint32_t addr, uint32_t size)
618 : fOwner(owner), fSynthesizedName(NULL), fSection(section), fAddress(addr), fSize(size), fSegment(NULL),
619 fWeakImportStub(false), fReallyNonLazyPointer(false)
620{
621 fSegment = new Segment<A>(fSection);
622 fRedirect = this;
623 uint8_t type = fSection->flags() & SECTION_TYPE;
624 switch ( type ) {
625 case S_ZEROFILL:
626 {
627 asprintf((char**)&fSynthesizedName, "zero-fill-at-0x%08X", addr);
628 }
629 break;
630 case S_REGULAR:
631 // handle .o files created by old ld64 -r that are missing cstring section type
632 if ( strcmp(fSection->sectname(), "__cstring") != 0 )
633 break;
634 // else fall into cstring case
635 case S_CSTRING_LITERALS:
636 {
637 const char* str = (char*)(owner.fHeader) + section->offset() + addr - section->addr();
638 asprintf((char**)&fSynthesizedName, "cstring=%s", str);
639 }
640 break;
641 case S_4BYTE_LITERALS:
642 {
643 uint32_t value = E::get32(*(uint32_t*)(((uint8_t*)owner.fHeader) + section->offset() + addr - section->addr()));
644 asprintf((char**)&fSynthesizedName, "4-byte-literal=0x%08X", value);
645 }
646 break;
647 case S_8BYTE_LITERALS:
648 {
649 uint64_t value = E::get64(*(uint64_t*)(((uint8_t*)owner.fHeader) + section->offset() + addr - section->addr()));
650 asprintf((char**)&fSynthesizedName, "8-byte-literal=0x%016llX", value);
651 }
652 break;
653 case S_LITERAL_POINTERS:
654 {
655 // FIX FIX, we need the name to include the name of the target so that we can coalesce them
656 asprintf((char**)&fSynthesizedName, "literal-pointer@%d", addr - (uint32_t)fSection->addr());
657 }
658 break;
659 case S_MOD_INIT_FUNC_POINTERS:
660 asprintf((char**)&fSynthesizedName, "initializer$%d", (addr - (uint32_t)fSection->addr())/sizeof(pint_t));
661 break;
662 case S_MOD_TERM_FUNC_POINTERS:
663 asprintf((char**)&fSynthesizedName, "terminator$%d", (addr - (uint32_t)fSection->addr())/sizeof(pint_t));
664 break;
665 case S_SYMBOL_STUBS:
666 {
667 uint32_t index = (fAddress - fSection->addr()) / fSection->reserved2();
668 index += fSection->reserved1();
669 uint32_t symbolIndex = E::get32(fOwner.fIndirectTable[index]);
670 const macho_nlist<P>* sym = &fOwner.fSymbols[symbolIndex];
671 uint32_t strOffset = sym->n_strx();
672 // want name to not have $stub suffix, this is what automatic stub generation expects
673 fSynthesizedName = &fOwner.fStrings[strOffset];
674 // check for weak import
675 fWeakImportStub = fOwner.isWeakImportSymbol(sym);
676 // sometimes the compiler gets confused and generates a stub to a static function
677 // if so, we should redirect any call to the stub to be calls to the real static function atom
678 if ( ((sym->n_type() & N_TYPE) != N_UNDF) && ((sym->n_desc() & N_WEAK_DEF) == 0) ) {
679 BaseAtom* staticAtom = fOwner.findAtomByName(fSynthesizedName);
680 if ( staticAtom != NULL )
681 fRedirect = staticAtom;
682 }
683 }
684 break;
685 case S_LAZY_SYMBOL_POINTERS:
686 case S_NON_LAZY_SYMBOL_POINTERS:
687 {
688 uint32_t index = (fAddress - fSection->addr()) / sizeof(pint_t);
689 index += fSection->reserved1();
690 uint32_t symbolIndex = E::get32(fOwner.fIndirectTable[index]);
691 if ( symbolIndex == INDIRECT_SYMBOL_LOCAL ) {
692 // Silly codegen with non-lazy pointer to a local symbol
693 // All atoms not created yet, so we need to scan symbol table
694 uint32_t fileOffset = fSection->offset() - fSection->addr() + fAddress;
695 pint_t nonLazyPtrValue = P::getP(*((pint_t*)((char*)(fOwner.fHeader)+fileOffset)));
696 const macho_nlist<P>* end = &fOwner.fSymbols[fOwner.fSymbolCount];
697 for (const macho_nlist<P>* sym = fOwner.fSymbols; sym < end; ++sym) {
698 if ( ((sym->n_type() & N_TYPE) == N_SECT) && (sym->n_value() == nonLazyPtrValue) ) {
699 const char* name = &fOwner.fStrings[sym->n_strx()];
700 char* str = new char[strlen(name)+16];
701 strcpy(str, name);
702 strcat(str, "$non_lazy_ptr");
703 fSynthesizedName = str;
704 // add direct reference to target later, because its atom may not be constructed yet
705 fOwner.fLocalNonLazys.push_back(this);
706 return;
707 }
708 }
709 throwf("malformed .o file: non-lazy-pointer with value 0x%08X missing symbol", nonLazyPtrValue);
710 }
711 const macho_nlist<P>* targetSymbol = &fOwner.fSymbols[symbolIndex];
712 const char* name = &fOwner.fStrings[targetSymbol->n_strx()];
713 char* str = new char[strlen(name)+16];
714 strcpy(str, name);
715 if ( type == S_LAZY_SYMBOL_POINTERS )
716 strcat(str, "$lazy_ptr");
717 else
718 strcat(str, "$non_lazy_ptr");
719 fSynthesizedName = str;
720
721 if ( fOwner.isWeakImportSymbol(targetSymbol) )
722 new Reference<A>(A::kPointerWeakImport, AtomAndOffset(this), name, 0);
723 else
724 new Reference<A>(A::kPointer, AtomAndOffset(this), name, 0);
725 }
726 break;
727 default:
728 throwf("section type %d not supported with address=0x%08X", type, addr);
729 }
730 //fprintf(stderr, "AnonymousAtom(%p) %s \n", this, this->getDisplayName());
731}
732
733
734template <typename A>
735const char* AnonymousAtom<A>::getDisplayName() const
736{
737 if ( fSynthesizedName != NULL )
738 return fSynthesizedName;
739
740 static char temp[512];
741 if ( (fSection->flags() & SECTION_TYPE) == S_CSTRING_LITERALS ) {
742 uint32_t fileOffset = fSection->offset() - fSection->addr() + fAddress;
743 sprintf(temp, "atom string literal: \"%s\"", (char*)(fOwner.fHeader)+fileOffset);
744 }
745 else {
746 sprintf(temp, "%s@%d", fSection->sectname(), fAddress - (uint32_t)fSection->addr() );
747 }
748 return temp;
749}
750
751template <typename A>
752ObjectFile::Atom::Scope AnonymousAtom<A>::getScope() const
753{
754 if ( fReallyNonLazyPointer )
755 return ObjectFile::Atom::scopeLinkageUnit;
756 // in order for literals to be coalesced they must be scoped to linkage unit
757 switch ( fSection->flags() & SECTION_TYPE ) {
758 case S_CSTRING_LITERALS:
759 case S_4BYTE_LITERALS:
760 case S_8BYTE_LITERALS:
761 case S_SYMBOL_STUBS:
762 case S_NON_LAZY_SYMBOL_POINTERS:
763 return ObjectFile::Atom::scopeLinkageUnit;
764 default:
765 return ObjectFile::Atom::scopeTranslationUnit;
766 }
767}
768
769template <typename A>
770ObjectFile::Atom::DefinitionKind AnonymousAtom<A>::getDefinitionKind() const
771{
772 if ( fReallyNonLazyPointer )
773 return ObjectFile::Atom::kWeakDefinition;
774 // in order for literals to be coalesced they must be weak
775 switch ( fSection->flags() & SECTION_TYPE ) {
776 case S_CSTRING_LITERALS:
777 case S_4BYTE_LITERALS:
778 case S_8BYTE_LITERALS:
779 case S_NON_LAZY_SYMBOL_POINTERS:
780 return ObjectFile::Atom::kWeakDefinition;
781 default:
782 return ObjectFile::Atom::kRegularDefinition;
783 }
784}
785
786template <typename A>
787bool AnonymousAtom<A>::isZeroFill() const
788{
789 return ( (fSection->flags() & SECTION_TYPE) == S_ZEROFILL );
790}
791
792
793template <typename A>
794const char* AnonymousAtom<A>::getSectionName() const
795{
796 if ( fReallyNonLazyPointer )
797 return "__nl_symbol_ptr";
798 if ( strlen(fSection->sectname()) > 15 ) {
799 static char temp[18];
800 strncpy(temp, fSection->sectname(), 16);
801 temp[17] = '\0';
802 return temp;
803 }
804 return fSection->sectname();
805}
806
807template <typename A>
808uint8_t AnonymousAtom<A>::getAlignment() const
809{
810 if ( fReallyNonLazyPointer )
811 return (uint8_t)log2(sizeof(pint_t));
812 switch ( fSection->flags() & SECTION_TYPE ) {
813 case S_4BYTE_LITERALS:
814 return 2;
815 case S_8BYTE_LITERALS:
816 return 3;
817 case S_NON_LAZY_SYMBOL_POINTERS:
818 return (uint8_t)log2(sizeof(pint_t));
819 default:
820 return fSection->align();
821 }
822}
823
824template <typename A>
825bool AnonymousAtom<A>::requiresFollowOnAtom() const
826{
827 // requires follow-on if built with old compiler and not the last atom
828 if ( (fOwner.fHeader->flags() & MH_SUBSECTIONS_VIA_SYMBOLS) == 0) {
829 for (ReferenceVectorConstIterator it=fReferences.begin(); it != fReferences.end(); it++) {
830 Reference<A>* ref = *it;
831 if ( ref->getKind() == A::kFollowOn )
832 return true;
833 }
834 }
835 return false;
836}
837
838template <typename A>
839ObjectFile::Atom& AnonymousAtom<A>::getFollowOnAtom() const
840{
841 for (ReferenceVectorConstIterator it=fReferences.begin(); it != fReferences.end(); it++) {
842 Reference<A>* ref = *it;
843 if ( ref->getKind() == A::kFollowOn )
844 return ref->getTarget();
845 }
846 return *((ObjectFile::Atom*)NULL);
847}
848
849template <typename A>
850void AnonymousAtom<A>::copyRawContent(uint8_t buffer[]) const
851{
852 // copy base bytes
853 if ( isZeroFill() )
854 bzero(buffer, fSize);
855 else {
856 uint32_t fileOffset = fSection->offset() - fSection->addr() + fAddress;
857 memcpy(buffer, (char*)(fOwner.fHeader)+fileOffset, fSize);
858 }
859}
860
861
862
863
864template <typename A>
865class Reader : public ObjectFile::Reader
866{
867public:
868 static bool validFile(const uint8_t* fileContent);
869 static Reader<A>* make(const uint8_t* fileContent, const char* path, time_t modTime,
870 const ObjectFile::ReaderOptions& options)
871 { return new Reader<A>(fileContent, path, modTime, options); }
872 virtual ~Reader() {}
873
874 virtual const char* getPath() { return fPath; }
875 virtual time_t getModificationTime() { return fModTime; }
876 virtual ObjectFile::Reader::DebugInfoKind getDebugInfoKind() { return fDebugInfo; }
877 virtual std::vector<class ObjectFile::Atom*>& getAtoms() { return (std::vector<class ObjectFile::Atom*>&)(fAtoms); }
878 virtual std::vector<class ObjectFile::Atom*>* getJustInTimeAtomsFor(const char* name) { return NULL; }
879 virtual std::vector<Stab>* getStabs() { return &fStabs; }
880
881 bool getTranslationUnitSource(const char** dir, const char** name) const;
882
883private:
884 typedef typename A::P P;
885 typedef typename A::P::E E;
886 typedef typename A::P::uint_t pint_t;
887 //typedef typename std::vector<Atom<A>*> AtomVector;
888 //typedef typename AtomVector::iterator AtomVectorIterator; // seems to help C++ parser
889 typedef typename A::ReferenceKinds Kinds;
890 friend class AnonymousAtom<A>;
891 friend class TentativeAtom<A>;
892 friend class SymbolAtom<A>;
893 Reader(const uint8_t* fileContent, const char* path, time_t modTime, const ObjectFile::ReaderOptions& options);
894 bool addRelocReference(const macho_section<P>* sect, const macho_relocation_info<P>* reloc);
895 bool addRelocReference_powerpc(const macho_section<P>* sect, const macho_relocation_info<P>* reloc);
896 Kinds pointerDiffKindForLength_powerpc(uint8_t r_length);
897 bool read_comp_unit(const char ** name, const char ** comp_dir, uint64_t *stmt_list);
898 static bool isWeakImportSymbol(const macho_nlist<P>* sym);
899 static bool skip_form(const uint8_t ** offset, const uint8_t * end, uint64_t form, uint8_t addr_size, bool dwarf64);
900 static const char* assureFullPath(const char* path);
901 AtomAndOffset findAtomAndOffset(uint32_t addr);
902 AtomAndOffset findAtomAndOffset(uint32_t baseAddr, uint32_t realAddr);
903 Reference<A>* makeReference(Kinds kind, uint32_t atAddr, uint32_t toAddr);
904 Reference<A>* makeReference(Kinds kind, uint32_t atAddr, uint32_t fromAddr, uint32_t toAddr);
905 Reference<A>* makeReferenceWithToBase(Kinds kind, uint32_t atAddr, uint32_t toAddr, uint32_t toBaseAddr);
906 Reference<A>* makeReferenceWithToBase(Kinds kind, uint32_t atAddr, uint32_t fromAddr, uint32_t toAddr, uint32_t toBaseAddr);
907 Reference<A>* makeByNameReference(Kinds kind, uint32_t atAddr, const char* toName, uint32_t toOffset);
908 Reference<A>* makeReferenceToEH(const char* ehName, pint_t ehAtomAddress, const macho_section<P>* ehSect);
909 void validSectionType(uint8_t type);
910
911 BaseAtom* findAtomByName(const char*);
912
913 const char* fPath;
914 time_t fModTime;
915 const ObjectFile::ReaderOptions& fOptions;
916 const macho_header<P>* fHeader;
917 const char* fStrings;
918 const macho_nlist<P>* fSymbols;
919 uint32_t fSymbolCount;
920 const macho_segment_command<P>* fSegment;
921 const uint32_t* fIndirectTable;
922 std::vector<ObjectFile::Atom*> fAtoms;
923 std::map<uint32_t, BaseAtom*> fAddrToAtom;
924 std::vector<class AnonymousAtom<A>*> fLocalNonLazys;
925 ObjectFile::Reader::DebugInfoKind fDebugInfo;
926 const macho_section<P>* fDwarfDebugInfoSect;
927 const macho_section<P>* fDwarfDebugAbbrevSect;
928 const macho_section<P>* fDwarfDebugLineSect;
929 const char* fDwarfTranslationUnitDir;
930 const char* fDwarfTranslationUnitFile;
931 std::map<uint32_t,const char*> fDwarfIndexToFile;
932 std::vector<Stab> fStabs;
933};
934
935
936template <typename A>
937Reader<A>::Reader(const uint8_t* fileContent, const char* path, time_t modTime, const ObjectFile::ReaderOptions& options)
938 : fPath(strdup(path)), fModTime(modTime), fOptions(options), fHeader((const macho_header<P>*)fileContent),
939 fStrings(NULL), fSymbols(NULL), fSymbolCount(0), fSegment(NULL), fIndirectTable(NULL),
940 fDebugInfo(kDebugInfoNone), fDwarfDebugInfoSect(NULL), fDwarfDebugAbbrevSect(NULL),
941 fDwarfTranslationUnitDir(NULL), fDwarfTranslationUnitFile(NULL)
942{
943 // sanity check
944 if ( ! validFile(fileContent) )
945 throw "not a valid mach-o object file";
946
947 // cache intersting pointers
948 const macho_header<P>* header = (const macho_header<P>*)fileContent;
949 const uint32_t cmd_count = header->ncmds();
950 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
951 const macho_load_command<P>* cmd = cmds;
952 for (uint32_t i = 0; i < cmd_count; ++i) {
953 switch (cmd->cmd()) {
954 case LC_SYMTAB:
955 {
956 const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
957 fSymbolCount = symtab->nsyms();
958 fSymbols = (const macho_nlist<P>*)((char*)header + symtab->symoff());
959 fStrings = (char*)header + symtab->stroff();
960 }
961 break;
962 case LC_DYSYMTAB:
963 {
964 const macho_dysymtab_command<P>* dsymtab = (struct macho_dysymtab_command<P>*)cmd;
965 fIndirectTable = (uint32_t*)((char*)fHeader + dsymtab->indirectsymoff());
966 }
967 break;
968 case LC_UUID:
969 if (getDebugInfoKind() != kDebugInfoDwarf)
970 fDebugInfo = kDebugInfoStabsUUID;
971 break;
972
973 default:
974 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
975 fSegment = (macho_segment_command<P>*)cmd;
976 }
977 break;
978 }
979 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
980 }
981 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)fSegment + sizeof(macho_segment_command<P>));
982 const macho_section<P>* const sectionsEnd = &sectionsStart[fSegment->nsects()];
983
984 // inital guess for number of atoms
985 fAtoms.reserve(fSymbolCount);
986
987 // add all atoms that have entries in symbol table
988 const macho_section<P>* sections = (macho_section<P>*)((char*)fSegment + sizeof(macho_segment_command<P>));
989 for (uint32_t i=0; i < fSymbolCount; ++i) {
990 const macho_nlist<P>& sym = fSymbols[i];
991 if ( (sym.n_type() & N_STAB) == 0 ) {
992 uint8_t type = (sym.n_type() & N_TYPE);
993 if ( type == N_SECT ) {
994 const macho_section<P>* section = &sections[sym.n_sect()-1];
995 bool suppress = false;
996 // ignore atoms in debugger sections
997 if ( (section->flags() & S_ATTR_DEBUG) == 0 ) {
998 // ignore labels for atoms in other sections
999 switch ( section->flags() & SECTION_TYPE ) {
1000 case S_REGULAR:
1001 if ( (sym.n_desc() & N_WEAK_DEF) && strcmp(section->sectname(), "__picsymbolstub1__TEXT") == 0 )
1002 suppress = true; // ignore stubs in crt1.o built by old ld64 that was missing S_SYMBOL_STUBS
1003 case S_ZEROFILL:
1004 case S_COALESCED:
1005 case S_4BYTE_LITERALS:
1006 case S_8BYTE_LITERALS:
1007 case S_CSTRING_LITERALS:
1008 {
1009 BaseAtom* newAtom = new SymbolAtom<A>(*this, &sym, section);
1010 std::map<uint32_t, BaseAtom*>::iterator pos = fAddrToAtom.find(sym.n_value());
1011 if ( pos != fAddrToAtom.end() ) {
1012 // another label to an existing address
1013 // make this one be the real one and followed by the previous
1014 BaseAtom* existingAtom = pos->second;
1015 //fprintf(stderr, "new atom %s has same address as existing atom %s\n", newAtom->getDisplayName(), existingAtom->getDisplayName());
1016 new Reference<A>(A::kFollowOn, AtomAndOffset(newAtom), AtomAndOffset(existingAtom));
1017 newAtom->setSize(0);
1018 }
1019 else {
1020 fAddrToAtom[sym.n_value()] = newAtom;
1021 }
1022 if ( ! suppress )
1023 fAtoms.push_back(newAtom);
1024 }
1025 break;
1026 case S_SYMBOL_STUBS:
1027 case S_LAZY_SYMBOL_POINTERS:
1028 case S_NON_LAZY_SYMBOL_POINTERS:
1029 // ignore symboled stubs produces by old ld64
1030 break;
1031 default:
1032 fprintf(stderr, "ld64 warning: symbol %s found in unsupported section in %s\n",
1033 &fStrings[sym.n_strx()], this->getPath());
1034 }
1035 }
1036 }
1037 else if ( (type == N_UNDF) && (sym.n_value() != 0) ) {
1038 fAtoms.push_back(new TentativeAtom<A>(*this, &sym));
1039 }
1040 }
1041 }
1042
1043 // sort SymbolAtoms by address
1044 std::sort(fAtoms.begin(), fAtoms.end(), SymbolAtomSorter<A>(fAddrToAtom));
1045
1046 // add all fixed size anonymous atoms from special sections
1047 for (const macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
1048 uint32_t atomSize = 0;
1049 uint8_t type (sect->flags() & SECTION_TYPE);
1050 validSectionType(type);
1051 bool suppress = false;
1052 switch ( type ) {
1053 case S_SYMBOL_STUBS:
1054 suppress = true;
1055 atomSize = sect->reserved2();
1056 break;
1057 case S_LAZY_SYMBOL_POINTERS:
1058 suppress = true;
1059 atomSize = sizeof(pint_t);
1060 break;
1061 case S_NON_LAZY_SYMBOL_POINTERS:
1062 case S_LITERAL_POINTERS:
1063 case S_MOD_INIT_FUNC_POINTERS:
1064 case S_MOD_TERM_FUNC_POINTERS:
1065 atomSize = sizeof(pint_t);
1066 break;
1067 case S_INTERPOSING:
1068 atomSize = sizeof(pint_t)*2;
1069 break;
1070 case S_4BYTE_LITERALS:
1071 atomSize = 4;
1072 break;
1073 case S_8BYTE_LITERALS:
1074 atomSize = 8;
1075 break;
1076 }
1077 if ( atomSize != 0 ) {
1078 for(uint32_t sectOffset=0; sectOffset < sect->size(); sectOffset += atomSize) {
1079 uint32_t atomAddr = sect->addr() + sectOffset;
1080 // add if not already an atom at that address
1081 if ( fAddrToAtom.find(atomAddr) == fAddrToAtom.end() ) {
1082 AnonymousAtom<A>* newAtom = new AnonymousAtom<A>(*this, sect, atomAddr, atomSize);
1083 if ( !suppress )
1084 fAtoms.push_back(newAtom);
1085 fAddrToAtom[atomAddr] = newAtom->redirectTo();
1086 }
1087 }
1088 }
1089 }
1090
1091 // add all c-string anonymous atoms
1092 for (const macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
1093 if ( ((sect->flags() & SECTION_TYPE) == S_CSTRING_LITERALS) || strcmp(sect->sectname(), "__cstring") == 0 ) {
1094 uint32_t stringLen;
1095 uint32_t stringAddr;
1096 BaseAtom* firstEmptyString = NULL;
1097 for(uint32_t sectOffset=0; sectOffset < sect->size(); sectOffset += stringLen) {
1098 stringAddr = sect->addr() + sectOffset;
1099 stringLen = strlen((char*)(fHeader) + sect->offset() + sectOffset) + 1;
1100 // add if not already an atom at that address
1101 if ( fAddrToAtom.find(stringAddr) == fAddrToAtom.end() ) {
1102 BaseAtom* newAtom = new AnonymousAtom<A>(*this, sect, stringAddr, stringLen);
1103 if ( stringLen == 1 ) {
1104 // because of padding it may look like there are lots of empty strings
1105 // map them all to the first empty string
1106 if ( firstEmptyString == NULL ) {
1107 firstEmptyString = newAtom;
1108 fAtoms.push_back(firstEmptyString);
1109 }
1110 fAddrToAtom[stringAddr] = firstEmptyString;
1111 }
1112 else {
1113 fAtoms.push_back(newAtom);
1114 fAddrToAtom[stringAddr] = newAtom;
1115 }
1116 }
1117 }
1118 }
1119 }
1120
1121 // create atoms to cover any non-debug ranges not handled above
1122 for (const macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
1123 pint_t sectionStartAddr = sect->addr();
1124 pint_t sectionEndAddr = sect->addr() + sect->size();
1125 const bool setFollowOnAtom = ((fHeader->flags() & MH_SUBSECTIONS_VIA_SYMBOLS) == 0);
1126 if ( sect->size() != 0 ) {
1127 // ignore dwarf sections. If ld every supports processing dwarf, this logic will need to change
1128 if ( (sect->flags() & S_ATTR_DEBUG) != 0 ) {
1129 fDebugInfo = kDebugInfoDwarf;
1130 if ( strcmp(sect->sectname(), "__debug_info") == 0 )
1131 fDwarfDebugInfoSect = sect;
1132 else if ( strcmp(sect->sectname(), "__debug_abbrev") == 0 )
1133 fDwarfDebugAbbrevSect = sect;
1134 else if ( strcmp(sect->sectname(), "__debug_line") == 0 )
1135 fDwarfDebugLineSect = sect;
1136 }
1137 else {
1138 if ( strcmp(sect->segname(), "__DWARFA") == 0 ) {
1139 throw "object file contains old DWARF debug info - rebuild with newer compiler";
1140 }
1141 uint8_t type (sect->flags() & SECTION_TYPE);
1142 switch ( type ) {
1143 case S_REGULAR:
1144 case S_ZEROFILL:
1145 case S_COALESCED:
1146 // detect if compiler has generated anonymous non-lazy pointers at end of __data section
1147 // HACK BEGIN - until compiler stops generated anonymous non-lazy pointers
1148 if ( (sect->size() >= sizeof(pint_t))
1149 && ((sect->size() % sizeof(pint_t)) == 0)
1150 && (sect->align() >= log2(sizeof(pint_t)))
1151 && (strcmp(sect->sectname(), "__data") == 0)
1152 && (strcmp(sect->segname(), "__DATA") == 0) ) {
1153 // find every pointer sized external reloc from end of section and split off into own atom
1154 uint32_t possiblePointerAddress = sect->size() - sizeof(pint_t);
1155 const uint8_t* sectionContent = ((uint8_t*)(fHeader))+sect->offset();
1156 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)((char*)(fHeader) + sect->reloff());
1157 const macho_relocation_info<P>* relocsEnd = &relocs[sect->nreloc()];
1158 for (const macho_relocation_info<P>* r = relocs; r < relocsEnd; ++r) {
1159 if ( ((r->r_address() & R_SCATTERED) == 0)
1160 && r->r_extern()
1161 && (r->r_address() == possiblePointerAddress)
1162 && (fAddrToAtom.find(possiblePointerAddress+sect->addr()) == fAddrToAtom.end())
1163 && (P::getP(*((pint_t*)(sectionContent+possiblePointerAddress))) == 0) ) {
1164 // create an anonymous atom to cover this non-lazy pointer
1165 AnonymousAtom<A>* newAtom = new AnonymousAtom<A>(*this, sect, sect->addr()+possiblePointerAddress, sizeof(pint_t));
1166 const macho_nlist<P>* targetSymbol = &fSymbols[r->r_symbolnum()];
1167 char* name;
1168 asprintf(&name, "%s$non_lazy_ptr", &fStrings[targetSymbol->n_strx()]);
1169 newAtom->fSynthesizedName = name;
1170 newAtom->fReallyNonLazyPointer = true;
1171 fAtoms.push_back(newAtom);
1172 fAddrToAtom[sect->addr()+possiblePointerAddress] = newAtom;
1173 possiblePointerAddress -= sizeof(pint_t);
1174 sectionEndAddr -= sizeof(pint_t);
1175 }
1176 else {
1177 break;
1178 }
1179 }
1180 }
1181 // HACK END - until compiler stops generated anonymous non-lazy pointers
1182 uint32_t previousAtomAddr = 0;
1183 BaseAtom* previousAtom = NULL;
1184 if ( fAddrToAtom.find(sectionStartAddr) == fAddrToAtom.end() ) {
1185 // if there is not an atom already at the start of this section, add an anonymous one
1186 BaseAtom* newAtom = new AnonymousAtom<A>(*this, sect, sect->addr(), 0);
1187 fAtoms.push_back(newAtom);
1188 fAddrToAtom[sect->addr()] = newAtom;
1189 previousAtomAddr = sectionStartAddr;
1190 previousAtom = newAtom;
1191 }
1192 // calculate size of all atoms in this section and add follow-on references
1193 for (std::map<uint32_t, BaseAtom*>::iterator it=fAddrToAtom.begin(); it != fAddrToAtom.end(); it++) {
1194 // note: this algorithm depends on the map iterator returning entries in address order
1195 if ( (it->first >= sectionStartAddr) && (it->first < sectionEndAddr) ) {
1196 //fprintf(stderr, " atom %s in section\n", it->second->getDisplayName());
1197 if ( previousAtom != NULL ) {
1198 previousAtom->setSize(it->first - previousAtomAddr);
1199 // FIX FIX: this setting of followOn atoms does not work when there are multiple
1200 // labels for the same atom
1201 if ( setFollowOnAtom && (it->second != previousAtom) )
1202 makeReference(A::kFollowOn, previousAtomAddr, it->first);
1203 }
1204 previousAtomAddr = it->first;
1205 previousAtom = it->second;
1206 }
1207 }
1208 if ( previousAtom != NULL ) {
1209 // set last atom in section
1210 previousAtom->setSize(sectionEndAddr - previousAtomAddr);
1211 }
1212 break;
1213 }
1214 }
1215 }
1216 }
1217
1218 // add relocation based references
1219 for (const macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
1220 // ignore dwarf sections. If ld every supports processing dwarf, this logic will need to change
1221 if ( (sect->flags() & S_ATTR_DEBUG) == 0 ) {
1222 switch ( sect->flags() & SECTION_TYPE ) {
1223 case S_SYMBOL_STUBS:
1224 case S_LAZY_SYMBOL_POINTERS:
1225 // we ignore compiler generated stubs, so ignore those relocs too
1226 break;
1227 default:
1228 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)((char*)(fHeader) + sect->reloff());
1229 const uint32_t relocCount = sect->nreloc();
1230 //fprintf(stderr, "relocCount = %d in section %s\n", relocCount, sect->sectname());
1231 for (uint32_t r = 0; r < relocCount; ++r) {
1232 try {
1233 if ( addRelocReference(sect, &relocs[r]) )
1234 ++r; // skip next
1235 }
1236 catch (const char* msg) {
1237 throwf("in section %s,%s reloc %u: %s\n", sect->segname(), sect->sectname(), r, msg);
1238 }
1239 }
1240 }
1241 }
1242 }
1243
1244 // add direct references to local non-lazy-pointers, can do this now that all atoms are constructed
1245 for (typename std::vector<AnonymousAtom<A>*>::iterator it=fLocalNonLazys.begin(); it != fLocalNonLazys.end(); it++) {
1246 AnonymousAtom<A>* localNonLazy = *it;
1247 uint32_t fileOffset = localNonLazy->fSection->offset() - localNonLazy->fSection->addr() + localNonLazy->fAddress;
1248 pint_t nonLazyPtrValue = P::getP(*((pint_t*)((char*)(fHeader)+fileOffset)));
1249 makeReference(A::kPointer, localNonLazy->fAddress, nonLazyPtrValue);
1250 }
1251
1252 // add implicit direct reference from each C++ function to its eh info
1253 for (const macho_section<P>* sect=sectionsStart; sect < sectionsEnd; ++sect) {
1254 if ( ((sect->flags() & SECTION_TYPE) == S_COALESCED) && (strcmp(sect->sectname(), "__eh_frame") == 0) ) {
1255 for (std::map<uint32_t, BaseAtom*>::iterator it=fAddrToAtom.begin(); it != fAddrToAtom.end(); it++) {
1256 // note: this algorithm depens on the map iterator returning entries in address order
1257 if ( (it->first >= sect->addr()) && (it->first < sect->addr()+sect->size()) ) {
1258 uint32_t ehAtomAddress = it->first;
1259 BaseAtom* ehAtom = it->second;
1260 const char* ehName = ehAtom->getName();
1261 if ( (ehName != NULL) && (strcmp(&ehName[strlen(ehName)-3], ".eh") == 0) )
1262 makeReferenceToEH(ehName, ehAtomAddress, sect);
1263 }
1264 }
1265 }
1266 }
1267
1268
1269 //for (std::map<uint32_t, BaseAtom*>::iterator it=fAddrToAtom.begin(); it != fAddrToAtom.end(); it++) {
1270 // fprintf(stderr, "[0x%0X -> 0x%0llX) : %s\n", it->first, it->first+it->second->getSize(), it->second->getDisplayName());
1271 //}
1272
1273 // add translation unit info from dwarf
1274 uint64_t stmtList;
1275 if ( (fDebugInfo == kDebugInfoDwarf) && (fOptions.fDebugInfoStripping != ObjectFile::ReaderOptions::kDebugInfoNone) ) {
1276 if ( !read_comp_unit(&fDwarfTranslationUnitFile, &fDwarfTranslationUnitDir, &stmtList) ) {
1277 // if can't parse dwarf, warn and give up
1278 fDwarfTranslationUnitFile = NULL;
1279 fDwarfTranslationUnitDir = NULL;
1280 fprintf(stderr, "ld64: warning can't parse dwarf compilation unit info in %s\n", this->getPath());
1281 fDebugInfo = kDebugInfoNone;
1282 }
1283 }
1284
1285 // add line number info to atoms from dwarf
1286 if ( (fDebugInfo == kDebugInfoDwarf) && (fOptions.fDebugInfoStripping != ObjectFile::ReaderOptions::kDebugInfoNone) ) {
1287 // file with just data will have no __debug_line info
1288 if ( (fDwarfDebugLineSect != NULL) && (fDwarfDebugLineSect->size() != 0) && (fAddrToAtom.size() != 0) ) {
1289 // validate stmt_list
1290 if ( (stmtList != (uint64_t)-1) && (stmtList < fDwarfDebugLineSect->size()) ) {
1291 const uint8_t* debug_line = (uint8_t*)(fHeader) + fDwarfDebugLineSect->offset();
1292 if ( debug_line != NULL ) {
1293 struct line_reader_data* lines = line_open(&debug_line[stmtList],
1294 fDwarfDebugLineSect->size() - stmtList, E::little_endian);
1295 struct line_info result;
1296 ObjectFile::Atom* curAtom = NULL;
1297 uint32_t curAtomOffset = 0;
1298 uint32_t curAtomAddress = 0;
1299 uint32_t curAtomSize = 0;
1300 while ( line_next (lines, &result, line_stop_line) ) {
1301 // for performance, see if in next pc is in current atom
1302 if ( (curAtom != NULL) && (result.pc <= curAtomAddress+curAtomSize) && (curAtomAddress <= result.pc) ) {
1303 curAtomOffset = result.pc - curAtomAddress;
1304 }
1305 else {
1306 // do slow look up of atom by address
1307 AtomAndOffset ao = this->findAtomAndOffset(result.pc);
1308 curAtom = ao.atom;
1309 if ( curAtom == NULL )
1310 break; // file has line info but no functions
1311 curAtomOffset = ao.offset;
1312 curAtomAddress = result.pc;
1313 curAtomSize = curAtom->getSize();
1314 }
1315 const char* filename;
1316 std::map<uint32_t,const char*>::iterator pos = fDwarfIndexToFile.find(result.file);
1317 if ( pos == fDwarfIndexToFile.end() ) {
1318 filename = line_file(lines, result.file);
1319 fDwarfIndexToFile[result.file] = filename;
1320 }
1321 else {
1322 filename = pos->second;
1323 }
1324 ObjectFile::LineInfo info;
1325 info.atomOffset = curAtomOffset;
1326 info.fileName = filename;
1327 info.lineNumber = result.line;
1328 //fprintf(stderr, "addr=0x%08llX, line=%lld, file=%s\n", result.pc, result.line, filename);
1329 ((BaseAtom*)curAtom)->addLineInfo(info);
1330 }
1331 line_free(lines);
1332 }
1333 else {
1334 fprintf(stderr, "ld64: warning could not parse dwarf line number info in %s\n", this->getPath());
1335 }
1336 }
1337 }
1338 }
1339
1340 // if no dwarf, try processing stabs debugging info
1341 if ( (fDebugInfo == kDebugInfoNone) && (fOptions.fDebugInfoStripping != ObjectFile::ReaderOptions::kDebugInfoNone) ) {
1342 // scan symbol table for stabs entries
1343 fStabs.reserve(fSymbolCount); // reduce re-allocations
1344 BaseAtom* currentAtom = NULL;
1345 pint_t currentAtomAddress = 0;
1346 enum { start, inBeginEnd, inFun } state = start;
1347 for (uint32_t symbolIndex = 0; symbolIndex < fSymbolCount; ++symbolIndex ) {
1348 const macho_nlist<P>* sym = &fSymbols[symbolIndex];
1349 uint8_t type = sym->n_type();
1350 const char* symString = (sym->n_strx() != 0) ? &fStrings[sym->n_strx()] : NULL;
1351 if ( (type & N_STAB) != 0 ) {
1352 fDebugInfo = kDebugInfoStabs;
1353 Stab stab;
1354 stab.atom = NULL;
1355 stab.type = type;
1356 stab.other = sym->n_sect();
1357 stab.desc = sym->n_desc();
1358 stab.value = sym->n_value();
1359 stab.string = NULL;
1360 switch (state) {
1361 case start:
1362 switch (type) {
1363 case N_BNSYM:
1364 // beginning of function block
1365 state = inBeginEnd;
1366 // fall into case to lookup atom by addresss
1367 case N_LCSYM:
1368 case N_STSYM:
1369 currentAtomAddress = sym->n_value();
1370 currentAtom = (BaseAtom*)this->findAtomAndOffset(currentAtomAddress).atom;
1371 if ( currentAtom != NULL ) {
1372 stab.atom = currentAtom;
1373 stab.string = symString;
1374 }
1375 else {
1376 fprintf(stderr, "can't find atom for stabs BNSYM at %08llX in %s\n",
1377 (uint64_t)sym->n_value(), path);
1378 }
1379 break;
1380 case N_SO:
1381 case N_OSO:
1382 case N_OPT:
1383 case N_LSYM:
1384 // not associated with an atom, just copy
1385 stab.string = symString;
1386 break;
1387 case N_GSYM:
1388 // n_value field is NOT atom address ;-(
1389 // need to find atom by name match
1390 const char* colon = strchr(symString, ':');
1391 if ( colon != NULL ) {
1392 // build underscore leading name
1393 int nameLen = colon - symString;
1394 char symName[nameLen+2];
1395 strlcpy(&symName[1], symString, nameLen+1);
1396 symName[0] = '_';
1397 symName[nameLen+1] = '\0';
1398 currentAtom = findAtomByName(symName);
1399 if ( currentAtom != NULL ) {
1400 stab.atom = currentAtom;
1401 stab.string = symString;
1402 }
1403 }
1404 if ( stab.atom == NULL ) {
1405 fprintf(stderr, "can't find atom for N_GSYM stabs %s in %s\n", symString, path);
1406 }
1407 break;
1408 case N_FUN:
1409 // old style stabs without BNSYM
1410 state = inFun;
1411 currentAtomAddress = sym->n_value();
1412 currentAtom = (BaseAtom*)this->findAtomAndOffset(currentAtomAddress).atom;
1413 if ( currentAtom != NULL ) {
1414 stab.atom = currentAtom;
1415 stab.string = symString;
1416 }
1417 else {
1418 fprintf(stderr, "can't find atom for stabs FUN at %08llX in %s\n",
1419 (uint64_t)currentAtomAddress, path);
1420 }
1421 break;
1422 case N_SOL:
1423 case N_SLINE:
1424 stab.string = symString;
1425 // old stabs
1426 break;
1427 case N_BINCL:
1428 case N_EINCL:
1429 case N_EXCL:
1430 stab.string = symString;
1431 // -gfull built .o file
1432 break;
1433 default:
1434 fprintf(stderr, "unknown stabs type 0x%X in %s\n", type, path);
1435 }
1436 break;
1437 case inBeginEnd:
1438 stab.atom = currentAtom;
1439 switch (type) {
1440 case N_ENSYM:
1441 state = start;
1442 currentAtom = NULL;
1443 break;
1444 case N_LCSYM:
1445 case N_STSYM:
1446 BaseAtom* nestedAtom = (BaseAtom*)this->findAtomAndOffset(sym->n_value()).atom;
1447 if ( nestedAtom != NULL ) {
1448 stab.atom = nestedAtom;
1449 stab.string = symString;
1450 }
1451 else {
1452 fprintf(stderr, "can't find atom for stabs 0x%X at %08llX in %s\n",
1453 type, (uint64_t)sym->n_value(), path);
1454 }
1455 break;
1456 case N_LBRAC:
1457 case N_RBRAC:
1458 case N_SLINE:
1459 // adjust value to be offset in atom
1460 stab.value -= currentAtomAddress;
1461 default:
1462 stab.string = symString;
1463 break;
1464 }
1465 break;
1466 case inFun:
1467 switch (type) {
1468 case N_FUN:
1469 if ( sym->n_sect() != 0 ) {
1470 // found another start stab, must be really old stabs...
1471 currentAtomAddress = sym->n_value();
1472 currentAtom = (BaseAtom*)this->findAtomAndOffset(currentAtomAddress).atom;
1473 if ( currentAtom != NULL ) {
1474 stab.atom = currentAtom;
1475 stab.string = symString;
1476 }
1477 else {
1478 fprintf(stderr, "can't find atom for stabs FUN at %08llX in %s\n",
1479 (uint64_t)currentAtomAddress, path);
1480 }
1481 }
1482 else {
1483 // found ending stab, switch back to start state
1484 stab.string = symString;
1485 stab.atom = currentAtom;
1486 state = start;
1487 currentAtom = NULL;
1488 }
1489 break;
1490 case N_LBRAC:
1491 case N_RBRAC:
1492 case N_SLINE:
1493 // adjust value to be offset in atom
1494 stab.value -= currentAtomAddress;
1495 stab.atom = currentAtom;
1496 break;
1497 case N_SO:
1498 stab.string = symString;
1499 state = start;
1500 break;
1501 default:
1502 stab.atom = currentAtom;
1503 stab.string = symString;
1504 break;
1505 }
1506 break;
1507 }
1508 // add to list of stabs for this .o file
1509 fStabs.push_back(stab);
1510 }
1511 }
1512 }
1513
1514
1515#if 0
1516 // special case precompiled header .o file (which has no content) to have one empty atom
1517 if ( fAtoms.size() == 0 ) {
1518 int pathLen = strlen(path);
1519 if ( (pathLen > 6) && (strcmp(&path[pathLen-6], ".gch.o")==0) ) {
1520 ObjectFile::Atom* phony = new AnonymousAtom<A>(*this, (uint32_t)0);
1521 //phony->fSynthesizedName = ".gch.o";
1522 fAtoms.push_back(phony);
1523 }
1524 }
1525#endif
1526}
1527
1528
1529template <typename A>
1530void Reader<A>::validSectionType(uint8_t type)
1531{
1532}
1533
1534template <typename A>
1535bool Reader<A>::getTranslationUnitSource(const char** dir, const char** name) const
1536{
1537 if ( fDebugInfo == kDebugInfoDwarf ) {
1538 *dir = fDwarfTranslationUnitDir;
1539 *name = fDwarfTranslationUnitFile;
1540 return true;
1541 }
1542 return false;
1543}
1544
1545template <typename A>
1546BaseAtom* Reader<A>::findAtomByName(const char* name)
1547{
1548 // first search the more important atoms
1549 for (std::map<uint32_t, BaseAtom*>::iterator it=fAddrToAtom.begin(); it != fAddrToAtom.end(); it++) {
1550 const char* atomName = it->second->getName();
1551 if ( (atomName != NULL) && (strcmp(atomName, name) == 0) ) {
1552 return it->second;
1553 }
1554 }
1555 // try all atoms, because this might have been a tentative definition
1556 for (std::vector<ObjectFile::Atom*>::iterator it=fAtoms.begin(); it != fAtoms.end(); it++) {
1557 BaseAtom* atom = (BaseAtom*)(*it);
1558 const char* atomName = atom->getName();
1559 if ( (atomName != NULL) && (strcmp(atomName, name) == 0) ) {
1560 return atom;
1561 }
1562 }
1563 return NULL;
1564}
1565
1566template <typename A>
1567Reference<A>* Reader<A>::makeReference(Kinds kind, uint32_t atAddr, uint32_t toAddr)
1568{
1569 return new Reference<A>(kind, findAtomAndOffset(atAddr), findAtomAndOffset(toAddr));
1570}
1571
1572template <typename A>
1573Reference<A>* Reader<A>::makeReference(Kinds kind, uint32_t atAddr, uint32_t fromAddr, uint32_t toAddr)
1574{
1575 return new Reference<A>(kind, findAtomAndOffset(atAddr), findAtomAndOffset(fromAddr), findAtomAndOffset(toAddr));
1576}
1577
1578template <typename A>
1579Reference<A>* Reader<A>::makeReferenceWithToBase(Kinds kind, uint32_t atAddr, uint32_t toAddr, uint32_t toBaseAddr)
1580{
1581 return new Reference<A>(kind, findAtomAndOffset(atAddr), findAtomAndOffset(toBaseAddr, toAddr));
1582}
1583
1584template <typename A>
1585Reference<A>* Reader<A>::makeReferenceWithToBase(Kinds kind, uint32_t atAddr, uint32_t fromAddr, uint32_t toAddr, uint32_t toBaseAddr)
1586{
1587 return new Reference<A>(kind, findAtomAndOffset(atAddr), findAtomAndOffset(fromAddr), findAtomAndOffset(toBaseAddr, toAddr));
1588}
1589
1590template <typename A>
1591Reference<A>* Reader<A>::makeByNameReference(Kinds kind, uint32_t atAddr, const char* toName, uint32_t toOffset)
1592{
1593 return new Reference<A>(kind, findAtomAndOffset(atAddr), toName, toOffset);
1594}
1595
1596template <typename A>
1597Reference<A>* Reader<A>::makeReferenceToEH(const char* ehName, pint_t ehAtomAddress, const macho_section<P>* ehSect)
1598{
1599 // add a direct reference from function atom to its eh frame atom
1600 const uint8_t* ehContent = (const uint8_t*)(fHeader) + ehAtomAddress - ehSect->addr() + ehSect->offset();
1601 int32_t deltaMinus8 = P::getP(*(pint_t*)(&ehContent[8])); // offset 8 in eh info is delta to function
1602 uint32_t funcAddr = ehAtomAddress + deltaMinus8 + 8;
1603 return makeReference(A::kNoFixUp, funcAddr, ehAtomAddress) ;
1604}
1605
1606
1607
1608template <typename A>
1609AtomAndOffset Reader<A>::findAtomAndOffset(uint32_t addr)
1610{
1611 // STL has no built-in for "find largest key that is same or less than"
1612 std::map<uint32_t, BaseAtom*>::iterator it = fAddrToAtom.upper_bound(addr);
1613 --it; // upper_bound gets us next key, so we back up one
1614 AtomAndOffset result;
1615 result.atom = it->second;
1616 result.offset = addr - it->first;
1617 //fprintf(stderr, "findAtomAndOffset(0x%0X) ==> %s (0x%0X -> 0x%0llX)\n",
1618 // addr, result.atom->getDisplayName(), it->first, it->first+result.atom->getSize());
1619 return result;
1620}
1621
1622// "scattered" relocations enable you to offset into an atom past the end of it
1623// baseAddr is the address of the target atom,
1624// realAddr is the points into it
1625template <typename A>
1626AtomAndOffset Reader<A>::findAtomAndOffset(uint32_t baseAddr, uint32_t realAddr)
1627{
1628 std::map<uint32_t, BaseAtom*>::iterator it = fAddrToAtom.find(baseAddr);
1629 if ( it != fAddrToAtom.end() ) {
1630 AtomAndOffset result;
1631 result.atom = it->second;
1632 result.offset = realAddr - it->first;
1633 //fprintf(stderr, "findAtomAndOffset(0x%08X, 0x%08X) => %s + 0x%08X\n", baseAddr, realAddr, result.atom->getDisplayName(), result.offset);
1634 return result;
1635 }
1636 // getting here means we have a scattered relocation to an address without a label
1637 // we should never get here...
1638 // one case we do get here is because sometimes the compiler generates non-lazy pointers in the __data section
1639 return findAtomAndOffset(realAddr);
1640}
1641
1642
/* Advance *OFFSET past one LEB128 value (signed or unsigned), never
   moving beyond END.  A truncated value leaves *OFFSET equal to END.  */
static void
skip_leb128 (const uint8_t ** offset, const uint8_t * end)
{
	const uint8_t * cursor = *offset;
	while (cursor != end)
	{
		/* Consume one byte; a clear high bit marks the last byte.  */
		const uint8_t byte = *cursor++;
		if (byte < 0x80)
			break;
	}
	*offset = cursor;
}
1652
/* Decode one ULEB128 value starting at *OFFSET into a 64-bit word and
   advance *OFFSET past it.  Returns (uint64_t)-1 if END is reached before
   the value terminates, or if the value does not fit in 64 bits; in the
   overflow case the remaining bytes of the value are still consumed.  */
static uint64_t
read_uleb128 (const uint8_t ** offset, const uint8_t * end)
{
	const uint64_t failure = (uint64_t) -1;
	uint64_t value = 0;
	int shift = 0;

	for (;;)
	{
		if (*offset == end)
			return failure;

		const uint8_t raw = *(*offset)++;
		const uint64_t payload = raw & 0x7f;

		/* The payload fits only if shifting it into place loses no bits.  */
		const bool fits = (shift < 64) && (((payload << shift) >> shift) == payload);
		if (fits)
		{
			value |= payload << shift;
			shift += 7;
		}
		else
			value = failure;

		/* A clear high bit marks the final byte of the value.  */
		if (raw < 0x80)
			return value;
	}
}
1676
1677
1678/* Skip over a DWARF attribute of form FORM. */
1679template <typename A>
1680bool Reader<A>::skip_form(const uint8_t ** offset, const uint8_t * end, uint64_t form,
1681 uint8_t addr_size, bool dwarf64)
1682{
1683 int64_t sz=0;
1684
1685 switch (form)
1686 {
1687 case DW_FORM_addr:
1688 sz = addr_size;
1689 break;
1690
1691 case DW_FORM_block2:
1692 if (end - *offset < 2)
1693 return false;
1694 sz = 2 + A::P::E::get16(*(uint16_t*)offset);
1695 break;
1696
1697 case DW_FORM_block4:
1698 if (end - *offset < 4)
1699 return false;
1700 sz = 2 + A::P::E::get32(*(uint32_t*)offset);
1701 break;
1702
1703 case DW_FORM_data2:
1704 case DW_FORM_ref2:
1705 sz = 2;
1706 break;
1707
1708 case DW_FORM_data4:
1709 case DW_FORM_ref4:
1710 sz = 4;
1711 break;
1712
1713 case DW_FORM_data8:
1714 case DW_FORM_ref8:
1715 sz = 8;
1716 break;
1717
1718 case DW_FORM_string:
1719 while (*offset != end && **offset)
1720 ++*offset;
1721 case DW_FORM_data1:
1722 case DW_FORM_flag:
1723 case DW_FORM_ref1:
1724 sz = 1;
1725 break;
1726
1727 case DW_FORM_block:
1728 sz = read_uleb128 (offset, end);
1729 break;
1730
1731 case DW_FORM_block1:
1732 if (*offset == end)
1733 return false;
1734 sz = 1 + **offset;
1735 break;
1736
1737 case DW_FORM_sdata:
1738 case DW_FORM_udata:
1739 case DW_FORM_ref_udata:
1740 skip_leb128 (offset, end);
1741 return true;
1742
1743 case DW_FORM_strp:
1744 case DW_FORM_ref_addr:
1745 sz = dwarf64 ? 8 : 4;
1746 break;
1747
1748 default:
1749 return false;
1750 }
1751 if (end - *offset < sz)
1752 return false;
1753 *offset += sz;
1754 return true;
1755}
1756
1757// Look at the compilation unit DIE and determine
1758// its NAME, compilation directory (in COMP_DIR) and its
1759// line number information offset (in STMT_LIST). NAME and COMP_DIR
1760// may be NULL (especially COMP_DIR) if they are not in the .o file;
1761// STMT_LIST will be (uint64_t) -1.
1762//
1763// At present this assumes that there's only one compilation unit DIE.
1764//
1765template <typename A>
1766bool Reader<A>::read_comp_unit(const char ** name, const char ** comp_dir,
1767 uint64_t *stmt_list)
1768{
1769 const uint8_t * debug_info;
1770 const uint8_t * debug_abbrev;
1771 const uint8_t * di;
1772 const uint8_t * da;
1773 const uint8_t * end;
1774 const uint8_t * enda;
1775 uint64_t sz;
1776 uint16_t vers;
1777 uint64_t abbrev_base;
1778 uint64_t abbrev;
1779 uint8_t address_size;
1780 bool dwarf64;
1781
1782 *name = NULL;
1783 *comp_dir = NULL;
1784 *stmt_list = (uint64_t) -1;
1785
1786 if ( (fDwarfDebugInfoSect == NULL) || (fDwarfDebugAbbrevSect == NULL) )
1787 return false;
1788
1789 debug_info = (uint8_t*)(fHeader) + fDwarfDebugInfoSect->offset();
1790 debug_abbrev = (uint8_t*)(fHeader) + fDwarfDebugAbbrevSect->offset();
1791 di = debug_info;
1792
1793 if (fDwarfDebugInfoSect->size() < 12)
1794 /* Too small to be a real debug_info section. */
1795 return false;
1796 sz = A::P::E::get32(*(uint32_t*)di);
1797 di += 4;
1798 dwarf64 = sz == 0xffffffff;
1799 if (dwarf64)
1800 sz = A::P::E::get64(*(uint64_t*)di), di += 8;
1801 else if (sz > 0xffffff00)
1802 /* Unknown dwarf format. */
1803 return false;
1804
1805 /* Verify claimed size. */
1806 if (sz + (di - debug_info) > fDwarfDebugInfoSect->size() || sz <= (dwarf64 ? 23 : 11))
1807 return false;
1808
1809 vers = A::P::E::get16(*(uint16_t*)di);
1810 if (vers < 2 || vers > 3)
1811 /* DWARF version wrong for this code.
1812 Chances are we could continue anyway, but we don't know for sure. */
1813 return false;
1814 di += 2;
1815
1816 /* Find the debug_abbrev section. */
1817 abbrev_base = dwarf64 ? A::P::E::get64(*(uint64_t*)di) : A::P::E::get32(*(uint32_t*)di);
1818 di += dwarf64 ? 8 : 4;
1819
1820 if (abbrev_base > fDwarfDebugAbbrevSect->size())
1821 return false;
1822 da = debug_abbrev + abbrev_base;
1823 enda = debug_abbrev + fDwarfDebugAbbrevSect->size();
1824
1825 address_size = *di++;
1826
1827 /* Find the abbrev number we're looking for. */
1828 end = di + sz;
1829 abbrev = read_uleb128 (&di, end);
1830 if (abbrev == (uint64_t) -1)
1831 return false;
1832
1833 /* Skip through the debug_abbrev section looking for that abbrev. */
1834 for (;;)
1835 {
1836 uint64_t this_abbrev = read_uleb128 (&da, enda);
1837 uint64_t attr;
1838
1839 if (this_abbrev == abbrev)
1840 /* This is almost always taken. */
1841 break;
1842 skip_leb128 (&da, enda); /* Skip the tag. */
1843 if (da == enda)
1844 return false;
1845 da++; /* Skip the DW_CHILDREN_* value. */
1846
1847 do {
1848 attr = read_uleb128 (&da, enda);
1849 skip_leb128 (&da, enda);
1850 } while (attr != 0 && attr != (uint64_t) -1);
1851 if (attr != 0)
1852 return false;
1853 }
1854
1855 /* Check that the abbrev is one for a DW_TAG_compile_unit. */
1856 if (read_uleb128 (&da, enda) != DW_TAG_compile_unit)
1857 return false;
1858 if (da == enda)
1859 return false;
1860 da++; /* Skip the DW_CHILDREN_* value. */
1861
1862 /* Now, go through the DIE looking for DW_AT_name,
1863 DW_AT_comp_dir, and DW_AT_stmt_list. */
1864 for (;;)
1865 {
1866 uint64_t attr = read_uleb128 (&da, enda);
1867 uint64_t form = read_uleb128 (&da, enda);
1868
1869 if (attr == (uint64_t) -1)
1870 return false;
1871 else if (attr == 0)
1872 return true;
1873
1874 if (form == DW_FORM_indirect)
1875 form = read_uleb128 (&di, end);
1876
1877 if (attr == DW_AT_name && form == DW_FORM_string)
1878 *name = (const char *) di;
1879 else if (attr == DW_AT_comp_dir && form == DW_FORM_string)
1880 *comp_dir = (const char *) di;
1881 /* Really we should support DW_FORM_strp here, too, but
1882 there's usually no reason for the producer to use that form
1883 for the DW_AT_name and DW_AT_comp_dir attributes. */
1884 else if (attr == DW_AT_stmt_list && form == DW_FORM_data4)
1885 *stmt_list = A::P::E::get32(*(uint32_t*)di);
1886 else if (attr == DW_AT_stmt_list && form == DW_FORM_data8)
1887 *stmt_list = A::P::E::get64(*(uint64_t*)di);
1888 if (! skip_form (&di, end, form, address_size, dwarf64))
1889 return false;
1890 }
1891}
1892
1893template <typename A>
1894const char* Reader<A>::assureFullPath(const char* path)
1895{
1896 if ( path[0] == '/' )
1897 return path;
1898 char cwdbuff[MAXPATHLEN];
1899 if ( getcwd(cwdbuff, MAXPATHLEN) != NULL ) {
1900 char* result;
1901 asprintf(&result, "%s/%s", cwdbuff, path);
1902 if ( result != NULL )
1903 return result;
1904 }
1905 return path;
1906}
1907
1908
1909//
1910//
// To implement architecture xxx, you must write template specializations for the following three methods:
1912// Reader<xxx>::validFile()
1913// Reader<xxx>::addRelocReference()
1914// Reference<xxx>::getDescription()
1915//
1916//
1917
1918
1919template <>
1920bool Reader<ppc>::validFile(const uint8_t* fileContent)
1921{
1922 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1923 if ( header->magic() != MH_MAGIC )
1924 return false;
1925 if ( header->cputype() != CPU_TYPE_POWERPC )
1926 return false;
1927 if ( header->filetype() != MH_OBJECT )
1928 return false;
1929 return true;
1930}
1931
1932template <>
1933bool Reader<ppc64>::validFile(const uint8_t* fileContent)
1934{
1935 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1936 if ( header->magic() != MH_MAGIC_64 )
1937 return false;
1938 if ( header->cputype() != CPU_TYPE_POWERPC64 )
1939 return false;
1940 if ( header->filetype() != MH_OBJECT )
1941 return false;
1942 return true;
1943}
1944
1945template <>
1946bool Reader<x86>::validFile(const uint8_t* fileContent)
1947{
1948 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1949 if ( header->magic() != MH_MAGIC )
1950 return false;
1951 if ( header->cputype() != CPU_TYPE_I386 )
1952 return false;
1953 if ( header->filetype() != MH_OBJECT )
1954 return false;
1955 return true;
1956}
1957
1958
1959
1960template <typename A>
1961bool Reader<A>::isWeakImportSymbol(const macho_nlist<P>* sym)
1962{
1963 return ( ((sym->n_type() & N_TYPE) == N_UNDF) && ((sym->n_desc() & N_WEAK_REF) != 0) );
1964}
1965
1966template <>
1967bool Reader<ppc64>::addRelocReference(const macho_section<ppc64::P>* sect, const macho_relocation_info<ppc64::P>* reloc)
1968{
1969 return addRelocReference_powerpc(sect, reloc);
1970}
1971
1972template <>
1973bool Reader<ppc>::addRelocReference(const macho_section<ppc::P>* sect, const macho_relocation_info<ppc::P>* reloc)
1974{
1975 return addRelocReference_powerpc(sect, reloc);
1976}
1977
1978
1979//
1980// ppc and ppc64 both use the same relocations, so process them in one common routine
1981//
1982template <typename A>
1983bool Reader<A>::addRelocReference_powerpc(const macho_section<typename A::P>* sect,
1984 const macho_relocation_info<typename A::P>* reloc)
1985{
1986 uint32_t srcAddr;
1987 uint32_t dstAddr;
1988 uint32_t* fixUpPtr;
1989 int32_t displacement = 0;
1990 uint32_t instruction = 0;
1991 uint32_t offsetInTarget;
1992 int16_t lowBits;
1993 bool result = false;
1994 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
1995 const macho_relocation_info<P>* nextReloc = &reloc[1];
1996 const char* targetName = NULL;
1997 bool weakImport = false;
1998 fixUpPtr = (uint32_t*)((char*)(fHeader) + sect->offset() + reloc->r_address());
1999 if ( reloc->r_type() != PPC_RELOC_PAIR )
2000 instruction = BigEndian::get32(*fixUpPtr);
2001 srcAddr = sect->addr() + reloc->r_address();
2002 if ( reloc->r_extern() ) {
2003 const macho_nlist<P>* targetSymbol = &fSymbols[reloc->r_symbolnum()];
2004 targetName = &fStrings[targetSymbol->n_strx()];
2005 weakImport = this->isWeakImportSymbol(targetSymbol);
2006 }
2007 switch ( reloc->r_type() ) {
2008 case PPC_RELOC_BR24:
2009 {
2010 if ( (instruction & 0x4C000000) == 0x48000000 ) {
2011 displacement = (instruction & 0x03FFFFFC);
2012 if ( (displacement & 0x02000000) != 0 )
2013 displacement |= 0xFC000000;
2014 }
2015 else {
2016 printf("bad instruction for BR24 reloc");
2017 }
2018 if ( reloc->r_extern() ) {
2019 offsetInTarget = srcAddr + displacement;
2020 if ( weakImport )
2021 makeByNameReference(A::kBranch24WeakImport, srcAddr, targetName, offsetInTarget);
2022 else
2023 makeByNameReference(A::kBranch24, srcAddr, targetName, offsetInTarget);
2024 }
2025 else {
2026 dstAddr = srcAddr + displacement;
2027 // if this is a branch to a stub, we need to see if the stub is for a weak imported symbol
2028 ObjectFile::Atom* atom = findAtomAndOffset(dstAddr).atom;
2029 if ( (atom->getSymbolTableInclusion() == ObjectFile::Atom::kSymbolTableNotIn)
2030 && ((AnonymousAtom<A>*)atom)->isWeakImportStub() )
2031 makeReference(A::kBranch24WeakImport, srcAddr, dstAddr);
2032 else
2033 makeReference(A::kBranch24, srcAddr, dstAddr);
2034 }
2035 }
2036 break;
2037 case PPC_RELOC_BR14:
2038 {
2039 displacement = (instruction & 0x0000FFFC);
2040 if ( (displacement & 0x00008000) != 0 )
2041 displacement |= 0xFFFF0000;
2042 if ( reloc->r_extern() ) {
2043 offsetInTarget = srcAddr + displacement;
2044 makeByNameReference(A::kBranch14, srcAddr, targetName, offsetInTarget);
2045 }
2046 else {
2047 dstAddr = srcAddr + displacement;
2048 makeReference(A::kBranch14, srcAddr, dstAddr);
2049 }
2050 }
2051 break;
2052 case PPC_RELOC_PAIR:
2053 // skip, processed by a previous look ahead
2054 break;
2055 case PPC_RELOC_LO16:
2056 {
2057 if ( nextReloc->r_type() != PPC_RELOC_PAIR ) {
2058 printf("PPC_RELOC_LO16 missing following pair\n");
2059 break;
2060 }
2061 result = true;
2062 lowBits = (instruction & 0xFFFF);
2063 if ( reloc->r_extern() ) {
2064 offsetInTarget = (nextReloc->r_address() << 16) | ((uint32_t)lowBits & 0x0000FFFF);
2065 makeByNameReference(A::kAbsLow16, srcAddr, targetName, offsetInTarget);
2066 }
2067 else {
2068 dstAddr = (nextReloc->r_address() << 16) + ((uint32_t)lowBits & 0x0000FFFF);
2069 makeReference(A::kAbsLow16, srcAddr, dstAddr);
2070 }
2071 }
2072 break;
2073 case PPC_RELOC_LO14:
2074 {
2075 if ( nextReloc->r_type() != PPC_RELOC_PAIR ) {
2076 printf("PPC_RELOC_LO14 missing following pair\n");
2077 break;
2078 }
2079 result = true;
2080 lowBits = (instruction & 0xFFFC);
2081 if ( reloc->r_extern() ) {
2082 offsetInTarget = (nextReloc->r_address() << 16) | ((uint32_t)lowBits & 0x0000FFFF);
2083 makeByNameReference(A::kAbsLow14, srcAddr, targetName, offsetInTarget);
2084 }
2085 else {
2086 dstAddr = (nextReloc->r_address() << 16) | ((uint32_t)lowBits & 0x0000FFFF);
2087 Reference<A>* ref = makeReference(A::kAbsLow14, srcAddr, dstAddr);
2088 BaseAtom* target = ((BaseAtom*)&(ref->getTarget()));
2089 if ( target != NULL )
2090 target->alignAtLeast(2);
2091 }
2092 }
2093 break;
2094 case PPC_RELOC_HI16:
2095 {
2096 if ( nextReloc->r_type() != PPC_RELOC_PAIR ) {
2097 printf("PPC_RELOC_HI16 missing following pair\n");
2098 break;
2099 }
2100 result = true;
2101 if ( reloc->r_extern() ) {
2102 offsetInTarget = ((instruction & 0x0000FFFF) << 16) | (nextReloc->r_address() & 0x0000FFFF);
2103 makeByNameReference(A::kAbsHigh16, srcAddr, targetName, offsetInTarget);
2104 }
2105 else {
2106 dstAddr = ((instruction & 0x0000FFFF) << 16) | (nextReloc->r_address() & 0x0000FFFF);
2107 makeReference(A::kAbsHigh16, srcAddr, dstAddr);
2108 }
2109 }
2110 break;
2111 case PPC_RELOC_HA16:
2112 {
2113 if ( nextReloc->r_type() != PPC_RELOC_PAIR ) {
2114 printf("PPC_RELOC_HA16 missing following pair\n");
2115 break;
2116 }
2117 result = true;
2118 lowBits = (nextReloc->r_address() & 0x0000FFFF);
2119 if ( reloc->r_extern() ) {
2120 offsetInTarget = ((instruction & 0x0000FFFF) << 16) + (int32_t)lowBits;
2121 makeByNameReference(A::kAbsHigh16AddLow, srcAddr, targetName, offsetInTarget);
2122 }
2123 else {
2124 dstAddr = ((instruction & 0x0000FFFF) << 16) + (int32_t)lowBits;
2125 makeReference(A::kAbsHigh16AddLow, srcAddr, dstAddr);
2126 }
2127 }
2128 break;
2129 case PPC_RELOC_VANILLA:
2130 {
2131 pint_t pointerValue = P::getP(*((pint_t*)fixUpPtr));
2132 if ( reloc->r_extern() ) {
2133 if ( weakImport )
2134 makeByNameReference(A::kPointerWeakImport, srcAddr, targetName, pointerValue);
2135 else
2136 makeByNameReference(A::kPointer, srcAddr, targetName, pointerValue);
2137 }
2138 else {
2139 makeReference(A::kPointer, srcAddr, pointerValue);
2140 }
2141 }
2142 break;
2143 case PPC_RELOC_JBSR:
2144 // this is from -mlong-branch codegen. We ignore the jump island
2145 if ( nextReloc->r_type() != PPC_RELOC_PAIR ) {
2146 printf("PPC_RELOC_JBSR missing following pair\n");
2147 break;
2148 }
2149 result = true;
2150 makeReference(A::kBranch24, srcAddr, nextReloc->r_address());
2151 break;
2152 default:
2153 printf("unknown relocation type %d\n", reloc->r_type());
2154 }
2155 }
2156 else {
2157 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
2158 srcAddr = sect->addr() + sreloc->r_address();
2159 dstAddr = sreloc->r_value();
2160 uint32_t betterDstAddr;
2161 fixUpPtr = (uint32_t*)((char*)(fHeader) + sect->offset() + sreloc->r_address());
2162 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
2163 const macho_relocation_info<P>* nextReloc = &reloc[1];
2164 // file format allows pair to be scattered or not
2165 bool nextRelocIsPair = false;
2166 uint32_t nextRelocAddress = 0;
2167 uint32_t nextRelocValue = 0;
2168 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
2169 if ( nextReloc->r_type() == PPC_RELOC_PAIR ) {
2170 nextRelocIsPair = true;
2171 nextRelocAddress = nextReloc->r_address();
2172 result = true;
2173 }
2174 }
2175 else {
2176 if ( nextSReloc->r_type() == PPC_RELOC_PAIR ) {
2177 nextRelocIsPair = true;
2178 nextRelocAddress = nextSReloc->r_address();
2179 nextRelocValue = nextSReloc->r_value();
2180 result = true;
2181 }
2182 }
2183 switch (sreloc->r_type()) {
2184 case PPC_RELOC_VANILLA:
2185 {
2186 betterDstAddr = P::getP(*(pint_t*)fixUpPtr);
2187 //fprintf(stderr, "scattered pointer reloc: srcAddr=0x%08X, dstAddr=0x%08X, pointer=0x%08X\n", srcAddr, dstAddr, betterDstAddr);
2188 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
2189 makeReferenceWithToBase(A::kPointer, srcAddr, betterDstAddr, dstAddr);
2190 }
2191 break;
2192 case PPC_RELOC_BR14:
2193 {
2194 instruction = BigEndian::get32(*fixUpPtr);
2195 displacement = (instruction & 0x0000FFFC);
2196 if ( (displacement & 0x00008000) != 0 )
2197 displacement |= 0xFFFF0000;
2198 betterDstAddr = srcAddr+displacement;
2199 //fprintf(stderr, "betterDstAddr=0x%08X, srcAddr=0x%08X, displacement=0x%08X\n", betterDstAddr, srcAddr, displacement);
2200 makeReferenceWithToBase(A::kBranch14, srcAddr, betterDstAddr, dstAddr);
2201 }
2202 break;
2203 case PPC_RELOC_BR24:
2204 {
2205 instruction = BigEndian::get32(*fixUpPtr);
2206 if ( (instruction & 0x4C000000) == 0x48000000 ) {
2207 displacement = (instruction & 0x03FFFFFC);
2208 if ( (displacement & 0x02000000) != 0 )
2209 displacement |= 0xFC000000;
2210 betterDstAddr = srcAddr+displacement;
2211 makeReferenceWithToBase(A::kBranch24, srcAddr, betterDstAddr, dstAddr);
2212 }
2213 }
2214 break;
2215 case PPC_RELOC_LO16_SECTDIFF:
2216 {
2217 if ( ! nextRelocIsPair ) {
2218 printf("PPC_RELOC_LO16_SECTDIFF missing following PAIR\n");
2219 break;
2220 }
2221 instruction = BigEndian::get32(*fixUpPtr);
2222 lowBits = (instruction & 0xFFFF);
2223 displacement = (nextRelocAddress << 16) | ((uint32_t)lowBits & 0x0000FFFF);
2224 makeReferenceWithToBase(A::kPICBaseLow16, srcAddr, nextRelocValue, nextRelocValue + displacement, dstAddr);
2225 }
2226 break;
2227 case PPC_RELOC_LO14_SECTDIFF:
2228 {
2229 if ( ! nextRelocIsPair ) {
2230 printf("PPC_RELOC_LO14_SECTDIFF missing following PAIR\n");
2231 break;
2232 }
2233 instruction = BigEndian::get32(*fixUpPtr);
2234 lowBits = (instruction & 0xFFFC);
2235 displacement = (nextRelocAddress << 16) | ((uint32_t)lowBits & 0x0000FFFF);
2236 Reference<A>* ref = makeReferenceWithToBase(A::kPICBaseLow14, srcAddr, nextRelocValue, nextRelocValue + displacement, dstAddr);
2237 BaseAtom* target = ((BaseAtom*)&(ref->getTarget()));
2238 if ( target != NULL ) // can be NULL if target is turned into by-name reference
2239 target->alignAtLeast(2);
2240 }
2241 break;
2242 case PPC_RELOC_HA16_SECTDIFF:
2243 {
2244 if ( ! nextRelocIsPair ) {
2245 printf("PPC_RELOC_HA16_SECTDIFF missing following PAIR\n");
2246 break;
2247 }
2248 instruction = BigEndian::get32(*fixUpPtr);
2249 lowBits = (nextRelocAddress & 0x0000FFFF);
2250 displacement = ((instruction & 0x0000FFFF) << 16) + (int32_t)lowBits;
2251 makeReferenceWithToBase(A::kPICBaseHigh16, srcAddr, nextRelocValue, nextRelocValue + displacement, dstAddr);
2252 }
2253 break;
2254 case PPC_RELOC_LO14:
2255 {
2256 if ( ! nextRelocIsPair ) {
2257 printf("PPC_RELOC_LO14 missing following PAIR\n");
2258 break;
2259 }
2260 instruction = BigEndian::get32(*fixUpPtr);
2261 lowBits = (instruction & 0xFFFC);
2262 betterDstAddr = (nextRelocAddress << 16) + ((uint32_t)lowBits & 0x0000FFFF);
2263 makeReferenceWithToBase(A::kAbsLow14, srcAddr, betterDstAddr, dstAddr);
2264 }
2265 break;
2266 case PPC_RELOC_LO16:
2267 {
2268 if ( ! nextRelocIsPair ) {
2269 printf("PPC_RELOC_LO16 missing following PAIR\n");
2270 break;
2271 }
2272 instruction = BigEndian::get32(*fixUpPtr);
2273 lowBits = (instruction & 0xFFFF);
2274 betterDstAddr = (nextRelocAddress << 16) + ((uint32_t)lowBits & 0x0000FFFF);
2275 makeReferenceWithToBase(A::kAbsLow16, srcAddr, betterDstAddr, dstAddr);
2276 }
2277 break;
2278 case PPC_RELOC_HA16:
2279 {
2280 if ( ! nextRelocIsPair ) {
2281 printf("PPC_RELOC_HA16 missing following PAIR\n");
2282 break;
2283 }
2284 instruction = BigEndian::get32(*fixUpPtr);
2285 lowBits = (nextRelocAddress & 0xFFFF);
2286 betterDstAddr = ((instruction & 0xFFFF) << 16) + (int32_t)lowBits;
2287 makeReferenceWithToBase(A::kAbsHigh16AddLow, srcAddr, betterDstAddr, dstAddr);
2288 }
2289 break;
2290 case PPC_RELOC_SECTDIFF:
2291 case PPC_RELOC_LOCAL_SECTDIFF:
2292 {
2293 if ( ! nextRelocIsPair ) {
2294 printf("PPC_RELOC_SECTDIFF missing following pair\n");
2295 break;
2296 }
2297 makeReference(pointerDiffKindForLength_powerpc(sreloc->r_length()), srcAddr, nextRelocValue, dstAddr);
2298 }
2299 break;
2300 case PPC_RELOC_PAIR:
2301 break;
2302 case PPC_RELOC_HI16_SECTDIFF:
2303 printf("unexpected scattered relocation type PPC_RELOC_HI16_SECTDIFF\n");
2304 break;
2305 default:
2306 printf("unknown scattered relocation type %d\n", sreloc->r_type());
2307 }
2308 }
2309 return result;
2310}
2311
2312template <>
2313ppc::ReferenceKinds Reader<ppc>::pointerDiffKindForLength_powerpc(uint8_t r_length)
2314{
2315 if ( r_length == 2 )
2316 return ppc::kPointerDiff32;
2317 else
2318 throw "bad diff relocations r_length for ppc architecture";
2319 }
2320
2321template <>
2322ppc64::ReferenceKinds Reader<ppc64>::pointerDiffKindForLength_powerpc(uint8_t r_length)
2323{
2324 if ( r_length == 2 )
2325 return ppc64::kPointerDiff32;
2326 else if ( r_length == 3 )
2327 return ppc64::kPointerDiff64;
2328 else
2329 throw "bad diff relocations r_length for ppc64 architecture";
2330 }
2331
2332template <>
2333bool Reader<x86>::addRelocReference(const macho_section<x86::P>* sect, const macho_relocation_info<x86::P>* reloc)
2334{
2335 uint32_t srcAddr;
2336 uint32_t dstAddr;
2337 uint32_t* fixUpPtr;
2338 bool result = false;
2339 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
2340 srcAddr = sect->addr() + reloc->r_address();
2341 fixUpPtr = (uint32_t*)((char*)(fHeader) + sect->offset() + reloc->r_address());
2342 switch ( reloc->r_type() ) {
2343 case GENERIC_RELOC_VANILLA:
2344 {
2345 if ( reloc->r_length() != 2 )
2346 throw "bad vanilla relocation length";
2347 x86::ReferenceKinds kind;
2348 uint32_t pointerValue = E::get32(*fixUpPtr);
2349 if ( reloc->r_pcrel() ) {
2350 kind = x86::kPCRel32;
2351 pointerValue += srcAddr + sizeof(uint32_t);
2352 }
2353 else {
2354 kind = x86::kPointer;
2355 }
2356 if ( reloc->r_extern() ) {
2357 const macho_nlist<P>* targetSymbol = &fSymbols[reloc->r_symbolnum()];
2358 if ( this->isWeakImportSymbol(targetSymbol) )
2359 kind = x86::kPointerWeakImport;
2360 const char* targetName = &fStrings[targetSymbol->n_strx()];
2361 makeByNameReference(kind, srcAddr, targetName, pointerValue);
2362 }
2363 else {
2364 // if this is a branch to a stub, we need to see if the stub is for a weak imported symbol
2365 ObjectFile::Atom* atom = findAtomAndOffset(pointerValue).atom;
2366 if ( reloc->r_pcrel() && (atom->getSymbolTableInclusion() == ObjectFile::Atom::kSymbolTableNotIn)
2367 && ((AnonymousAtom<x86>*)atom)->isWeakImportStub() )
2368 makeReference(x86::kPCRel32WeakImport, srcAddr, pointerValue);
2369 else
2370 makeReference(kind, srcAddr, pointerValue);
2371 }
2372 }
2373 break;
2374 default:
2375 printf("unknown relocation type %d\n", reloc->r_type());
2376 }
2377 }
2378 else {
2379 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
2380 srcAddr = sect->addr() + sreloc->r_address();
2381 dstAddr = sreloc->r_value();
2382 fixUpPtr = (uint32_t*)((char*)(fHeader) + sect->offset() + sreloc->r_address());
2383 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
2384 const macho_relocation_info<P>* nextReloc = &reloc[1];
2385 pint_t betterDstAddr;
2386 // file format allows pair to be scattered or not
2387 bool nextRelocIsPair = false;
2388 uint32_t nextRelocAddress = 0;
2389 uint32_t nextRelocValue = 0;
2390 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
2391 if ( nextReloc->r_type() == PPC_RELOC_PAIR ) {
2392 nextRelocIsPair = true;
2393 nextRelocAddress = nextReloc->r_address();
2394 result = true;
2395 }
2396 }
2397 else {
2398 if ( nextSReloc->r_type() == PPC_RELOC_PAIR ) {
2399 nextRelocIsPair = true;
2400 nextRelocAddress = nextSReloc->r_address();
2401 nextRelocValue = nextSReloc->r_value();
2402 }
2403 }
2404 switch (sreloc->r_type()) {
2405 case GENERIC_RELOC_VANILLA:
2406 betterDstAddr = LittleEndian::get32(*fixUpPtr);
2407 //fprintf(stderr, "pointer reloc: srcAddr=0x%08X, dstAddr=0x%08X, pointer=0x%08lX\n", srcAddr, dstAddr, betterDstAddr);
2408 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
2409 if ( sreloc->r_pcrel() ) {
2410 betterDstAddr += srcAddr + 4;
2411 makeReferenceWithToBase(x86::kPCRel32, srcAddr, betterDstAddr, dstAddr);
2412 }
2413 else {
2414 makeReferenceWithToBase(x86::kPointer, srcAddr, betterDstAddr, dstAddr);
2415 }
2416 break;
2417 case GENERIC_RELOC_SECTDIFF:
2418 case GENERIC_RELOC_LOCAL_SECTDIFF:
2419 {
2420 if ( !nextRelocIsPair ) {
2421 printf("GENERIC_RELOC_SECTDIFF missing following pair\n");
2422 break;
2423 }
2424 if ( sreloc->r_length() != 2 )
2425 throw "bad length for GENERIC_RELOC_SECTDIFF";
2426 betterDstAddr = LittleEndian::get32(*fixUpPtr);
2427 makeReferenceWithToBase(x86::kPointerDiff, srcAddr, nextRelocValue, betterDstAddr+nextRelocValue, dstAddr);
2428 }
2429 break;
2430 case GENERIC_RELOC_PAIR:
2431 // do nothing, already used via a look ahead
2432 break;
2433 default:
2434 printf("unknown scattered relocation type %d\n", sreloc->r_type());
2435 }
2436 }
2437 return result;
2438}
2439
2440
2441
2442template <>
2443const char* Reference<x86>::getDescription() const
2444{
2445 static char temp[1024];
2446 switch( fKind ) {
2447 case x86::kNoFixUp:
2448 sprintf(temp, "reference to ");
2449 break;
2450 case x86::kFollowOn:
2451 sprintf(temp, "followed by ");
2452 break;
2453 case x86::kPointerWeakImport:
2454 sprintf(temp, "offset 0x%04X, weak import pointer to ", fFixUpOffsetInSrc);
2455 break;
2456 case x86::kPointer:
2457 sprintf(temp, "offset 0x%04X, pointer to ", fFixUpOffsetInSrc);
2458 break;
2459 case x86::kPointerDiff:
2460 {
2461 // by-name references have quoted names
2462 const char* targetQuotes = (&(this->getTarget()) == NULL) ? "\"" : "";
2463 const char* fromQuotes = (&(this->getFromTarget()) == NULL) ? "\"" : "";
2464 sprintf(temp, "offset 0x%04X, 32-bit pointer difference: (&%s%s%s + 0x%08X) - (&%s%s%s + 0x%08X)",
2465 fFixUpOffsetInSrc, targetQuotes, this->getTargetName(), targetQuotes, fToTarget.offset,
2466 fromQuotes, this->getFromTargetName(), fromQuotes, fFromTarget.offset );
2467 return temp;
2468 }
2469 break;
2470 case x86::kPCRel32WeakImport:
2471 sprintf(temp, "offset 0x%04X, rel32 reference to weak imported ", fFixUpOffsetInSrc);
2472 break;
2473 case x86::kPCRel32:
2474 sprintf(temp, "offset 0x%04X, rel32 reference to ", fFixUpOffsetInSrc);
2475 break;
2476 }
2477 // always quote by-name references
2478 if ( fToTargetName != NULL ) {
2479 strcat(temp, "\"");
2480 strcat(temp, fToTargetName);
2481 strcat(temp, "\"");
2482 }
2483 else if ( fToTarget.atom != NULL ) {
2484 strcat(temp, fToTarget.atom->getDisplayName());
2485 }
2486 else {
2487 strcat(temp, "NULL target");
2488 }
2489 if ( fToTarget.offset != 0 )
2490 sprintf(&temp[strlen(temp)], " plus 0x%08X", fToTarget.offset);
2491
2492 return temp;
2493}
2494
2495
2496template <>
2497const char* Reference<ppc>::getDescription() const
2498{
2499 static char temp[1024];
2500 switch( fKind ) {
2501 case ppc::kNoFixUp:
2502 sprintf(temp, "reference to ");
2503 break;
2504 case ppc::kFollowOn:
2505 sprintf(temp, "followed by ");
2506 break;
2507 case ppc::kPointerWeakImport:
2508 sprintf(temp, "offset 0x%04X, weak import pointer to ", fFixUpOffsetInSrc);
2509 break;
2510 case ppc::kPointer:
2511 sprintf(temp, "offset 0x%04X, pointer to ", fFixUpOffsetInSrc);
2512 break;
2513 case ppc::kPointerDiff32:
2514 {
2515 // by-name references have quoted names
2516 const char* targetQuotes = (&(this->getTarget()) == NULL) ? "\"" : "";
2517 const char* fromQuotes = (&(this->getFromTarget()) == NULL) ? "\"" : "";
2518 sprintf(temp, "offset 0x%04X, 32-bit pointer difference: (&%s%s%s + %d) - (&%s%s%s + %d)",
2519 fFixUpOffsetInSrc, targetQuotes, this->getTargetName(), targetQuotes, fToTarget.offset,
2520 fromQuotes, this->getFromTargetName(), fromQuotes, fFromTarget.offset );
2521 return temp;
2522 }
2523 case ppc::kPointerDiff64:
2524 throw "unsupported refrence kind";
2525 break;
2526 case ppc::kBranch24WeakImport:
2527 sprintf(temp, "offset 0x%04X, pc-rel branch fixup to weak imported ", fFixUpOffsetInSrc);
2528 break;
2529 case ppc::kBranch24:
2530 case ppc::kBranch14:
2531 sprintf(temp, "offset 0x%04X, pc-rel branch fixup to ", fFixUpOffsetInSrc);
2532 break;
2533 case ppc::kPICBaseLow16:
2534 sprintf(temp, "offset 0x%04X, low 16 fixup from pic-base offset 0x%04X to ", fFixUpOffsetInSrc, fFromTarget.offset);
2535 break;
2536 case ppc::kPICBaseLow14:
2537 sprintf(temp, "offset 0x%04X, low 14 fixup from pic-base offset 0x%04X to ", fFixUpOffsetInSrc, fFromTarget.offset);
2538 break;
2539 case ppc::kPICBaseHigh16:
2540 sprintf(temp, "offset 0x%04X, high 16 fixup from pic-base offset 0x%04X to ", fFixUpOffsetInSrc, fFromTarget.offset);
2541 break;
2542 case ppc::kAbsLow16:
2543 sprintf(temp, "offset 0x%04X, low 16 fixup to absolute address of ", fFixUpOffsetInSrc);
2544 break;
2545 case ppc::kAbsLow14:
2546 sprintf(temp, "offset 0x%04X, low 14 fixup to absolute address of ", fFixUpOffsetInSrc);
2547 break;
2548 case ppc::kAbsHigh16:
2549 sprintf(temp, "offset 0x%04X, high 16 fixup to absolute address of ", fFixUpOffsetInSrc);
2550 break;
2551 case ppc::kAbsHigh16AddLow:
2552 sprintf(temp, "offset 0x%04X, high 16 fixup to absolute address of ", fFixUpOffsetInSrc);
2553 break;
2554 }
2555 // always quote by-name references
2556 if ( fToTargetName != NULL ) {
2557 strcat(temp, "\"");
2558 strcat(temp, fToTargetName);
2559 strcat(temp, "\"");
2560 }
2561 else if ( fToTarget.atom != NULL ) {
2562 strcat(temp, fToTarget.atom->getDisplayName());
2563 }
2564 else {
2565 strcat(temp, "NULL target");
2566 }
2567 if ( fToTarget.offset != 0 )
2568 sprintf(&temp[strlen(temp)], " plus 0x%08X", fToTarget.offset);
2569
2570 return temp;
2571}
2572
2573template <>
2574const char* Reference<ppc64>::getDescription() const
2575{
2576 static char temp[1024];
2577 switch( fKind ) {
2578 case ppc64::kNoFixUp:
2579 sprintf(temp, "reference to ");
2580 break;
2581 case ppc64::kFollowOn:
2582 sprintf(temp, "followed by ");
2583 break;
2584 case ppc64::kPointerWeakImport:
2585 sprintf(temp, "offset 0x%04llX, weak import pointer to ", fFixUpOffsetInSrc);
2586 break;
2587 case ppc64::kPointer:
2588 sprintf(temp, "offset 0x%04llX, pointer to ", fFixUpOffsetInSrc);
2589 break;
2590 case ppc64::kPointerDiff64:
2591 {
2592 // by-name references have quoted names
2593 const char* targetQuotes = (&(this->getTarget()) == NULL) ? "\"" : "";
2594 const char* fromQuotes = (&(this->getFromTarget()) == NULL) ? "\"" : "";
2595 sprintf(temp, "offset 0x%04llX, 64-bit pointer difference: (&%s%s%s + %u) - (&%s%s%s + %u)",
2596 fFixUpOffsetInSrc, targetQuotes, this->getTargetName(), targetQuotes, fToTarget.offset,
2597 fromQuotes, this->getFromTargetName(), fromQuotes, fFromTarget.offset );
2598 return temp;
2599 }
2600 case ppc64::kPointerDiff32:
2601 {
2602 // by-name references have quoted names
2603 const char* targetQuotes = (&(this->getTarget()) == NULL) ? "\"" : "";
2604 const char* fromQuotes = (&(this->getFromTarget()) == NULL) ? "\"" : "";
2605 sprintf(temp, "offset 0x%04llX, 32-bit pointer difference: (&%s%s%s + %u) - (&%s%s%s + %u)",
2606 fFixUpOffsetInSrc, targetQuotes, this->getTargetName(), targetQuotes, fToTarget.offset,
2607 fromQuotes, this->getFromTargetName(), fromQuotes, fFromTarget.offset );
2608 return temp;
2609 }
2610 case ppc64::kBranch24WeakImport:
2611 sprintf(temp, "offset 0x%04llX, pc-rel branch fixup to weak imported ", fFixUpOffsetInSrc);
2612 break;
2613 case ppc64::kBranch24:
2614 case ppc64::kBranch14:
2615 sprintf(temp, "offset 0x%04llX, pc-rel branch fixup to ", fFixUpOffsetInSrc);
2616 break;
2617 case ppc64::kPICBaseLow16:
2618 sprintf(temp, "offset 0x%04llX, low 16 fixup from pic-base offset 0x%04X to ", fFixUpOffsetInSrc, fFromTarget.offset);
2619 break;
2620 case ppc64::kPICBaseLow14:
2621 sprintf(temp, "offset 0x%04llX, low 14 fixup from pic-base offset 0x%04X to ", fFixUpOffsetInSrc, fFromTarget.offset);
2622 break;
2623 case ppc64::kPICBaseHigh16:
2624 sprintf(temp, "offset 0x%04llX, high 16 fixup from pic-base offset 0x%04X to ", fFixUpOffsetInSrc, fFromTarget.offset);
2625 break;
2626 case ppc64::kAbsLow16:
2627 sprintf(temp, "offset 0x%04llX, low 16 fixup to absolute address of ", fFixUpOffsetInSrc);
2628 break;
2629 case ppc64::kAbsLow14:
2630 sprintf(temp, "offset 0x%04llX, low 14 fixup to absolute address of ", fFixUpOffsetInSrc);
2631 break;
2632 case ppc64::kAbsHigh16:
2633 sprintf(temp, "offset 0x%04llX, high 16 fixup to absolute address of ", fFixUpOffsetInSrc);
2634 break;
2635 case ppc64::kAbsHigh16AddLow:
2636 sprintf(temp, "offset 0x%04llX, high 16 fixup to absolute address of ", fFixUpOffsetInSrc);
2637 break;
2638 }
2639 // always quote by-name references
2640 if ( fToTargetName != NULL ) {
2641 strcat(temp, "\"");
2642 strcat(temp, fToTargetName);
2643 strcat(temp, "\"");
2644 }
2645 else if ( fToTarget.atom != NULL ) {
2646 strcat(temp, fToTarget.atom->getDisplayName());
2647 }
2648 else {
2649 strcat(temp, "NULL target");
2650 }
2651 if ( fToTarget.offset != 0 )
2652 sprintf(&temp[strlen(temp)], " plus 0x%08X", fToTarget.offset);
2653
2654 return temp;
2655}
2656
2657
2658
2659
2660}; // namespace relocatable
2661}; // namespace mach_o
2662
2663#endif // __OBJECT_FILE_MACH_O__