]> git.saurik.com Git - apple/ld64.git/blob - src/ld/parsers/macho_relocatable_file.cpp
49d333e97d05315d482c01557830506b0ae9ce0d
[apple/ld64.git] / src / ld / parsers / macho_relocatable_file.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <math.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <sys/param.h>
32 #include <sys/stat.h>
33 #include <sys/mman.h>
34
35 #include "MachOFileAbstraction.hpp"
36
37 #include "libunwind/DwarfInstructions.hpp"
38 #include "libunwind/AddressSpace.hpp"
39 #include "libunwind/Registers.hpp"
40
41 #include <vector>
42 #include <set>
43 #include <map>
44 #include <algorithm>
45
46 #include "dwarf2.h"
47 #include "debugline.h"
48
49 #include "Architectures.hpp"
50 #include "ld.hpp"
51 #include "macho_relocatable_file.h"
52
53
54
// Diagnostic entry points defined outside this file. Both take a printf-style
// format string (the format attribute lets the compiler check the arguments);
// throwf() is additionally declared noreturn.
55 extern void throwf(const char* format, ...) __attribute__ ((noreturn,format(printf, 1, 2)));
56 extern void warning(const char* format, ...) __attribute__((format(printf, 1, 2)));
57
58 namespace mach_o {
59 namespace relocatable {
60
61
62 // forward references: these class templates are named (as friends, parameters
// and members) before their definitions appear further down
63 template <typename A> class Parser;
64 template <typename A> class Atom;
65 template <typename A> class Section;
66 template <typename A> class CFISection;
67 template <typename A> class CUSection;
68
//
// File<A> implements ld::relocatable::File for a mach-o object file of
// architecture A. It stores the raw file content pointer together with the
// section array, the atom buffers, and the fixup / unwind-info / line-info /
// stab / linker-option vectors. Atom<A>, Section<A>, Parser<A> and
// CFISection<A>::OAS are friends and access those members directly.
//
69 template <typename A>
70 class File : public ld::relocatable::File
71 {
72 public:
// Every array pointer and count starts out NULL/0 -- including the alias-atom
// array and its count -- so no member is ever read while indeterminate.
// Initializers are listed in member declaration order.
73 File(const char* p, time_t mTime, const uint8_t* content, ld::File::Ordinal ord) :
74 ld::relocatable::File(p,mTime,ord), _fileContent(content),
75 _sectionsArray(NULL), _atomsArray(NULL), _aliasAtomsArray(NULL),
76 _sectionsArrayCount(0), _atomsArrayCount(0), _aliasAtomsArrayCount(0),
77 _debugInfoKind(ld::relocatable::File::kDebugInfoNone),
78 _dwarfTranslationUnitPath(NULL),
79 _dwarfDebugInfoSect(NULL), _dwarfDebugAbbrevSect(NULL),
80 _dwarfDebugLineSect(NULL), _dwarfDebugStringSect(NULL),
81 _objConstraint(ld::File::objcConstraintNone),
82 _swiftVersion(0),
83 _cpuSubType(0),
84 _canScatterAtoms(false) {}
85 virtual ~File();
86
87 // overrides of ld::File
88 virtual bool forEachAtom(ld::File::AtomHandler&) const;
// this implementation never supplies atoms on demand: always returns false
89 virtual bool justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const
90 { return false; }
91
92 // overrides of ld::relocatable::File
// (simple accessors for the values stored in the private members below)
93 virtual ObjcConstraint objCConstraint() const { return _objConstraint; }
94 virtual uint32_t cpuSubType() const { return _cpuSubType; }
95 virtual DebugInfoKind debugInfo() const { return _debugInfoKind; }
96 virtual const std::vector<ld::relocatable::File::Stab>* stabs() const { return &_stabs; }
97 virtual bool canScatterAtoms() const { return _canScatterAtoms; }
98 virtual const char* translationUnitSource() const;
99 virtual LinkerOptionsList* linkerOptions() const { return &_linkerOptions; }
100 virtual uint8_t swiftVersion() const { return _swiftVersion; }
101
// pointer to the file content handed to the constructor
102 const uint8_t* fileContent() { return _fileContent; }
103 private:
104 friend class Atom<A>;
105 friend class Section<A>;
106 friend class Parser<A>;
107 friend class CFISection<A>::OAS;
108
109 typedef typename A::P P;
110
111 const uint8_t* _fileContent;
112 Section<A>** _sectionsArray;
// atom storage is kept as raw byte buffers
// NOTE(review): presumably atoms are constructed in place inside these buffers -- confirm in Parser
113 uint8_t* _atomsArray;
114 uint8_t* _aliasAtomsArray;
115 uint32_t _sectionsArrayCount;
116 uint32_t _atomsArrayCount;
117 uint32_t _aliasAtomsArrayCount;
// shared pools; Atom<A> hands out iterators into these by start index + count
118 std::vector<ld::Fixup> _fixups;
119 std::vector<ld::Atom::UnwindInfo> _unwindInfos;
120 std::vector<ld::Atom::LineInfo> _lineInfos;
121 std::vector<ld::relocatable::File::Stab>_stabs;
122 ld::relocatable::File::DebugInfoKind _debugInfoKind;
123 const char* _dwarfTranslationUnitPath;
124 const macho_section<P>* _dwarfDebugInfoSect;
125 const macho_section<P>* _dwarfDebugAbbrevSect;
126 const macho_section<P>* _dwarfDebugLineSect;
127 const macho_section<P>* _dwarfDebugStringSect;
128 ld::File::ObjcConstraint _objConstraint;
129 uint8_t _swiftVersion;
130 uint32_t _cpuSubType;
131 bool _canScatterAtoms;
132 std::vector<std::vector<const char*> > _linkerOptions;
133 };
134
135
//
// Section<A> is the common base class of the per-section objects in this file.
// It derives from ld::Section and remembers the owning File<A>, the
// macho_section record it was built from (NULL when it was built from plain
// segment/section names), and a begin/end pair of Atom<A> pointers.
// Subclasses must implement computeAtomCount() and appendAtoms().
//
136 template <typename A>
137 class Section : public ld::Section
138 {
139 public:
140 typedef typename A::P::uint_t pint_t;
141 typedef typename A::P P;
142 typedef typename A::P::E E;
143
144 virtual ~Section() { }
145 class File<A>& file() const { return _file; }
// may return NULL: the name-based constructor below has no macho_section
146 const macho_section<P>* machoSection() const { return _machOSection; }
147 uint32_t sectionNum(class Parser<A>&) const;
148 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr);
149 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeUnclassified; }
// True when the mach-o section carries S_ATTR_NO_DEAD_STRIP. Sections built
// from names only have no macho_section (and therefore no flags), so they
// report false instead of dereferencing a NULL pointer.
150 virtual bool dontDeadStrip() { return (this->_machOSection != NULL) && (this->_machOSection->flags() & S_ATTR_NO_DEAD_STRIP); }
// default lookup searches the [_beginAtoms, _endAtoms) range of this section
151 virtual Atom<A>* findAtomByAddress(pint_t addr) { return this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms); }
// follow-on fixups are requested exactly when the file cannot scatter atoms
152 virtual bool addFollowOnFixups() const { return ! _file.canScatterAtoms(); }
153 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
154 struct Parser<A>::LabelAndCFIBreakIterator& it,
155 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
156 virtual uint32_t computeAtomCount(class Parser<A>& parser,
157 struct Parser<A>::LabelAndCFIBreakIterator& it,
158 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
159 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
160 virtual bool addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
// defaults for sections that do not coalesce: hash 0, never coalescable
161 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const { return 0; }
162 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
163 const ld::IndirectBindingTable& ind) const { return false; }
// default: no label is ignored
164 virtual bool ignoreLabel(const char* label) const { return false; }
165 static const char* makeSectionName(const macho_section<typename A::P>* s);
166
167 protected:
// construct from a mach-o section record; names and type are derived from it
168 Section(File<A>& f, const macho_section<typename A::P>* s)
169 : ld::Section(makeSegmentName(s), makeSectionName(s), sectionType(s)),
170 _file(f), _machOSection(s), _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
// construct from explicit names; there is no backing record, so _machOSection is NULL
171 Section(File<A>& f, const char* segName, const char* sectName, ld::Section::Type t, bool hidden=false)
172 : ld::Section(segName, sectName, t, hidden), _file(f), _machOSection(NULL),
173 _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
174
175
176 Atom<A>* findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end);
177 uint32_t x86_64PcRelOffset(uint8_t r_type);
178 void addLOH(class Parser<A>& parser, int kind, int count, const uint64_t addrs[]);
179 static const char* makeSegmentName(const macho_section<typename A::P>* s);
180 static bool readable(const macho_section<typename A::P>* s);
181 static bool writable(const macho_section<typename A::P>* s);
// NOTE(review): name is spelled "exectuable" [sic]; left as is because the
// definition and any callers are outside this view
182 static bool exectuable(const macho_section<typename A::P>* s);
183 static ld::Section::Type sectionType(const macho_section<typename A::P>* s);
184
185 File<A>& _file;
186 const macho_section<P>* _machOSection;
187 class Atom<A>* _beginAtoms;
188 class Atom<A>* _endAtoms;
189 bool _hasAliases;
190 std::set<const class Atom<A>*> _altEntries;
191 };
192
193
//
// CFISection<A> is the Section subclass whose atoms have content type typeCFI.
// It never adds follow-on fixups, and it nests the OAS address-space adapter
// that is plugged into libunwind::CFI_Atom_Info.
//
194 template <typename A>
195 class CFISection : public Section<A>
196 {
197 public:
// 'parser' is not used by this constructor; only f and s are forwarded
198 CFISection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
199 : Section<A>(f, s) { }
200 uint32_t cfiCount(Parser<A>& parser);
201
202 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeCFI; }
203 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
204 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
205 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
206 virtual bool addFollowOnFixups() const { return false; }
207
208
209 ///
210 /// ObjectFileAddressSpace is used as a template parameter to UnwindCursor for parsing
211 /// dwarf CFI information in an object file.
212 ///
213 class OAS
214 {
215 public:
216 typedef typename A::P::uint_t pint_t;
217 typedef typename A::P P;
218 typedef typename A::P::E E;
// NOTE(review): sint_t aliases the same unsigned type as pint_t, so it is not
// actually signed -- confirm this is intended
219 typedef typename A::P::uint_t sint_t;
220
// records the section's address range [addr, addr+size) and the content buffer
221 OAS(CFISection<A>& ehFrameSection, const uint8_t* ehFrameBuffer) :
222 _ehFrameSection(ehFrameSection),
223 _ehFrameContent(ehFrameBuffer),
224 _ehFrameStartAddr(ehFrameSection.machoSection()->addr()),
225 _ehFrameEndAddr(ehFrameSection.machoSection()->addr()+ehFrameSection.machoSection()->size()) {}
226
// fixed-width reads: translate addr with mappedAddress(), load through a cast
// pointer, then convert with the E / P helpers
227 uint8_t get8(pint_t addr) { return *((uint8_t*)mappedAddress(addr)); }
228 uint16_t get16(pint_t addr) { return E::get16(*((uint16_t*)mappedAddress(addr))); }
229 uint32_t get32(pint_t addr) { return E::get32(*((uint32_t*)mappedAddress(addr))); }
230 uint64_t get64(pint_t addr) { return E::get64(*((uint64_t*)mappedAddress(addr))); }
231 pint_t getP(pint_t addr) { return P::getP(*((pint_t*)mappedAddress(addr))); }
// variable-length reads take addr by reference
// NOTE(review): presumably addr is advanced past the bytes consumed -- definitions are elsewhere
232 uint64_t getULEB128(pint_t& addr, pint_t end);
233 int64_t getSLEB128(pint_t& addr, pint_t end);
234 pint_t getEncodedP(pint_t& addr, pint_t end, uint8_t encoding);
235 private:
236 const void* mappedAddress(pint_t addr);
237
238 CFISection<A>& _ehFrameSection;
239 const uint8_t* _ehFrameContent;
240 pint_t _ehFrameStartAddr;
241 pint_t _ehFrameEndAddr;
242 };
243
244
245 typedef typename A::P::uint_t pint_t;
// libunwind's per-entry CFI record, instantiated over the OAS address space
246 typedef libunwind::CFI_Atom_Info<OAS> CFI_Atom_Info;
247
248 void cfiParse(class Parser<A>& parser, uint8_t* buffer, CFI_Atom_Info cfiArray[], uint32_t& cfiCount, const pint_t cuStarts[], uint32_t cuCount);
249 bool needsRelocating();
250
251 static bool bigEndian();
252 private:
253 void addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo);
254 static void warnFunc(void* ref, uint64_t funcAddr, const char* msg);
255 };
256
257
//
// CUSection<A> is the Section subclass for compact-unwind entries. It
// contributes no atoms of its own (computeAtomCount() and appendAtoms() both
// return 0) and never adds follow-on fixups; instead it exposes count()/parse()
// to fill an array of Info records.
//
258 template <typename A>
259 class CUSection : public Section<A>
260 {
261 public:
// 'parser' is not used by this constructor; only f and s are forwarded
262 CUSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
263 : Section<A>(f, s) { }
264
265 typedef typename A::P::uint_t pint_t;
266 typedef typename A::P P;
267 typedef typename A::P::E E;
268
269 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
270 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
271 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
272 virtual bool addFollowOnFixups() const { return false; }
273
// One record per entry handled by parse().
// NOTE(review): field meanings below are read off the names -- confirm against parse()
274 struct Info {
275 pint_t functionStartAddress;
276 uint32_t functionSymbolIndex;
277 uint32_t rangeLength;
278 uint32_t compactUnwindInfo;
279 const char* personality;
280 pint_t lsdaAddress;
281 Atom<A>* function;
282 Atom<A>* lsda;
283 };
284
285 uint32_t count();
// fills 'array', which the caller sizes with 'cnt' entries
// NOTE(review): presumably cnt == count() -- confirm at call site
286 void parse(class Parser<A>& parser, uint32_t cnt, Info array[]);
287 static bool encodingMeansUseDwarf(compact_unwind_encoding_t enc);
288
289
290 private:
291
292 const char* personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc);
293
// comparison callback with a qsort-style signature, for ordering Info records
294 static int infoSorter(const void* l, const void* r);
295
296 };
297
298
//
// TentativeDefinitionSection<A> is a Section with no backing mach-o section
// record: it is constructed by name as __DATA/__comm/tent with type
// typeTentativeDefs. Its atoms are zero-fill, it needs no fixups, and address
// lookup is unsupported.
//
299 template <typename A>
300 class TentativeDefinitionSection : public Section<A>
301 {
302 public:
// 'parser' is not used by this constructor
303 TentativeDefinitionSection(Parser<A>& parser, File<A>& f)
304 : Section<A>(f, "__DATA", "__comm/tent", ld::Section::typeTentativeDefs) {}
305
306 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeZeroFill; }
307 virtual bool addFollowOnFixups() const { return false; }
// always throws a C string; callers must not look up atoms by address here
308 virtual Atom<A>* findAtomByAddress(typename A::P::uint_t addr) { throw "TentativeDefinitionSection::findAtomByAddress() should never be called"; }
309 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
310 const struct Parser<A>::CFI_CU_InfoArrays&);
311 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
312 struct Parser<A>::LabelAndCFIBreakIterator& it,
313 const struct Parser<A>::CFI_CU_InfoArrays&);
// intentionally empty: this section creates no fixups
314 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
315 private:
316 typedef typename A::P::uint_t pint_t;
317 typedef typename A::P P;
318 };
319
320
//
// AbsoluteSymbolSection<A> is a Section with no backing mach-o section record:
// it is constructed by name as __DATA/__abs with type typeAbsoluteSymbols and
// the 'hidden' constructor argument set to true. Its atoms are never marked
// dont-dead-strip, use Alignment(0), and need no fixups.
//
321 template <typename A>
322 class AbsoluteSymbolSection : public Section<A>
323 {
324 public:
// 'parser' is not used by this constructor
325 AbsoluteSymbolSection(Parser<A>& parser, File<A>& f)
326 : Section<A>(f, "__DATA", "__abs", ld::Section::typeAbsoluteSymbols, true) {}
327
328 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeUnclassified; }
329 virtual bool dontDeadStrip() { return false; }
330 virtual ld::Atom::Alignment alignmentForAddress(typename A::P::uint_t addr) { return ld::Atom::Alignment(0); }
331 virtual bool addFollowOnFixups() const { return false; }
// always throws a C string; use findAbsAtomForValue() instead
332 virtual Atom<A>* findAtomByAddress(typename A::P::uint_t addr) { throw "AbsoluteSymbolSection::findAtomByAddress() should never be called"; }
333 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
334 const struct Parser<A>::CFI_CU_InfoArrays&);
335 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
336 struct Parser<A>::LabelAndCFIBreakIterator& it,
337 const struct Parser<A>::CFI_CU_InfoArrays&);
// intentionally empty: this section creates no fixups
338 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
// lookup keyed by a symbol value rather than a section address
339 virtual Atom<A>* findAbsAtomForValue(typename A::P::uint_t);
340
341 private:
342 typedef typename A::P::uint_t pint_t;
343 typedef typename A::P P;
344 };
345
346
//
// SymboledSection<A> is a Section subclass that reports the atom content type
// stored in _type. The constructor, dontDeadStrip(), computeAtomCount() and
// appendAtoms() are defined elsewhere in the file.
// NOTE(review): presumably _type is chosen by the constructor from the section -- confirm
//
347 template <typename A>
348 class SymboledSection : public Section<A>
349 {
350 public:
351 SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s);
352 virtual ld::Atom::ContentType contentType() { return _type; }
353 virtual bool dontDeadStrip();
354 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
355 const struct Parser<A>::CFI_CU_InfoArrays&);
356 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
357 struct Parser<A>::LabelAndCFIBreakIterator& it,
358 const struct Parser<A>::CFI_CU_InfoArrays&);
359 protected:
360 typedef typename A::P::uint_t pint_t;
361 typedef typename A::P P;
362
// value returned by contentType()
363 ld::Atom::ContentType _type;
364 };
365
366
//
// TLVDefsSection<A> is a distinct type layered on SymboledSection<A>. In this
// declaration it adds no members or overrides; the constructor only forwards
// its arguments to the base class.
//
367 template <typename A>
368 class TLVDefsSection : public SymboledSection<A>
369 {
370 public:
371 TLVDefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s) :
372 SymboledSection<A>(parser, f, s) { }
373
374 private:
375
376 };
377
378
//
// ImplicitSizeSection<A> is the abstract base for sections whose subclasses
// supply, per address, the element size, the name to use for an unlabeled
// atom, the combine mode, and whether the element at that address is used.
// By default it never adds follow-on fixups, gives atoms linkage-unit scope
// and a regular definition, and ignores labels that begin with 'L'.
//
379 template <typename A>
380 class ImplicitSizeSection : public Section<A>
381 {
382 public:
// 'parser' is not used by this constructor; only f and s are forwarded
383 ImplicitSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
384 : Section<A>(f, s) { }
385 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
386 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
387 protected:
388 typedef typename A::P::uint_t pint_t;
389 typedef typename A::P P;
390
391 virtual bool addFollowOnFixups() const { return false; }
// hooks every concrete subclass must provide are marked "= 0"
392 virtual const char* unlabeledAtomName(Parser<A>& parser, pint_t addr) = 0;
393 virtual ld::Atom::SymbolTableInclusion symbolTableInclusion();
394 virtual pint_t elementSizeAtAddress(pint_t addr) = 0;
395 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeLinkageUnit; }
396 virtual bool useElementAt(Parser<A>& parser,
397 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr) = 0;
398 virtual ld::Atom::Definition definition() { return ld::Atom::definitionRegular; }
399 virtual ld::Atom::Combine combine(Parser<A>& parser, pint_t addr) = 0;
// reads label[0] unconditionally, so 'label' must be a non-NULL string
400 virtual bool ignoreLabel(const char* label) const { return (label[0] == 'L'); }
401 };
402
403
//
// FixedSizeSection<A> is an ImplicitSizeSection whose useElementAt() accepts
// every address unconditionally. Element size, unlabeled name and combine
// mode are still left to concrete subclasses.
//
404 template <typename A>
405 class FixedSizeSection : public ImplicitSizeSection<A>
406 {
407 public:
408 FixedSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
409 : ImplicitSizeSection<A>(parser, f, s) { }
410 protected:
411 typedef typename A::P::uint_t pint_t;
412 typedef typename A::P P;
413 typedef typename A::P::E E;
414
// every element is used; the arguments are ignored
415 virtual bool useElementAt(Parser<A>& parser,
416 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
417 { return true; }
418 };
419
420
//
// Literal4Section<A>: fixed-size section of 4-byte elements. Unlabeled atoms
// are named "4-byte-literal" and are combined by name and content; hashing,
// coalescing and label filtering are defined elsewhere in the file.
//
421 template <typename A>
422 class Literal4Section : public FixedSizeSection<A>
423 {
424 public:
425 Literal4Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
426 : FixedSizeSection<A>(parser, f, s) {}
427 protected:
428 typedef typename A::P::uint_t pint_t;
429 typedef typename A::P P;
430
// Alignment(2) regardless of addr
// NOTE(review): presumably a log2 value, i.e. 4-byte alignment -- confirm against ld::Atom::Alignment
431 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(2); }
432 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "4-byte-literal"; }
433 virtual pint_t elementSizeAtAddress(pint_t addr) { return 4; }
434 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
435 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
436 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
437 const ld::IndirectBindingTable& ind) const;
438 virtual bool ignoreLabel(const char* label) const;
439 };
440
//
// Literal8Section<A>: fixed-size section of 8-byte elements. Unlabeled atoms
// are named "8-byte-literal" and are combined by name and content; hashing,
// coalescing and label filtering are defined elsewhere in the file.
//
441 template <typename A>
442 class Literal8Section : public FixedSizeSection<A>
443 {
444 public:
445 Literal8Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
446 : FixedSizeSection<A>(parser, f, s) {}
447 protected:
448 typedef typename A::P::uint_t pint_t;
449 typedef typename A::P P;
450
// Alignment(3) regardless of addr
// NOTE(review): presumably a log2 value, i.e. 8-byte alignment -- confirm against ld::Atom::Alignment
451 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(3); }
452 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "8-byte-literal"; }
453 virtual pint_t elementSizeAtAddress(pint_t addr) { return 8; }
454 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
455 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
456 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
457 const ld::IndirectBindingTable& ind) const;
458 virtual bool ignoreLabel(const char* label) const;
459 };
460
//
// Literal16Section<A>: fixed-size section of 16-byte elements. Unlabeled atoms
// are named "16-byte-literal" and are combined by name and content; hashing,
// coalescing and label filtering are defined elsewhere in the file.
//
461 template <typename A>
462 class Literal16Section : public FixedSizeSection<A>
463 {
464 public:
465 Literal16Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
466 : FixedSizeSection<A>(parser, f, s) {}
467 protected:
468 typedef typename A::P::uint_t pint_t;
469 typedef typename A::P P;
470
// Alignment(4) regardless of addr
// NOTE(review): presumably a log2 value, i.e. 16-byte alignment -- confirm against ld::Atom::Alignment
471 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(4); }
472 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "16-byte-literal"; }
473 virtual pint_t elementSizeAtAddress(pint_t addr) { return 16; }
474 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
475 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
476 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
477 const ld::IndirectBindingTable& ind) const;
478 virtual bool ignoreLabel(const char* label) const;
479 };
480
481
//
// NonLazyPointerSection<A>: fixed-size section of pointer-sized elements with
// content type typeNonLazyPointer. Unlabeled atoms are named "non_lazy_ptr",
// every label in the section is ignored, and the section builds its own
// fixups. Scope, combine mode, hashing and coalescing are defined elsewhere.
//
482 template <typename A>
483 class NonLazyPointerSection : public FixedSizeSection<A>
484 {
485 public:
486 NonLazyPointerSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
487 : FixedSizeSection<A>(parser, f, s) {}
488 protected:
489 typedef typename A::P::uint_t pint_t;
490 typedef typename A::P P;
491
492 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
493 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeNonLazyPointer; }
// alignment is log2 of the pointer size; log2() is the floating-point <math.h>
// function and its result is converted when passed to Alignment
494 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
495 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "non_lazy_ptr"; }
496 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
497 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& parser, pint_t addr);
498 virtual ld::Atom::Combine combine(Parser<A>&, pint_t);
499 virtual bool ignoreLabel(const char* label) const { return true; }
500 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
501 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
502 const ld::IndirectBindingTable& ind) const;
503
504 private:
505 static const char* targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind);
506 static ld::Fixup::Kind fixupKind();
507 };
508
509
//
// CFStringSection<A>: fixed-size section whose elements are four pointers
// wide. Unlabeled atoms are named "CFString", are combined by name and
// references, and every label in the section is ignored. Hashing and
// coalescing are defined elsewhere and use the private targetContent() helper.
//
510 template <typename A>
511 class CFStringSection : public FixedSizeSection<A>
512 {
513 public:
514 CFStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
515 : FixedSizeSection<A>(parser, f, s) {}
516 protected:
517 typedef typename A::P::uint_t pint_t;
518
// alignment is log2 of the pointer size (floating-point log2 from <math.h>)
519 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
520 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "CFString"; }
521 virtual pint_t elementSizeAtAddress(pint_t addr) { return 4*sizeof(pint_t); }
522 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndReferences; }
523 virtual bool ignoreLabel(const char* label) const { return true; }
524 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
525 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
526 const ld::IndirectBindingTable& ind) const;
527 private:
// classification reported through the 'ct' out-parameter of targetContent()
528 enum ContentType { contentUTF8, contentUTF16, contentUnknown };
529 static const uint8_t* targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
530 ContentType* ct, unsigned int* count);
531 };
532
533
//
// ObjC1ClassSection<A>: fixed-size section whose atoms are global in scope,
// included in the symbol table, and never combined or coalesced (hash 0,
// canCoalesceWith false). Every label in the section is ignored; atom names,
// element sizes and reloc fixups are computed by functions defined elsewhere.
//
534 template <typename A>
535 class ObjC1ClassSection : public FixedSizeSection<A>
536 {
537 public:
538 ObjC1ClassSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
539 : FixedSizeSection<A>(parser, f, s) {}
540 protected:
541 typedef typename A::P::uint_t pint_t;
542 typedef typename A::P P;
543 typedef typename A::P::E E;
544
545 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& , pint_t ) { return ld::Atom::scopeGlobal; }
// Alignment(2) regardless of addr
// NOTE(review): presumably a log2 value, i.e. 4-byte alignment -- confirm against ld::Atom::Alignment
546 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(2); }
547 virtual const char* unlabeledAtomName(Parser<A>&, pint_t);
548 virtual ld::Atom::SymbolTableInclusion symbolTableInclusion() { return ld::Atom::symbolTableIn; }
549 virtual pint_t elementSizeAtAddress(pint_t addr);
550 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineNever; }
551 virtual bool ignoreLabel(const char* label) const { return true; }
552 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
553 { return 0; }
554 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
555 const ld::IndirectBindingTable& ind) const { return false; }
556 virtual bool addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
557 };
558
559
//
// ObjC2ClassRefsSection<A>: fixed-size section of pointer-sized elements.
// Unlabeled atoms are named "objc-class-ref", are combined by name and
// references, and every label in the section is ignored. Hashing and
// coalescing are defined elsewhere.
//
560 template <typename A>
561 class ObjC2ClassRefsSection : public FixedSizeSection<A>
562 {
563 public:
564 ObjC2ClassRefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
565 : FixedSizeSection<A>(parser, f, s) {}
566 protected:
567 typedef typename A::P::uint_t pint_t;
568
// alignment is log2 of the pointer size (floating-point log2 from <math.h>)
569 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
570 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "objc-class-ref"; }
571 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
572 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndReferences; }
573 virtual bool ignoreLabel(const char* label) const { return true; }
574 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
575 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
576 const ld::IndirectBindingTable& ind) const;
577 private:
578 const char* targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
579 };
580
581
//
// ObjC2CategoryListSection<A>: fixed-size section of pointer-sized elements.
// Unlabeled atoms are named "objc-cat-list", have translation-unit scope, are
// never combined, and every label in the section is ignored.
//
582 template <typename A>
583 class ObjC2CategoryListSection : public FixedSizeSection<A>
584 {
585 public:
586 ObjC2CategoryListSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
587 : FixedSizeSection<A>(parser, f, s) {}
588 protected:
589 typedef typename A::P::uint_t pint_t;
590
// alignment is log2 of the pointer size (floating-point log2 from <math.h>)
591 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
592 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeTranslationUnit; }
593 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "objc-cat-list"; }
594 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
595 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineNever; }
596 virtual bool ignoreLabel(const char* label) const { return true; }
597 private:
598 const char* targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
599 };
600
601
//
// PointerToCStringSection<A>: fixed-size section of pointer-sized elements.
// Unlabeled atoms are named "pointer-to-literal-cstring", are combined by name
// and references, and every label in the section is ignored. targetCString()
// is virtual so that subclasses can change how the target string is found.
//
602 template <typename A>
603 class PointerToCStringSection : public FixedSizeSection<A>
604 {
605 public:
606 PointerToCStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
607 : FixedSizeSection<A>(parser, f, s) {}
608 protected:
609 typedef typename A::P::uint_t pint_t;
610
// alignment is log2 of the pointer size (floating-point log2 from <math.h>)
611 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
612 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "pointer-to-literal-cstring"; }
613 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
614 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndReferences; }
615 virtual bool ignoreLabel(const char* label) const { return true; }
616 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
617 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
618 const ld::IndirectBindingTable& ind) const;
619 virtual const char* targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
620 };
621
622
//
// Objc1ClassReferences<A>: a PointerToCStringSection whose unlabeled atoms are
// named "pointer-to-literal-objc-class-name". It supplies its own
// addRelocFixup() and targetCString(), both defined elsewhere in the file.
// Note that everything here is declared public, whereas the base class
// declares the corresponding members protected.
//
623 template <typename A>
624 class Objc1ClassReferences : public PointerToCStringSection<A>
625 {
626 public:
627 Objc1ClassReferences(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
628 : PointerToCStringSection<A>(parser, f, s) {}
629
630 typedef typename A::P::uint_t pint_t;
631 typedef typename A::P P;
632
633 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "pointer-to-literal-objc-class-name"; }
634 virtual bool addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
635 virtual const char* targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
636 };
637
638
//
// CStringSection<A>: an ImplicitSizeSection with content type typeCString.
// Unlabeled atoms are named "cstring" and are combined by name and content.
// Element size, element selection, label filtering, address lookup, hashing
// and coalescing are all defined elsewhere in the file.
//
639 template <typename A>
640 class CStringSection : public ImplicitSizeSection<A>
641 {
642 public:
643 CStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
644 : ImplicitSizeSection<A>(parser, f, s) {}
645 protected:
646 typedef typename A::P::uint_t pint_t;
647 typedef typename A::P P;
648
649 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeCString; }
650 virtual Atom<A>* findAtomByAddress(pint_t addr);
651 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "cstring"; }
// NOTE(review): presumably the string length including its terminator -- confirm in the definition
652 virtual pint_t elementSizeAtAddress(pint_t addr);
653 virtual bool ignoreLabel(const char* label) const;
654 virtual bool useElementAt(Parser<A>& parser,
655 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr);
656 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
657 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
658 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
659 const ld::IndirectBindingTable& ind) const;
660
661 };
662
663
//
// UTF16StringSection<A>: a SymboledSection that supplies its own contentHash()
// and canCoalesceWith(), both defined elsewhere in the file.
//
664 template <typename A>
665 class UTF16StringSection : public SymboledSection<A>
666 {
667 public:
668 UTF16StringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
669 : SymboledSection<A>(parser, f, s) {}
670 protected:
671 typedef typename A::P::uint_t pint_t;
672 typedef typename A::P P;
673
// NOTE(review): neither SymboledSection nor Section declares combine() in this
// view, so this appears to introduce a new virtual rather than override one
674 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
675 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
676 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
677 const ld::IndirectBindingTable& ind) const;
678 };
679
680
681 //
682 // Atoms in mach-o files
683 //
684 template <typename A>
685 class Atom : public ld::Atom
686 {
687 public:
688 // overrides of ld::Atom
689 virtual const ld::File* file() const;
690 virtual const char* translationUnitSource() const
691 { return sect().file().translationUnitSource(); }
692 virtual const char* name() const { return _name; }
693 virtual uint64_t size() const { return _size; }
694 virtual uint64_t objectAddress() const { return _objAddress; }
695 virtual void copyRawContent(uint8_t buffer[]) const;
696 virtual const uint8_t* rawContentPointer() const { return contentPointer(); }
697 virtual unsigned long contentHash(const ld::IndirectBindingTable& ind) const
698 { if ( _hash == 0 ) _hash = sect().contentHash(this, ind); return _hash; }
699 virtual bool canCoalesceWith(const ld::Atom& rhs, const ld::IndirectBindingTable& ind) const
700 { return sect().canCoalesceWith(this, rhs, ind); }
701 virtual ld::Fixup::iterator fixupsBegin() const { return &machofile()._fixups[_fixupsStartIndex]; }
702 virtual ld::Fixup::iterator fixupsEnd() const { return &machofile()._fixups[_fixupsStartIndex+_fixupsCount]; }
703 virtual ld::Atom::UnwindInfo::iterator beginUnwind() const { return &machofile()._unwindInfos[_unwindInfoStartIndex]; }
704 virtual ld::Atom::UnwindInfo::iterator endUnwind() const { return &machofile()._unwindInfos[_unwindInfoStartIndex+_unwindInfoCount]; }
705 virtual ld::Atom::LineInfo::iterator beginLineInfo() const{ return &machofile()._lineInfos[_lineInfoStartIndex]; }
706 virtual ld::Atom::LineInfo::iterator endLineInfo() const { return &machofile()._lineInfos[_lineInfoStartIndex+_lineInfoCount]; }
707 virtual void setFile(const ld::File* f);
708
709 private:
710
711 enum { kFixupStartIndexBits = 32,
712 kLineInfoStartIndexBits = 32,
713 kUnwindInfoStartIndexBits = 24,
714 kFixupCountBits = 24,
715 kLineInfoCountBits = 12,
716 kUnwindInfoCountBits = 4
717 }; // must sum to 128
718
719 public:
720 // methods for all atoms from mach-o object file
721 Section<A>& sect() const { return (Section<A>&)section(); }
722 File<A>& machofile() const { return ((Section<A>*)(this->_section))->file(); }
723 void setFixupsRange(uint32_t s, uint32_t c);
724 void setUnwindInfoRange(uint32_t s, uint32_t c);
725 void extendUnwindInfoRange();
726 void setLineInfoRange(uint32_t s, uint32_t c);
727 bool roomForMoreLineInfoCount() { return (_lineInfoCount < ((1<<kLineInfoCountBits)-1)); }
728 void incrementLineInfoCount() { assert(roomForMoreLineInfoCount()); ++_lineInfoCount; }
729 void incrementFixupCount() { if (_fixupsCount == ((1 << kFixupCountBits)-1))
730 throwf("too may fixups in %s", name()); ++_fixupsCount; }
731 const uint8_t* contentPointer() const;
732 uint32_t fixupCount() const { return _fixupsCount; }
733 void verifyAlignment(const macho_section<typename A::P>&) const;
734
735 typedef typename A::P P;
736 typedef typename A::P::E E;
737 typedef typename A::P::uint_t pint_t;
738 // constuct via all attributes
739 Atom(Section<A>& sct, const char* nm, pint_t addr, uint64_t sz,
740 ld::Atom::Definition d, ld::Atom::Combine c, ld::Atom::Scope s,
741 ld::Atom::ContentType ct, ld::Atom::SymbolTableInclusion i,
742 bool dds, bool thumb, bool al, ld::Atom::Alignment a)
743 : ld::Atom((ld::Section&)sct, d, c, s, ct, i, dds, thumb, al, a),
744 _size(sz), _objAddress(addr), _name(nm), _hash(0),
745 _fixupsStartIndex(0), _lineInfoStartIndex(0),
746 _unwindInfoStartIndex(0), _fixupsCount(0),
747 _lineInfoCount(0), _unwindInfoCount(0) { }
748 // construct via symbol table entry
749 Atom(Section<A>& sct, Parser<A>& parser, const macho_nlist<P>& sym,
750 uint64_t sz, bool alias=false)
751 : ld::Atom((ld::Section&)sct, parser.definitionFromSymbol(sym),
752 parser.combineFromSymbol(sym), parser.scopeFromSymbol(sym),
753 parser.resolverFromSymbol(sym) ? ld::Atom::typeResolver : sct.contentType(),
754 parser.inclusionFromSymbol(sym),
755 parser.dontDeadStripFromSymbol(sym) || sct.dontDeadStrip(),
756 parser.isThumbFromSymbol(sym), alias,
757 sct.alignmentForAddress(sym.n_value())),
758 _size(sz), _objAddress(sym.n_value()),
759 _name(parser.nameFromSymbol(sym)), _hash(0),
760 _fixupsStartIndex(0), _lineInfoStartIndex(0),
761 _unwindInfoStartIndex(0), _fixupsCount(0),
762 _lineInfoCount(0), _unwindInfoCount(0) {
763 // <rdar://problem/6783167> support auto-hidden weak symbols
764 if ( _scope == ld::Atom::scopeGlobal &&
765 (sym.n_desc() & (N_WEAK_DEF|N_WEAK_REF)) == (N_WEAK_DEF|N_WEAK_REF) )
766 this->setAutoHide();
767 this->verifyAlignment(*sct.machoSection());
768 }
769
770 private:
771 friend class Parser<A>;
772 friend class Section<A>;
773 friend class CStringSection<A>;
774 friend class AbsoluteSymbolSection<A>;
775
776 pint_t _size;
777 pint_t _objAddress;
778 const char* _name;
779 mutable unsigned long _hash;
780
781 uint64_t _fixupsStartIndex : kFixupStartIndexBits,
782 _lineInfoStartIndex : kLineInfoStartIndexBits,
783 _unwindInfoStartIndex : kUnwindInfoStartIndexBits,
784 _fixupsCount : kFixupCountBits,
785 _lineInfoCount : kLineInfoCountBits,
786 _unwindInfoCount : kUnwindInfoCountBits;
787
788 static std::map<const ld::Atom*, const ld::File*> _s_fileOverride;
789 };
790
791 template <typename A>
792 std::map<const ld::Atom*, const ld::File*> Atom<A>::_s_fileOverride;
793
794 template <typename A>
795 void Atom<A>::setFile(const ld::File* f) {
796 _s_fileOverride[this] = f;
797 }
798
799 template <typename A>
800 const ld::File* Atom<A>::file() const
801 {
802 std::map<const ld::Atom*, const ld::File*>::iterator pos = _s_fileOverride.find(this);
803 if ( pos != _s_fileOverride.end() )
804 return pos->second;
805
806 return &sect().file();
807 }
808
809 template <typename A>
810 void Atom<A>::setFixupsRange(uint32_t startIndex, uint32_t count)
811 {
812 if ( count >= (1 << kFixupCountBits) )
813 throwf("too many fixups in function %s", this->name());
814 if ( startIndex >= (1 << kFixupStartIndexBits) )
815 throwf("too many fixups in file");
816 assert(((startIndex+count) <= sect().file()._fixups.size()) && "fixup index out of range");
817 _fixupsStartIndex = startIndex;
818 _fixupsCount = count;
819 }
820
821 template <typename A>
822 void Atom<A>::setUnwindInfoRange(uint32_t startIndex, uint32_t count)
823 {
824 if ( count >= (1 << kUnwindInfoCountBits) )
825 throwf("too many compact unwind infos in function %s", this->name());
826 if ( startIndex >= (1 << kUnwindInfoStartIndexBits) )
827 throwf("too many compact unwind infos (%d) in file", startIndex);
828 assert((startIndex+count) <= sect().file()._unwindInfos.size() && "unwindinfo index out of range");
829 _unwindInfoStartIndex = startIndex;
830 _unwindInfoCount = count;
831 }
832
833 template <typename A>
834 void Atom<A>::extendUnwindInfoRange()
835 {
836 if ( _unwindInfoCount+1 >= (1 << kUnwindInfoCountBits) )
837 throwf("too many compact unwind infos in function %s", this->name());
838 _unwindInfoCount += 1;
839 }
840
841 template <typename A>
842 void Atom<A>::setLineInfoRange(uint32_t startIndex, uint32_t count)
843 {
844 assert((count < (1 << kLineInfoCountBits)) && "too many line infos");
845 assert((startIndex+count) < sect().file()._lineInfos.size() && "line info index out of range");
846 _lineInfoStartIndex = startIndex;
847 _lineInfoCount = count;
848 }
849
850 template <typename A>
851 const uint8_t* Atom<A>::contentPointer() const
852 {
853 const macho_section<P>* sct = this->sect().machoSection();
854 if ( this->_objAddress > sct->addr() + sct->size() )
855 throwf("malformed .o file, symbol has address 0x%0llX which is outside range of its section", (uint64_t)this->_objAddress);
856 uint32_t fileOffset = sct->offset() - sct->addr() + this->_objAddress;
857 return this->sect().file().fileContent()+fileOffset;
858 }
859
860
861 template <typename A>
862 void Atom<A>::copyRawContent(uint8_t buffer[]) const
863 {
864 // copy base bytes
865 if ( this->contentType() == ld::Atom::typeZeroFill ) {
866 bzero(buffer, _size);
867 }
868 else if ( _size != 0 ) {
869 memcpy(buffer, this->contentPointer(), _size);
870 }
871 }
872
873 template <>
874 void Atom<arm>::verifyAlignment(const macho_section<P>&) const
875 {
876 if ( (this->section().type() == ld::Section::typeCode) && ! isThumb() ) {
877 if ( ((_objAddress % 4) != 0) || (this->alignment().powerOf2 < 2) )
878 warning("ARM function not 4-byte aligned: %s from %s", this->name(), this->file()->path());
879 }
880 }
881
#if SUPPORT_ARCH_arm64
// arm64: warns when an atom in a non-empty code section is not at a
// 4-byte aligned address or has an alignment below 2^2.
template <>
void Atom<arm64>::verifyAlignment(const macho_section<P>& sect) const
{
	const bool inCode = ( this->section().type() == ld::Section::typeCode );
	if ( !inCode || (sect.size() == 0) )
		return;

	const bool addressMisaligned = ( (_objAddress % 4) != 0 );
	if ( addressMisaligned || (this->alignment().powerOf2 < 2) )
		warning("arm64 function not 4-byte aligned: %s from %s", this->name(), this->file()->path());
}
#endif
892
893 template <typename A>
894 void Atom<A>::verifyAlignment(const macho_section<P>&) const
895 {
896 }
897
898
899 class AliasAtom : public ld::Atom
900 {
901 public:
902 AliasAtom(const char* name, bool hidden, const ld::File* file, const char* aliasOfName) :
903 ld::Atom(_s_section, ld::Atom::definitionRegular, ld::Atom::combineNever,
904 (hidden ? ld::Atom::scopeLinkageUnit : ld::Atom::scopeGlobal),
905 ld::Atom::typeUnclassified, ld::Atom::symbolTableIn,
906 false, false, true, 0),
907 _file(file),
908 _name(name),
909 _fixup(0, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, ld::Fixup::bindingByNameUnbound, aliasOfName) { }
910
911 virtual const ld::File* file() const { return _file; }
912 virtual const char* translationUnitSource() const
913 { return NULL; }
914 virtual const char* name() const { return _name; }
915 virtual uint64_t size() const { return 0; }
916 virtual uint64_t objectAddress() const { return 0; }
917 virtual void copyRawContent(uint8_t buffer[]) const { }
918 virtual ld::Fixup::iterator fixupsBegin() const { return &((ld::Fixup*)&_fixup)[0]; }
919 virtual ld::Fixup::iterator fixupsEnd() const { return &((ld::Fixup*)&_fixup)[1]; }
920
921 private:
922 static ld::Section _s_section;
923
924 const ld::File* _file;
925 const char* _name;
926 ld::Fixup _fixup;
927 };
928
929 ld::Section AliasAtom::_s_section("__LD", "__aliases", ld::Section::typeTempAlias, true);
930
931
932 template <typename A>
933 class Parser
934 {
935 public:
936 static bool validFile(const uint8_t* fileContent, bool subtypeMustMatch=false,
937 cpu_subtype_t subtype=0);
938 static const char* fileKind(const uint8_t* fileContent);
939 static bool hasObjC2Categories(const uint8_t* fileContent);
940 static bool hasObjC1Categories(const uint8_t* fileContent);
941 static ld::relocatable::File* parse(const uint8_t* fileContent, uint64_t fileLength,
942 const char* path, time_t modTime, ld::File::Ordinal ordinal,
943 const ParserOptions& opts) {
944 Parser p(fileContent, fileLength, path, modTime,
945 ordinal, opts.warnUnwindConversionProblems,
946 opts.keepDwarfUnwind, opts.forceDwarfConversion,
947 opts.neverConvertDwarf, opts.verboseOptimizationHints);
948 return p.parse(opts);
949 }
950
951 typedef typename A::P P;
952 typedef typename A::P::E E;
953 typedef typename A::P::uint_t pint_t;
954
955 struct SourceLocation {
956 SourceLocation() {}
957 SourceLocation(Atom<A>* a, uint32_t o) : atom(a), offsetInAtom(o) {}
958 Atom<A>* atom;
959 uint32_t offsetInAtom;
960 };
961
962 struct TargetDesc {
963 Atom<A>* atom;
964 const char* name; // only used if targetAtom is NULL
965 int64_t addend;
966 bool weakImport; // only used if targetAtom is NULL
967 };
968
969 struct FixupInAtom {
970 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) :
971 fixup(src.offsetInAtom, c, k, target), atom(src.atom) { src.atom->incrementFixupCount(); }
972
973 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) :
974 fixup(src.offsetInAtom, c, k, b, target), atom(src.atom) { src.atom->incrementFixupCount(); }
975
976 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) :
977 fixup(src.offsetInAtom, c, k, wi, name), atom(src.atom) { src.atom->incrementFixupCount(); }
978
979 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) :
980 fixup(src.offsetInAtom, c, k, b, name), atom(src.atom) { src.atom->incrementFixupCount(); }
981
982 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) :
983 fixup(src.offsetInAtom, c, k, addend), atom(src.atom) { src.atom->incrementFixupCount(); }
984
985 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) :
986 fixup(src.offsetInAtom, c, k, (uint64_t)0), atom(src.atom) { src.atom->incrementFixupCount(); }
987
988 ld::Fixup fixup;
989 Atom<A>* atom;
990 };
991
992 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) {
993 _allFixups.push_back(FixupInAtom(src, c, k, target));
994 }
995
996 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) {
997 _allFixups.push_back(FixupInAtom(src, c, k, b, target));
998 }
999
1000 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) {
1001 _allFixups.push_back(FixupInAtom(src, c, k, wi, name));
1002 }
1003
1004 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) {
1005 _allFixups.push_back(FixupInAtom(src, c, k, b, name));
1006 }
1007
1008 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) {
1009 _allFixups.push_back(FixupInAtom(src, c, k, addend));
1010 }
1011
1012 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) {
1013 _allFixups.push_back(FixupInAtom(src, c, k));
1014 }
1015
1016 const char* path() { return _path; }
1017 uint32_t symbolCount() { return _symbolCount; }
1018 uint32_t indirectSymbol(uint32_t indirectIndex);
1019 const macho_nlist<P>& symbolFromIndex(uint32_t index);
1020 const char* nameFromSymbol(const macho_nlist<P>& sym);
1021 ld::Atom::Scope scopeFromSymbol(const macho_nlist<P>& sym);
1022 static ld::Atom::Definition definitionFromSymbol(const macho_nlist<P>& sym);
1023 static ld::Atom::Combine combineFromSymbol(const macho_nlist<P>& sym);
1024 ld::Atom::SymbolTableInclusion inclusionFromSymbol(const macho_nlist<P>& sym);
1025 static bool dontDeadStripFromSymbol(const macho_nlist<P>& sym);
1026 static bool isThumbFromSymbol(const macho_nlist<P>& sym);
1027 static bool weakImportFromSymbol(const macho_nlist<P>& sym);
1028 static bool resolverFromSymbol(const macho_nlist<P>& sym);
1029 static bool altEntryFromSymbol(const macho_nlist<P>& sym);
1030 uint32_t symbolIndexFromIndirectSectionAddress(pint_t,const macho_section<P>*);
1031 const macho_section<P>* firstMachOSection() { return _sectionsStart; }
1032 const macho_section<P>* machOSectionFromSectionIndex(uint32_t index);
1033 uint32_t machOSectionCount() { return _machOSectionsCount; }
1034 uint32_t undefinedStartIndex() { return _undefinedStartIndex; }
1035 uint32_t undefinedEndIndex() { return _undefinedEndIndex; }
1036 void addFixup(FixupInAtom f) { _allFixups.push_back(f); }
1037 Section<A>* sectionForNum(unsigned int sectNum);
1038 Section<A>* sectionForAddress(pint_t addr);
1039 Atom<A>* findAtomByAddress(pint_t addr);
1040 Atom<A>* findAtomByAddressOrNullIfStub(pint_t addr);
1041 Atom<A>* findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom);
1042 Atom<A>* findAtomByName(const char* name); // slow!
1043 void findTargetFromAddress(pint_t addr, TargetDesc& target);
1044 void findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target);
1045 void findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum,
1046 TargetDesc& target);
1047 uint32_t tentativeDefinitionCount() { return _tentativeDefinitionCount; }
1048 uint32_t absoluteSymbolCount() { return _absoluteSymbolCount; }
1049
1050 bool hasStubsSection() { return (_stubsSectionNum != 0); }
1051 unsigned int stubsSectionNum() { return _stubsSectionNum; }
1052 void addDtraceExtraInfos(const SourceLocation& src, const char* provider);
1053 const char* scanSymbolTableForAddress(uint64_t addr);
1054 bool warnUnwindConversionProblems() { return _warnUnwindConversionProblems; }
1055 bool hasDataInCodeLabels() { return _hasDataInCodeLabels; }
1056 bool keepDwarfUnwind() { return _keepDwarfUnwind; }
1057 bool forceDwarfConversion() { return _forceDwarfConversion; }
1058 bool verboseOptimizationHints() { return _verboseOptimizationHints; }
1059 bool neverConvertDwarf() { return _neverConvertDwarf; }
1060 bool armUsesZeroCostExceptions() { return _armUsesZeroCostExceptions; }
1061
1062 macho_data_in_code_entry<P>* dataInCodeStart() { return _dataInCodeStart; }
1063 macho_data_in_code_entry<P>* dataInCodeEnd() { return _dataInCodeEnd; }
1064 const uint8_t* optimizationHintsStart() { return _lohStart; }
1065 const uint8_t* optimizationHintsEnd() { return _lohEnd; }
1066 bool hasOptimizationHints() { return _lohStart != _lohEnd; }
1067
1068
1069 void addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target);
1070 void addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase);
1071
1072
1073
1074 struct LabelAndCFIBreakIterator {
1075 typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
1076 LabelAndCFIBreakIterator(const uint32_t* ssa, uint32_t ssc, const pint_t* cfisa,
1077 uint32_t cfisc, bool ols)
1078 : sortedSymbolIndexes(ssa), sortedSymbolCount(ssc), cfiStartsArray(cfisa),
1079 cfiStartsCount(cfisc), fileHasOverlappingSymbols(ols),
1080 newSection(false), cfiIndex(0), symIndex(0) {}
1081 bool next(Parser<A>& parser, const Section<A>& sect, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
1082 pint_t* addr, pint_t* size, const macho_nlist<P>** sym);
1083 pint_t peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr);
1084 void beginSection() { newSection = true; symIndex = 0; }
1085
1086 const uint32_t* const sortedSymbolIndexes;
1087 const uint32_t sortedSymbolCount;
1088 const pint_t* cfiStartsArray;
1089 const uint32_t cfiStartsCount;
1090 const bool fileHasOverlappingSymbols;
1091 bool newSection;
1092 uint32_t cfiIndex;
1093 uint32_t symIndex;
1094 };
1095
1096 struct CFI_CU_InfoArrays {
1097 typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
1098 typedef typename CUSection<A>::Info CU_Info;
1099 CFI_CU_InfoArrays(const CFI_Atom_Info* cfiAr, uint32_t cfiC, CU_Info* cuAr, uint32_t cuC)
1100 : cfiArray(cfiAr), cuArray(cuAr), cfiCount(cfiC), cuCount(cuC) {}
1101 const CFI_Atom_Info* const cfiArray;
1102 CU_Info* const cuArray;
1103 const uint32_t cfiCount;
1104 const uint32_t cuCount;
1105 };
1106
1107
1108
1109 private:
1110 friend class Section<A>;
1111
1112 enum SectionType { sectionTypeIgnore, sectionTypeLiteral4, sectionTypeLiteral8, sectionTypeLiteral16,
1113 sectionTypeNonLazy, sectionTypeCFI, sectionTypeCString, sectionTypeCStringPointer,
1114 sectionTypeUTF16Strings, sectionTypeCFString, sectionTypeObjC2ClassRefs, typeObjC2CategoryList,
1115 sectionTypeObjC1Classes, sectionTypeSymboled, sectionTypeObjC1ClassRefs,
1116 sectionTypeTentativeDefinitions, sectionTypeAbsoluteSymbols, sectionTypeTLVDefs,
1117 sectionTypeCompactUnwind };
1118
1119 template <typename P>
1120 struct MachOSectionAndSectionClass
1121 {
1122 const macho_section<P>* sect;
1123 SectionType type;
1124
1125 static int sorter(const void* l, const void* r) {
1126 const MachOSectionAndSectionClass<P>* left = (MachOSectionAndSectionClass<P>*)l;
1127 const MachOSectionAndSectionClass<P>* right = (MachOSectionAndSectionClass<P>*)r;
1128 int64_t diff = left->sect->addr() - right->sect->addr();
1129 if ( diff == 0 )
1130 return 0;
1131 if ( diff < 0 )
1132 return -1;
1133 else
1134 return 1;
1135 }
1136 };
1137
1138 struct ParserAndSectionsArray { Parser* parser; const uint32_t* sortedSectionsArray; };
1139
1140
1141 Parser(const uint8_t* fileContent, uint64_t fileLength,
1142 const char* path, time_t modTime, ld::File::Ordinal ordinal,
1143 bool warnUnwindConversionProblems, bool keepDwarfUnwind,
1144 bool forceDwarfConversion, bool neverConvertDwarf, bool verboseOptimizationHints);
1145 ld::relocatable::File* parse(const ParserOptions& opts);
1146 uint8_t loadCommandSizeMask();
1147 bool parseLoadCommands();
1148 void makeSections();
1149 void prescanSymbolTable();
1150 void makeSortedSymbolsArray(uint32_t symArray[], const uint32_t sectionArray[]);
1151 void makeSortedSectionsArray(uint32_t array[]);
1152 static int pointerSorter(const void* l, const void* r);
1153 static int symbolIndexSorter(void* extra, const void* l, const void* r);
1154 static int sectionIndexSorter(void* extra, const void* l, const void* r);
1155
1156 void parseDebugInfo();
1157 void parseStabs();
1158 void appendAliasAtoms(uint8_t* atomBuffer);
1159 static bool isConstFunStabs(const char *stabStr);
1160 bool read_comp_unit(const char ** name, const char ** comp_dir,
1161 uint64_t *stmt_list);
1162 pint_t realAddr(pint_t addr);
1163 const char* getDwarfString(uint64_t form, const uint8_t*& p);
1164 uint64_t getDwarfOffset(uint64_t form, const uint8_t*& di, bool dwarf64);
1165 bool skip_form(const uint8_t ** offset, const uint8_t * end,
1166 uint64_t form, uint8_t addr_size, bool dwarf64);
1167
1168
1169 // filled in by constructor
1170 const uint8_t* _fileContent;
1171 uint32_t _fileLength;
1172 const char* _path;
1173 time_t _modTime;
1174 ld::File::Ordinal _ordinal;
1175
1176 // filled in by parseLoadCommands()
1177 File<A>* _file;
1178 const macho_nlist<P>* _symbols;
1179 uint32_t _symbolCount;
1180 uint32_t _indirectSymbolCount;
1181 const char* _strings;
1182 uint32_t _stringsSize;
1183 const uint32_t* _indirectTable;
1184 uint32_t _indirectTableCount;
1185 uint32_t _undefinedStartIndex;
1186 uint32_t _undefinedEndIndex;
1187 const macho_section<P>* _sectionsStart;
1188 uint32_t _machOSectionsCount;
1189 bool _hasUUID;
1190 macho_data_in_code_entry<P>* _dataInCodeStart;
1191 macho_data_in_code_entry<P>* _dataInCodeEnd;
1192 const uint8_t* _lohStart;
1193 const uint8_t* _lohEnd;
1194
1195 // filled in by parse()
1196 CFISection<A>* _EHFrameSection;
1197 CUSection<A>* _compactUnwindSection;
1198 AbsoluteSymbolSection<A>* _absoluteSection;
1199 uint32_t _tentativeDefinitionCount;
1200 uint32_t _absoluteSymbolCount;
1201 uint32_t _symbolsInSections;
1202 bool _hasLongBranchStubs;
1203 bool _AppleObjc; // FSF has objc that uses different data layout
1204 bool _overlappingSymbols;
1205 bool _warnUnwindConversionProblems;
1206 bool _hasDataInCodeLabels;
1207 bool _keepDwarfUnwind;
1208 bool _forceDwarfConversion;
1209 bool _neverConvertDwarf;
1210 bool _verboseOptimizationHints;
1211 bool _armUsesZeroCostExceptions;
1212 unsigned int _stubsSectionNum;
1213 const macho_section<P>* _stubsMachOSection;
1214 std::vector<const char*> _dtraceProviderInfo;
1215 std::vector<FixupInAtom> _allFixups;
1216 };
1217
1218
1219
1220 template <typename A>
1221 Parser<A>::Parser(const uint8_t* fileContent, uint64_t fileLength, const char* path, time_t modTime,
1222 ld::File::Ordinal ordinal, bool convertDUI, bool keepDwarfUnwind, bool forceDwarfConversion,
1223 bool neverConvertDwarf, bool verboseOptimizationHints)
1224 : _fileContent(fileContent), _fileLength(fileLength), _path(path), _modTime(modTime),
1225 _ordinal(ordinal), _file(NULL),
1226 _symbols(NULL), _symbolCount(0), _indirectSymbolCount(0), _strings(NULL), _stringsSize(0),
1227 _indirectTable(NULL), _indirectTableCount(0),
1228 _undefinedStartIndex(0), _undefinedEndIndex(0),
1229 _sectionsStart(NULL), _machOSectionsCount(0), _hasUUID(false),
1230 _dataInCodeStart(NULL), _dataInCodeEnd(NULL),
1231 _lohStart(NULL), _lohEnd(NULL),
1232 _EHFrameSection(NULL), _compactUnwindSection(NULL), _absoluteSection(NULL),
1233 _tentativeDefinitionCount(0), _absoluteSymbolCount(0),
1234 _symbolsInSections(0), _hasLongBranchStubs(false), _AppleObjc(false),
1235 _overlappingSymbols(false), _warnUnwindConversionProblems(convertDUI), _hasDataInCodeLabels(false),
1236 _keepDwarfUnwind(keepDwarfUnwind), _forceDwarfConversion(forceDwarfConversion),
1237 _neverConvertDwarf(neverConvertDwarf),
1238 _verboseOptimizationHints(verboseOptimizationHints),
1239 _stubsSectionNum(0), _stubsMachOSection(NULL)
1240 {
1241 }
1242
1243
1244 template <>
1245 bool Parser<x86>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1246 {
1247 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1248 if ( header->magic() != MH_MAGIC )
1249 return false;
1250 if ( header->cputype() != CPU_TYPE_I386 )
1251 return false;
1252 if ( header->filetype() != MH_OBJECT )
1253 return false;
1254 return true;
1255 }
1256
1257 template <>
1258 bool Parser<x86_64>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1259 {
1260 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1261 if ( header->magic() != MH_MAGIC_64 )
1262 return false;
1263 if ( header->cputype() != CPU_TYPE_X86_64 )
1264 return false;
1265 if ( header->filetype() != MH_OBJECT )
1266 return false;
1267 return true;
1268 }
1269
1270 template <>
1271 bool Parser<arm>::validFile(const uint8_t* fileContent, bool subtypeMustMatch, cpu_subtype_t subtype)
1272 {
1273 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1274 if ( header->magic() != MH_MAGIC )
1275 return false;
1276 if ( header->cputype() != CPU_TYPE_ARM )
1277 return false;
1278 if ( header->filetype() != MH_OBJECT )
1279 return false;
1280 if ( subtypeMustMatch ) {
1281 if ( (cpu_subtype_t)header->cpusubtype() == subtype )
1282 return true;
1283 // hack until libcc_kext.a is made fat
1284 if ( header->cpusubtype() == CPU_SUBTYPE_ARM_ALL )
1285 return true;
1286 return false;
1287 }
1288 return true;
1289 }
1290
1291
1292 template <>
1293 bool Parser<arm64>::validFile(const uint8_t* fileContent, bool subtypeMustMatch, cpu_subtype_t subtype)
1294 {
1295 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1296 if ( header->magic() != MH_MAGIC_64 )
1297 return false;
1298 if ( header->cputype() != CPU_TYPE_ARM64 )
1299 return false;
1300 if ( header->filetype() != MH_OBJECT )
1301 return false;
1302 return true;
1303 }
1304
1305
1306 template <>
1307 const char* Parser<x86>::fileKind(const uint8_t* fileContent)
1308 {
1309 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1310 if ( header->magic() != MH_MAGIC )
1311 return NULL;
1312 if ( header->cputype() != CPU_TYPE_I386 )
1313 return NULL;
1314 return "i386";
1315 }
1316
1317 template <>
1318 const char* Parser<x86_64>::fileKind(const uint8_t* fileContent)
1319 {
1320 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1321 if ( header->magic() != MH_MAGIC )
1322 return NULL;
1323 if ( header->cputype() != CPU_TYPE_X86_64 )
1324 return NULL;
1325 return "x86_64";
1326 }
1327
1328 template <>
1329 const char* Parser<arm>::fileKind(const uint8_t* fileContent)
1330 {
1331 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1332 if ( header->magic() != MH_MAGIC )
1333 return NULL;
1334 if ( header->cputype() != CPU_TYPE_ARM )
1335 return NULL;
1336 for (const ArchInfo* t=archInfoArray; t->archName != NULL; ++t) {
1337 if ( (t->cpuType == CPU_TYPE_ARM) && ((cpu_subtype_t)header->cpusubtype() == t->cpuSubType) ) {
1338 return t->archName;
1339 }
1340 }
1341 return "arm???";
1342 }
1343
#if SUPPORT_ARCH_arm64
// Returns "arm64" if the header has the 64-bit magic and the ARM64 cpu
// type, otherwise NULL.  The file type is not examined.
template <>
const char* Parser<arm64>::fileKind(const uint8_t* fileContent)
{
	const macho_header<P>* header = (const macho_header<P>*)fileContent;
	// arm64 files carry the 64-bit magic (the same value validFile() tests
	// for this architecture); testing MH_MAGIC here would reject all of them
	if ( header->magic() != MH_MAGIC_64 )
		return NULL;
	if ( header->cputype() != CPU_TYPE_ARM64 )
		return NULL;
	return "arm64";
}
#endif
1356
1357 template <typename A>
1358 bool Parser<A>::hasObjC2Categories(const uint8_t* fileContent)
1359 {
1360 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1361 const uint32_t cmd_count = header->ncmds();
1362 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1363 const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1364 const macho_load_command<P>* cmd = cmds;
1365 for (uint32_t i = 0; i < cmd_count; ++i) {
1366 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1367 const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1368 const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1369 for (uint32_t si=0; si < segment->nsects(); ++si) {
1370 const macho_section<P>* sect = &sectionsStart[si];
1371 if ( (sect->size() > 0)
1372 && (strcmp(sect->sectname(), "__objc_catlist") == 0)
1373 && (strcmp(sect->segname(), "__DATA") == 0) ) {
1374 return true;
1375 }
1376 }
1377 }
1378 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1379 if ( cmd > cmdsEnd )
1380 throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1381 }
1382 return false;
1383 }
1384
1385
1386 template <typename A>
1387 bool Parser<A>::hasObjC1Categories(const uint8_t* fileContent)
1388 {
1389 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1390 const uint32_t cmd_count = header->ncmds();
1391 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1392 const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1393 const macho_load_command<P>* cmd = cmds;
1394 for (uint32_t i = 0; i < cmd_count; ++i) {
1395 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1396 const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1397 const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1398 for (uint32_t si=0; si < segment->nsects(); ++si) {
1399 const macho_section<P>* sect = &sectionsStart[si];
1400 if ( (sect->size() > 0)
1401 && (strcmp(sect->sectname(), "__category") == 0)
1402 && (strcmp(sect->segname(), "__OBJC") == 0) ) {
1403 return true;
1404 }
1405 }
1406 }
1407 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1408 if ( cmd > cmdsEnd )
1409 throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1410 }
1411 return false;
1412 }
1413
1414 template <typename A>
1415 int Parser<A>::pointerSorter(const void* l, const void* r)
1416 {
1417 // sort references by address
1418 const pint_t* left = (pint_t*)l;
1419 const pint_t* right = (pint_t*)r;
1420 return (*left - *right);
1421 }
1422
1423 template <typename A>
1424 typename A::P::uint_t Parser<A>::LabelAndCFIBreakIterator::peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr)
1425 {
1426 pint_t symbolAddr;
1427 if ( symIndex < sortedSymbolCount )
1428 symbolAddr = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]).n_value();
1429 else
1430 symbolAddr = endAddr;
1431 pint_t cfiAddr;
1432 if ( cfiIndex < cfiStartsCount )
1433 cfiAddr = cfiStartsArray[cfiIndex];
1434 else
1435 cfiAddr = endAddr;
1436 if ( (cfiAddr < symbolAddr) && (cfiAddr >= startAddr) ) {
1437 if ( cfiAddr < endAddr )
1438 return cfiAddr;
1439 else
1440 return endAddr;
1441 }
1442 else {
1443 if ( symbolAddr < endAddr )
1444 return symbolAddr;
1445 else
1446 return endAddr;
1447 }
1448 }
1449
//
// Parses up a section into chunks based on labels and CFI information.
// Each call returns the next chunk address and size, and (if the break
// was because of a label) the symbol.  Returns false when no more chunks.
//
1455 template <typename A>
1456 bool Parser<A>::LabelAndCFIBreakIterator::next(Parser<A>& parser, const Section<A>& sect, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
1457 pint_t* addr, pint_t* size, const macho_nlist<P>** symbol)
1458 {
1459 // may not be a label on start of section, but need atom demarcation there
1460 if ( newSection ) {
1461 newSection = false;
1462 // advance symIndex until we get to the first label at or past the start of this section
1463 while ( symIndex < sortedSymbolCount ) {
1464 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1465 if ( ! sect.ignoreLabel(parser.nameFromSymbol(sym)) ) {
1466 pint_t nextSymbolAddr = sym.n_value();
1467 //fprintf(stderr, "sectNum=%d, nextSymbolAddr=0x%08llX, name=%s\n", sectNum, (uint64_t)nextSymbolAddr, parser.nameFromSymbol(sym));
1468 if ( (nextSymbolAddr > startAddr) || ((nextSymbolAddr == startAddr) && (sym.n_sect() == sectNum)) )
1469 break;
1470 }
1471 ++symIndex;
1472 }
1473 if ( symIndex < sortedSymbolCount ) {
1474 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1475 pint_t nextSymbolAddr = sym.n_value();
1476 // if next symbol found is not in this section
1477 if ( sym.n_sect() != sectNum ) {
1478 // check for CFI break instead of symbol break
1479 if ( cfiIndex < cfiStartsCount ) {
1480 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1481 if ( nextCfiAddr < endAddr ) {
1482 // use cfi
1483 ++cfiIndex;
1484 *addr = nextCfiAddr;
1485 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1486 *symbol = NULL;
1487 return true;
1488 }
1489 }
1490 *addr = startAddr;
1491 *size = endAddr - startAddr;
1492 *symbol = NULL;
1493 if ( startAddr == endAddr )
1494 return false; // zero size section
1495 else
1496 return true; // whole section is one atom with no label
1497 }
1498 // if also CFI break here, eat it
1499 if ( cfiIndex < cfiStartsCount ) {
1500 if ( cfiStartsArray[cfiIndex] == nextSymbolAddr )
1501 ++cfiIndex;
1502 }
1503 if ( nextSymbolAddr == startAddr ) {
1504 // label at start of section, return it as chunk
1505 ++symIndex;
1506 *addr = startAddr;
1507 *size = peek(parser, startAddr, endAddr) - startAddr;
1508 *symbol = &sym;
1509 return true;
1510 }
1511 // return chunk before first symbol
1512 *addr = startAddr;
1513 *size = nextSymbolAddr - startAddr;
1514 *symbol = NULL;
1515 return true;
1516 }
1517 // no symbols in section, check CFI
1518 if ( cfiIndex < cfiStartsCount ) {
1519 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1520 if ( nextCfiAddr < endAddr ) {
1521 // use cfi
1522 ++cfiIndex;
1523 *addr = nextCfiAddr;
1524 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1525 *symbol = NULL;
1526 return true;
1527 }
1528 }
1529 // no cfi, so whole section is one chunk
1530 *addr = startAddr;
1531 *size = endAddr - startAddr;
1532 *symbol = NULL;
1533 if ( startAddr == endAddr )
1534 return false; // zero size section
1535 else
1536 return true; // whole section is one atom with no label
1537 }
1538
1539 while ( (symIndex < sortedSymbolCount) && (cfiIndex < cfiStartsCount) ) {
1540 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1541 pint_t nextSymbolAddr = sym.n_value();
1542 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1543 if ( nextSymbolAddr < nextCfiAddr ) {
1544 if ( nextSymbolAddr >= endAddr )
1545 return false;
1546 ++symIndex;
1547 if ( nextSymbolAddr < startAddr )
1548 continue;
1549 *addr = nextSymbolAddr;
1550 *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1551 *symbol = &sym;
1552 return true;
1553 }
1554 else if ( nextCfiAddr < nextSymbolAddr ) {
1555 if ( nextCfiAddr >= endAddr )
1556 return false;
1557 ++cfiIndex;
1558 if ( nextCfiAddr < startAddr )
1559 continue;
1560 *addr = nextCfiAddr;
1561 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1562 *symbol = NULL;
1563 return true;
1564 }
1565 else {
1566 if ( nextCfiAddr >= endAddr )
1567 return false;
1568 ++symIndex;
1569 ++cfiIndex;
1570 if ( nextCfiAddr < startAddr )
1571 continue;
1572 *addr = nextCfiAddr;
1573 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1574 *symbol = &sym;
1575 return true;
1576 }
1577 }
1578 while ( symIndex < sortedSymbolCount ) {
1579 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1580 pint_t nextSymbolAddr = sym.n_value();
1581 // if next symbol found is not in this section, then done with iteration
1582 if ( sym.n_sect() != sectNum )
1583 return false;
1584 ++symIndex;
1585 if ( nextSymbolAddr < startAddr )
1586 continue;
1587 *addr = nextSymbolAddr;
1588 *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1589 *symbol = &sym;
1590 return true;
1591 }
1592 while ( cfiIndex < cfiStartsCount ) {
1593 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1594 if ( nextCfiAddr >= endAddr )
1595 return false;
1596 ++cfiIndex;
1597 if ( nextCfiAddr < startAddr )
1598 continue;
1599 *addr = nextCfiAddr;
1600 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1601 *symbol = NULL;
1602 return true;
1603 }
1604 return false;
1605 }
1606
1607 template <>
1608 typename arm::P::uint_t Parser<arm>::realAddr(typename arm::P::uint_t addr)
1609 {
1610 return addr & (-2);
1611 }
1612
1613 template <typename A>
1614 typename A::P::uint_t Parser<A>::realAddr(typename A::P::uint_t addr)
1615 {
1616 return addr;
1617 }
1618
//
// STACK_ALLOC_IF_SMALL(_type, _name, _actual_count, _maxCount)
//
// Declares, in the current scope, a pointer "_type* _name" to storage for
// _actual_count elements:
//   - if _actual_count <= _maxCount the storage is a local array (_name##_buffer)
//     of _actual_count elements and _name##_count is set to _actual_count
//   - otherwise the storage comes from malloc(), _name##_count stays 1 and the
//     local array is a one element placeholder
//
// The malloc()ed block (if any) is released by _name##_heapGuard when the
// enclosing scope is left.  That matches the lifetime of the local array used
// in the small case, so callers must not keep _name past the scope either way.
//
// Every macro argument is parenthesized, so expressions such as "n+1" or
// "flag ? a : b" may be passed as a count.  The count arguments are evaluated
// more than once and must not have side effects.
//
// NOTE(review): an _actual_count of zero declares a zero length array, and a
// failed malloc() falls back to the one element placeholder -- both predate
// this macro's cleanup and are left as they were.
//
#define STACK_ALLOC_IF_SMALL(_type, _name, _actual_count, _maxCount) \
	_type*   _name = NULL; \
	uint32_t _name##_count = 1; \
	if ( (_actual_count) > (_maxCount) ) \
		_name = (_type*)malloc(sizeof(_type) * (_actual_count)); \
	else \
		_name##_count = (_actual_count); \
	struct _name##_HeapGuard { \
		void* heapBlock; \
		~_name##_HeapGuard() { free(heapBlock); } \
	} _name##_heapGuard = { _name }; \
	_type _name##_buffer[_name##_count]; \
	if ( _name == NULL ) \
		_name = _name##_buffer;
1629
1630
1631 template <typename A>
1632 ld::relocatable::File* Parser<A>::parse(const ParserOptions& opts)
1633 {
1634 // create file object
1635 _file = new File<A>(_path, _modTime, _fileContent, _ordinal);
1636
1637 // respond to -t option
1638 if ( opts.logAllFiles )
1639 printf("%s\n", _path);
1640
1641 _armUsesZeroCostExceptions = opts.armUsesZeroCostExceptions;
1642
1643 // parse start of mach-o file
1644 if ( ! parseLoadCommands() )
1645 return _file;
1646
1647 // make array of
1648 uint32_t sortedSectionIndexes[_machOSectionsCount];
1649 this->makeSortedSectionsArray(sortedSectionIndexes);
1650
1651 // make symbol table sorted by address
1652 this->prescanSymbolTable();
1653 uint32_t sortedSymbolIndexes[_symbolsInSections];
1654 this->makeSortedSymbolsArray(sortedSymbolIndexes, sortedSectionIndexes);
1655
1656 // allocate Section<A> object for each mach-o section
1657 makeSections();
1658
1659 // if it exists, do special early parsing of __compact_unwind section
1660 uint32_t countOfCUs = 0;
1661 if ( _compactUnwindSection != NULL )
1662 countOfCUs = _compactUnwindSection->count();
1663 // stack allocate (if not too large) cuInfoBuffer
1664 STACK_ALLOC_IF_SMALL(typename CUSection<A>::Info, cuInfoArray, countOfCUs, 1024);
1665 if ( countOfCUs != 0 )
1666 _compactUnwindSection->parse(*this, countOfCUs, cuInfoArray);
1667
1668 // create lists of address that already have compact unwind and thus don't need the dwarf parsed
1669 unsigned cuLsdaCount = 0;
1670 pint_t cuStarts[countOfCUs];
1671 for (uint32_t i=0; i < countOfCUs; ++i) {
1672 if ( CUSection<A>::encodingMeansUseDwarf(cuInfoArray[i].compactUnwindInfo) )
1673 cuStarts[i] = -1;
1674 else
1675 cuStarts[i] = cuInfoArray[i].functionStartAddress;
1676 if ( cuInfoArray[i].lsdaAddress != 0 )
1677 ++cuLsdaCount;
1678 }
1679
1680
1681 // if it exists, do special early parsing of __eh_frame section
1682 // stack allocate (if not too large) array of CFI_Atom_Info
1683 uint32_t countOfCFIs = 0;
1684 if ( _EHFrameSection != NULL )
1685 countOfCFIs = _EHFrameSection->cfiCount(*this);
1686 STACK_ALLOC_IF_SMALL(typename CFISection<A>::CFI_Atom_Info, cfiArray, countOfCFIs, 1024);
1687
1688 // stack allocate (if not too large) a copy of __eh_frame to apply relocations to
1689 uint32_t sectSize = 4;
1690 if ( (countOfCFIs != 0) && _EHFrameSection->needsRelocating() )
1691 sectSize = _EHFrameSection->machoSection()->size()+4;
1692 STACK_ALLOC_IF_SMALL(uint8_t, ehBuffer, sectSize, 50*1024);
1693 uint32_t cfiStartsCount = 0;
1694 if ( countOfCFIs != 0 ) {
1695 _EHFrameSection->cfiParse(*this, ehBuffer, cfiArray, countOfCFIs, cuStarts, countOfCUs);
1696 // count functions and lsdas
1697 for(uint32_t i=0; i < countOfCFIs; ++i) {
1698 if ( cfiArray[i].isCIE )
1699 continue;
1700 //fprintf(stderr, "cfiArray[i].func = 0x%08llX, cfiArray[i].lsda = 0x%08llX, encoding=0x%08X\n",
1701 // (uint64_t)cfiArray[i].u.fdeInfo.function.targetAddress,
1702 // (uint64_t)cfiArray[i].u.fdeInfo.lsda.targetAddress,
1703 // cfiArray[i].u.fdeInfo.compactUnwindInfo);
1704 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1705 ++cfiStartsCount;
1706 if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1707 ++cfiStartsCount;
1708 }
1709 }
1710 CFI_CU_InfoArrays cfis(cfiArray, countOfCFIs, cuInfoArray, countOfCUs);
1711
1712 // create sorted array of function starts and lsda starts
1713 pint_t cfiStartsArray[cfiStartsCount+cuLsdaCount];
1714 uint32_t countOfFDEs = 0;
1715 uint32_t cfiStartsArrayCount = 0;
1716 if ( countOfCFIs != 0 ) {
1717 for(uint32_t i=0; i < countOfCFIs; ++i) {
1718 if ( cfiArray[i].isCIE )
1719 continue;
1720 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1721 cfiStartsArray[cfiStartsArrayCount++] = realAddr(cfiArray[i].u.fdeInfo.function.targetAddress);
1722 if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1723 cfiStartsArray[cfiStartsArrayCount++] = cfiArray[i].u.fdeInfo.lsda.targetAddress;
1724 ++countOfFDEs;
1725 }
1726 }
1727 if ( cuLsdaCount != 0 ) {
1728 // merge in an lsda info from compact unwind
1729 for (uint32_t i=0; i < countOfCUs; ++i) {
1730 if ( cuInfoArray[i].lsdaAddress == 0 )
1731 continue;
1732 // append to cfiStartsArray if not already in that list
1733 bool found = false;
1734 for(uint32_t j=0; j < cfiStartsArrayCount; ++j) {
1735 if ( cfiStartsArray[j] == cuInfoArray[i].lsdaAddress )
1736 found = true;
1737 }
1738 if ( ! found ) {
1739 cfiStartsArray[cfiStartsArrayCount++] = cuInfoArray[i].lsdaAddress;
1740 }
1741 }
1742 }
1743 if ( cfiStartsArrayCount != 0 ) {
1744 ::qsort(cfiStartsArray, cfiStartsArrayCount, sizeof(pint_t), pointerSorter);
1745 #ifndef NDEBUG
1746 // scan for FDEs claming the same function
1747 for(uint32_t i=1; i < cfiStartsArrayCount; ++i) {
1748 assert( cfiStartsArray[i] != cfiStartsArray[i-1] );
1749 }
1750 #endif
1751 }
1752
1753 Section<A>** sections = _file->_sectionsArray;
1754 uint32_t sectionsCount = _file->_sectionsArrayCount;
1755
1756 // figure out how many atoms will be allocated and allocate
1757 LabelAndCFIBreakIterator breakIterator(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1758 cfiStartsArrayCount, _overlappingSymbols);
1759 uint32_t computedAtomCount = 0;
1760 for (uint32_t i=0; i < sectionsCount; ++i ) {
1761 breakIterator.beginSection();
1762 uint32_t count = sections[i]->computeAtomCount(*this, breakIterator, cfis);
1763 //const macho_section<P>* sect = sections[i]->machoSection();
1764 //fprintf(stderr, "computed count=%u for section %s size=%llu\n", count, sect->sectname(), (sect != NULL) ? sect->size() : 0);
1765 computedAtomCount += count;
1766 }
1767 //fprintf(stderr, "allocating %d atoms * sizeof(Atom<A>)=%ld, sizeof(ld::Atom)=%ld\n", computedAtomCount, sizeof(Atom<A>), sizeof(ld::Atom));
1768 _file->_atomsArray = new uint8_t[computedAtomCount*sizeof(Atom<A>)];
1769 _file->_atomsArrayCount = 0;
1770
1771 // have each section append atoms to _atomsArray
1772 LabelAndCFIBreakIterator breakIterator2(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1773 cfiStartsArrayCount, _overlappingSymbols);
1774 for (uint32_t i=0; i < sectionsCount; ++i ) {
1775 uint8_t* atoms = _file->_atomsArray + _file->_atomsArrayCount*sizeof(Atom<A>);
1776 breakIterator2.beginSection();
1777 uint32_t count = sections[i]->appendAtoms(*this, atoms, breakIterator2, cfis);
1778 //fprintf(stderr, "append count=%u for section %s/%s\n", count, sections[i]->machoSection()->segname(), sections[i]->machoSection()->sectname());
1779 _file->_atomsArrayCount += count;
1780 }
1781 assert( _file->_atomsArrayCount == computedAtomCount && "more atoms allocated than expected");
1782
1783
1784 // have each section add all fix-ups for its atoms
1785 _allFixups.reserve(computedAtomCount*5);
1786 for (uint32_t i=0; i < sectionsCount; ++i )
1787 sections[i]->makeFixups(*this, cfis);
1788
1789 // assign fixups start offset for each atom
1790 uint8_t* p = _file->_atomsArray;
1791 uint32_t fixupOffset = 0;
1792 for(int i=_file->_atomsArrayCount; i > 0; --i) {
1793 Atom<A>* atom = (Atom<A>*)p;
1794 atom->_fixupsStartIndex = fixupOffset;
1795 fixupOffset += atom->_fixupsCount;
1796 atom->_fixupsCount = 0;
1797 p += sizeof(Atom<A>);
1798 }
1799 assert(fixupOffset == _allFixups.size());
1800 _file->_fixups.reserve(fixupOffset);
1801
1802 // copy each fixup for each atom
1803 for(typename std::vector<FixupInAtom>::iterator it=_allFixups.begin(); it != _allFixups.end(); ++it) {
1804 uint32_t slot = it->atom->_fixupsStartIndex + it->atom->_fixupsCount;
1805 _file->_fixups[slot] = it->fixup;
1806 it->atom->_fixupsCount++;
1807 }
1808
1809 // done with temp vector
1810 _allFixups.clear();
1811
1812 // add unwind info
1813 _file->_unwindInfos.reserve(countOfFDEs+countOfCUs);
1814 for(uint32_t i=0; i < countOfCFIs; ++i) {
1815 if ( cfiArray[i].isCIE )
1816 continue;
1817 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS ) {
1818 ld::Atom::UnwindInfo info;
1819 info.startOffset = 0;
1820 info.unwindInfo = cfiArray[i].u.fdeInfo.compactUnwindInfo;
1821 _file->_unwindInfos.push_back(info);
1822 Atom<A>* func = findAtomByAddress(cfiArray[i].u.fdeInfo.function.targetAddress);
1823 func->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1824 //fprintf(stderr, "cu from dwarf =0x%08X, atom=%s\n", info.unwindInfo, func->name());
1825 }
1826 }
1827 // apply compact infos in __LD,__compact_unwind section to each function
1828 // if function also has dwarf unwind, CU will override it
1829 Atom<A>* lastFunc = NULL;
1830 uint32_t lastEnd = 0;
1831 for(uint32_t i=0; i < countOfCUs; ++i) {
1832 typename CUSection<A>::Info* info = &cuInfoArray[i];
1833 assert(info->function != NULL);
1834 ld::Atom::UnwindInfo ui;
1835 ui.startOffset = info->functionStartAddress - info->function->objectAddress();
1836 ui.unwindInfo = info->compactUnwindInfo;
1837 _file->_unwindInfos.push_back(ui);
1838 // don't override with converted cu with "use dwarf" cu, if forcing dwarf conversion
1839 if ( !_forceDwarfConversion || !CUSection<A>::encodingMeansUseDwarf(info->compactUnwindInfo) ) {
1840 //fprintf(stderr, "cu=0x%08X, atom=%s\n", ui.unwindInfo, info->function->name());
1841 // if previous is for same function, extend range
1842 if ( info->function == lastFunc ) {
1843 if ( lastEnd != ui.startOffset ) {
1844 if ( lastEnd < ui.startOffset )
1845 warning("__LD,__compact_unwind entries for %s have a gap at offset 0x%0X", info->function->name(), lastEnd);
1846 else
1847 warning("__LD,__compact_unwind entries for %s overlap at offset 0x%0X", info->function->name(), lastEnd);
1848 }
1849 lastFunc->extendUnwindInfoRange();
1850 }
1851 else
1852 info->function->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1853 lastFunc = info->function;
1854 lastEnd = ui.startOffset + info->rangeLength;
1855 }
1856 }
1857
1858 // process indirect symbols which become AliasAtoms
1859 _file->_aliasAtomsArray = NULL;
1860 _file->_aliasAtomsArrayCount = 0;
1861 if ( _indirectSymbolCount != 0 ) {
1862 _file->_aliasAtomsArrayCount = _indirectSymbolCount;
1863 _file->_aliasAtomsArray = new uint8_t[_file->_aliasAtomsArrayCount*sizeof(AliasAtom)];
1864 this->appendAliasAtoms(_file->_aliasAtomsArray);
1865 }
1866
1867
1868 // parse dwarf debug info to get line info
1869 this->parseDebugInfo();
1870
1871 return _file;
1872 }
1873
1874
1875 template <> uint8_t Parser<x86>::loadCommandSizeMask() { return 0x03; }
1876 template <> uint8_t Parser<x86_64>::loadCommandSizeMask() { return 0x07; }
1877 template <> uint8_t Parser<arm>::loadCommandSizeMask() { return 0x03; }
1878 template <> uint8_t Parser<arm64>::loadCommandSizeMask() { return 0x07; }
1879
1880 template <typename A>
1881 bool Parser<A>::parseLoadCommands()
1882 {
1883 const macho_header<P>* header = (const macho_header<P>*)_fileContent;
1884
1885 // set File attributes
1886 _file->_canScatterAtoms = (header->flags() & MH_SUBSECTIONS_VIA_SYMBOLS);
1887 _file->_cpuSubType = header->cpusubtype();
1888
1889 const macho_segment_command<P>* segment = NULL;
1890 const uint8_t* const endOfFile = _fileContent + _fileLength;
1891 const uint32_t cmd_count = header->ncmds();
1892 // <rdar://problem/5394172> an empty .o file with zero load commands will crash linker
1893 if ( cmd_count == 0 )
1894 return false;
1895 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1896 const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1897 const macho_load_command<P>* cmd = cmds;
1898 for (uint32_t i = 0; i < cmd_count; ++i) {
1899 uint32_t size = cmd->cmdsize();
1900 if ( (size & this->loadCommandSizeMask()) != 0 )
1901 throwf("load command #%d has a unaligned size", i);
1902 const uint8_t* endOfCmd = ((uint8_t*)cmd)+cmd->cmdsize();
1903 if ( endOfCmd > (uint8_t*)cmdsEnd )
1904 throwf("load command #%d extends beyond the end of the load commands", i);
1905 if ( endOfCmd > endOfFile )
1906 throwf("load command #%d extends beyond the end of the file", i);
1907 switch (cmd->cmd()) {
1908 case LC_SYMTAB:
1909 {
1910 const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
1911 _symbolCount = symtab->nsyms();
1912 _symbols = (const macho_nlist<P>*)(_fileContent + symtab->symoff());
1913 _strings = (char*)_fileContent + symtab->stroff();
1914 _stringsSize = symtab->strsize();
1915 if ( (symtab->symoff() + _symbolCount*sizeof(macho_nlist<P>)) > _fileLength )
1916 throw "mach-o symbol table extends beyond end of file";
1917 if ( (_strings + _stringsSize) > (char*)endOfFile )
1918 throw "mach-o string pool extends beyond end of file";
1919 if ( _indirectTable == NULL ) {
1920 if ( _undefinedEndIndex == 0 ) {
1921 _undefinedStartIndex = 0;
1922 _undefinedEndIndex = symtab->nsyms();
1923 }
1924 }
1925 }
1926 break;
1927 case LC_DYSYMTAB:
1928 {
1929 const macho_dysymtab_command<P>* dsymtab = (macho_dysymtab_command<P>*)cmd;
1930 _indirectTable = (uint32_t*)(_fileContent + dsymtab->indirectsymoff());
1931 _indirectTableCount = dsymtab->nindirectsyms();
1932 if ( &_indirectTable[_indirectTableCount] > (uint32_t*)endOfFile )
1933 throw "indirect symbol table extends beyond end of file";
1934 _undefinedStartIndex = dsymtab->iundefsym();
1935 _undefinedEndIndex = _undefinedStartIndex + dsymtab->nundefsym();
1936 }
1937 break;
1938 case LC_UUID:
1939 _hasUUID = true;
1940 break;
1941 case LC_DATA_IN_CODE:
1942 {
1943 const macho_linkedit_data_command<P>* dc = (macho_linkedit_data_command<P>*)cmd;
1944 _dataInCodeStart = (macho_data_in_code_entry<P>*)(_fileContent + dc->dataoff());
1945 _dataInCodeEnd = (macho_data_in_code_entry<P>*)(_fileContent + dc->dataoff() + dc->datasize());
1946 if ( _dataInCodeEnd > (macho_data_in_code_entry<P>*)endOfFile )
1947 throw "LC_DATA_IN_CODE table extends beyond end of file";
1948 }
1949 break;
1950 case LC_LINKER_OPTION:
1951 {
1952 const macho_linker_option_command<P>* loc = (macho_linker_option_command<P>*)cmd;
1953 const char* buffer = loc->buffer();
1954 _file->_linkerOptions.resize(_file->_linkerOptions.size() + 1);
1955 std::vector<const char*>& vec = _file->_linkerOptions.back();
1956 for (uint32_t j=0; j < loc->count(); ++j) {
1957 vec.push_back(buffer);
1958 buffer += strlen(buffer) + 1;
1959 }
1960 if ( buffer > ((char*)cmd + loc->cmdsize()) )
1961 throw "malformed LC_LINKER_OPTION";
1962 }
1963 break;
1964 case LC_LINKER_OPTIMIZATION_HINTS:
1965 {
1966 const macho_linkedit_data_command<P>* loh = (macho_linkedit_data_command<P>*)cmd;
1967 _lohStart = _fileContent + loh->dataoff();
1968 _lohEnd = _fileContent + loh->dataoff() + loh->datasize();
1969 if ( _lohEnd > endOfFile )
1970 throw "LC_LINKER_OPTIMIZATION_HINTS table extends beyond end of file";
1971 }
1972 break;
1973 default:
1974 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1975 if ( segment != NULL )
1976 throw "more than one LC_SEGMENT found in object file";
1977 segment = (macho_segment_command<P>*)cmd;
1978 }
1979 break;
1980 }
1981 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1982 if ( cmd > cmdsEnd )
1983 throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1984 }
1985
1986 // record range of sections
1987 if ( segment == NULL )
1988 throw "missing LC_SEGMENT";
1989 _sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1990 _machOSectionsCount = segment->nsects();
1991
1992 return true;
1993 }
1994
1995
1996 template <typename A>
1997 void Parser<A>::prescanSymbolTable()
1998 {
1999 _tentativeDefinitionCount = 0;
2000 _absoluteSymbolCount = 0;
2001 _symbolsInSections = 0;
2002 _hasDataInCodeLabels = false;
2003 for (uint32_t i=0; i < this->_symbolCount; ++i) {
2004 const macho_nlist<P>& sym = symbolFromIndex(i);
2005 // ignore stabs
2006 if ( (sym.n_type() & N_STAB) != 0 )
2007 continue;
2008
2009 // look at undefines
2010 const char* symbolName = this->nameFromSymbol(sym);
2011 if ( (sym.n_type() & N_TYPE) == N_UNDF ) {
2012 if ( sym.n_value() != 0 ) {
2013 // count tentative definitions
2014 ++_tentativeDefinitionCount;
2015 }
2016 else if ( strncmp(symbolName, "___dtrace_", 10) == 0 ) {
2017 // any undefined starting with __dtrace_*$ that is not ___dtrace_probe$* or ___dtrace_isenabled$*
2018 // is extra provider info
2019 if ( (strncmp(&symbolName[10], "probe$", 6) != 0) && (strncmp(&symbolName[10], "isenabled$", 10) != 0) ) {
2020 _dtraceProviderInfo.push_back(symbolName);
2021 }
2022 }
2023 continue;
2024 }
2025 else if ( ((sym.n_type() & N_TYPE) == N_INDR) && ((sym.n_type() & N_EXT) != 0) ) {
2026 _indirectSymbolCount++;
2027 continue;
2028 }
2029
2030 // count absolute symbols
2031 if ( (sym.n_type() & N_TYPE) == N_ABS ) {
2032 const char* absName = this->nameFromSymbol(sym);
2033 // ignore .objc_class_name_* symbols
2034 if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
2035 _AppleObjc = true;
2036 continue;
2037 }
2038 // ignore .objc_class_name_* symbols
2039 if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
2040 continue;
2041 // ignore empty *.eh symbols
2042 if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
2043 continue;
2044 ++_absoluteSymbolCount;
2045 }
2046
2047 // only look at definitions
2048 if ( (sym.n_type() & N_TYPE) != N_SECT )
2049 continue;
2050
2051 // 'L' labels do not denote atom breaks
2052 if ( symbolName[0] == 'L' ) {
2053 // <rdar://problem/9218847> Formalize data in code with L$start$ labels
2054 if ( strncmp(symbolName, "L$start$", 8) == 0 )
2055 _hasDataInCodeLabels = true;
2056 continue;
2057 }
2058 // how many def syms in each section
2059 if ( sym.n_sect() > _machOSectionsCount )
2060 throw "bad n_sect in symbol table";
2061
2062 _symbolsInSections++;
2063 }
2064 }
2065
2066 template <typename A>
2067 void Parser<A>::appendAliasAtoms(uint8_t* p)
2068 {
2069 for (uint32_t i=0; i < this->_symbolCount; ++i) {
2070 const macho_nlist<P>& sym = symbolFromIndex(i);
2071 // ignore stabs
2072 if ( (sym.n_type() & N_STAB) != 0 )
2073 continue;
2074
2075 // only look at N_INDR symbols
2076 if ( (sym.n_type() & N_TYPE) != N_INDR )
2077 continue;
2078
2079 // skip non-external aliases
2080 if ( (sym.n_type() & N_EXT) == 0 )
2081 continue;
2082
2083 const char* symbolName = this->nameFromSymbol(sym);
2084 const char* aliasOfName = &_strings[sym.n_value()];
2085 bool isHiddenVisibility = (sym.n_type() & N_PEXT);
2086 AliasAtom* allocatedSpace = (AliasAtom*)p;
2087 new (allocatedSpace) AliasAtom(symbolName, isHiddenVisibility, _file, aliasOfName);
2088 p += sizeof(AliasAtom);
2089 }
2090 }
2091
2092
2093
2094 template <typename A>
2095 int Parser<A>::sectionIndexSorter(void* extra, const void* l, const void* r)
2096 {
2097 Parser<A>* parser = (Parser<A>*)extra;
2098 const uint32_t* left = (uint32_t*)l;
2099 const uint32_t* right = (uint32_t*)r;
2100 const macho_section<P>* leftSect = parser->machOSectionFromSectionIndex(*left);
2101 const macho_section<P>* rightSect = parser->machOSectionFromSectionIndex(*right);
2102
2103 // can't just return difference because 64-bit diff does not fit in 32-bit return type
2104 int64_t result = leftSect->addr() - rightSect->addr();
2105 if ( result == 0 ) {
2106 // two sections with same start address
2107 // one with zero size goes first
2108 bool leftEmpty = ( leftSect->size() == 0 );
2109 bool rightEmpty = ( rightSect->size() == 0 );
2110 if ( leftEmpty != rightEmpty ) {
2111 return ( rightEmpty ? 1 : -1 );
2112 }
2113 if ( !leftEmpty && !rightEmpty )
2114 throwf("overlapping sections");
2115 // both empty, so chose file order
2116 return ( rightSect - leftSect );
2117 }
2118 else if ( result < 0 )
2119 return -1;
2120 else
2121 return 1;
2122 }
2123
2124 template <typename A>
2125 void Parser<A>::makeSortedSectionsArray(uint32_t array[])
2126 {
2127 const bool log = false;
2128
2129 if ( log ) {
2130 fprintf(stderr, "unsorted sections:\n");
2131 for(unsigned int i=0; i < _machOSectionsCount; ++i )
2132 fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[i].addr(), _sectionsStart[i].segname(), _sectionsStart[i].sectname());
2133 }
2134
2135 // sort by symbol table address
2136 for (uint32_t i=0; i < _machOSectionsCount; ++i)
2137 array[i] = i;
2138 ::qsort_r(array, _machOSectionsCount, sizeof(uint32_t), this, &sectionIndexSorter);
2139
2140 if ( log ) {
2141 fprintf(stderr, "sorted sections:\n");
2142 for(unsigned int i=0; i < _machOSectionsCount; ++i )
2143 fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[array[i]].addr(), _sectionsStart[array[i]].segname(), _sectionsStart[array[i]].sectname());
2144 }
2145 }
2146
2147
2148
2149 template <typename A>
2150 int Parser<A>::symbolIndexSorter(void* extra, const void* l, const void* r)
2151 {
2152 ParserAndSectionsArray* extraInfo = (ParserAndSectionsArray*)extra;
2153 Parser<A>* parser = extraInfo->parser;
2154 const uint32_t* sortedSectionsArray = extraInfo->sortedSectionsArray;
2155 const uint32_t* left = (uint32_t*)l;
2156 const uint32_t* right = (uint32_t*)r;
2157 const macho_nlist<P>& leftSym = parser->symbolFromIndex(*left);
2158 const macho_nlist<P>& rightSym = parser->symbolFromIndex(*right);
2159 // can't just return difference because 64-bit diff does not fit in 32-bit return type
2160 int64_t result = leftSym.n_value() - rightSym.n_value();
2161 if ( result == 0 ) {
2162 // two symbols with same address
2163 // if in different sections, sort earlier section first
2164 if ( leftSym.n_sect() != rightSym.n_sect() ) {
2165 for (uint32_t i=0; i < parser->machOSectionCount(); ++i) {
2166 if ( sortedSectionsArray[i]+1 == leftSym.n_sect() )
2167 return -1;
2168 if ( sortedSectionsArray[i]+1 == rightSym.n_sect() )
2169 return 1;
2170 }
2171 }
2172 // two symbols in same section, means one is an alias
2173 // if one is ltmp*, make it an alias (sort first)
2174 const char* leftName = parser->nameFromSymbol(leftSym);
2175 const char* rightName = parser->nameFromSymbol(rightSym);
2176 bool leftIsTmp = strncmp(leftName, "ltmp", 4);
2177 bool rightIsTmp = strncmp(rightName, "ltmp", 4);
2178 if ( leftIsTmp != rightIsTmp ) {
2179 return (rightIsTmp ? -1 : 1);
2180 }
2181
2182 // if only one is global, make the other an alias (sort first)
2183 if ( (leftSym.n_type() & N_EXT) != (rightSym.n_type() & N_EXT) ) {
2184 if ( (rightSym.n_type() & N_EXT) != 0 )
2185 return -1;
2186 else
2187 return 1;
2188 }
2189 // if both are global, sort alphabetically. earlier one will be the alias
2190 return ( strcmp(rightName, leftName) );
2191 }
2192 else if ( result < 0 )
2193 return -1;
2194 else
2195 return 1;
2196 }
2197
2198
2199 template <typename A>
2200 void Parser<A>::makeSortedSymbolsArray(uint32_t array[], const uint32_t sectionArray[])
2201 {
2202 const bool log = false;
2203
2204 uint32_t* p = array;
2205 for (uint32_t i=0; i < this->_symbolCount; ++i) {
2206 const macho_nlist<P>& sym = symbolFromIndex(i);
2207 // ignore stabs
2208 if ( (sym.n_type() & N_STAB) != 0 )
2209 continue;
2210
2211 // only look at definitions
2212 if ( (sym.n_type() & N_TYPE) != N_SECT )
2213 continue;
2214
2215 // 'L' labels do not denote atom breaks
2216 const char* symbolName = this->nameFromSymbol(sym);
2217 if ( symbolName[0] == 'L' )
2218 continue;
2219
2220 // how many def syms in each section
2221 if ( sym.n_sect() > _machOSectionsCount )
2222 throw "bad n_sect in symbol table";
2223
2224 // append to array
2225 *p++ = i;
2226 }
2227 assert(p == &array[_symbolsInSections] && "second pass over symbol table yield a different number of symbols");
2228
2229 // sort by symbol table address
2230 ParserAndSectionsArray extra = { this, sectionArray };
2231 ::qsort_r(array, _symbolsInSections, sizeof(uint32_t), &extra, &symbolIndexSorter);
2232
2233
2234 // look for two symbols at same address
2235 _overlappingSymbols = false;
2236 for (unsigned int i=1; i < _symbolsInSections; ++i) {
2237 if ( symbolFromIndex(array[i-1]).n_value() == symbolFromIndex(array[i]).n_value() ) {
2238 //fprintf(stderr, "overlapping symbols at 0x%08llX\n", symbolFromIndex(array[i-1]).n_value());
2239 _overlappingSymbols = true;
2240 break;
2241 }
2242 }
2243
2244 if ( log ) {
2245 fprintf(stderr, "sorted symbols:\n");
2246 for(unsigned int i=0; i < _symbolsInSections; ++i )
2247 fprintf(stderr, "0x%09llX symIndex=%d sectNum=%2d, %s\n", symbolFromIndex(array[i]).n_value(), array[i], symbolFromIndex(array[i]).n_sect(), nameFromSymbol(symbolFromIndex(array[i])) );
2248 }
2249 }
2250
2251 template <typename A>
2252 void Parser<A>::makeSections()
2253 {
2254 // classify each section by type
2255 // compute how many Section objects will be needed and total size for all
2256 unsigned int totalSectionsSize = 0;
2257 uint8_t machOSectsStorage[sizeof(MachOSectionAndSectionClass<P>)*(_machOSectionsCount+2)]; // also room for tentative-defs and absolute symbols
2258 // allocate raw storage for all section objects on stack
2259 MachOSectionAndSectionClass<P>* machOSects = (MachOSectionAndSectionClass<P>*)machOSectsStorage;
2260 unsigned int count = 0;
2261 for (uint32_t i=0; i < _machOSectionsCount; ++i) {
2262 const macho_section<P>* sect = &_sectionsStart[i];
2263 if ( (sect->flags() & S_ATTR_DEBUG) != 0 ) {
2264 if ( strcmp(sect->segname(), "__DWARF") == 0 ) {
2265 // note that .o file has dwarf
2266 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoDwarf;
2267 // save off iteresting dwarf sections
2268 if ( strcmp(sect->sectname(), "__debug_info") == 0 )
2269 _file->_dwarfDebugInfoSect = sect;
2270 else if ( strcmp(sect->sectname(), "__debug_abbrev") == 0 )
2271 _file->_dwarfDebugAbbrevSect = sect;
2272 else if ( strcmp(sect->sectname(), "__debug_line") == 0 )
2273 _file->_dwarfDebugLineSect = sect;
2274 else if ( strcmp(sect->sectname(), "__debug_str") == 0 )
2275 _file->_dwarfDebugStringSect = sect;
2276 // linker does not propagate dwarf sections to output file
2277 continue;
2278 }
2279 else if ( strcmp(sect->segname(), "__LD") == 0 ) {
2280 if ( strncmp(sect->sectname(), "__compact_unwind", 16) == 0 ) {
2281 machOSects[count].sect = sect;
2282 totalSectionsSize += sizeof(CUSection<A>);
2283 machOSects[count++].type = sectionTypeCompactUnwind;
2284 continue;
2285 }
2286 }
2287 }
2288 // ignore empty __OBJC sections
2289 if ( (sect->size() == 0) && (strcmp(sect->segname(), "__OBJC") == 0) )
2290 continue;
2291 // objc image info section is really attributes and not content
2292 if ( ((strcmp(sect->sectname(), "__image_info") == 0) && (strcmp(sect->segname(), "__OBJC") == 0))
2293 || ((strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0) && (strcmp(sect->segname(), "__DATA") == 0)) ) {
2294 // struct objc_image_info {
2295 // uint32_t version; // initially 0
2296 // uint32_t flags;
2297 // };
2298 // #define OBJC_IMAGE_SUPPORTS_GC 2
2299 // #define OBJC_IMAGE_GC_ONLY 4
2300 // #define OBJC_IMAGE_IS_SIMULATED 32
2301 //
2302 const uint32_t* contents = (uint32_t*)(_file->fileContent()+sect->offset());
2303 if ( (sect->size() >= 8) && (contents[0] == 0) ) {
2304 uint32_t flags = E::get32(contents[1]);
2305 if ( (flags & 4) == 4 )
2306 _file->_objConstraint = ld::File::objcConstraintGC;
2307 else if ( (flags & 2) == 2 )
2308 _file->_objConstraint = ld::File::objcConstraintRetainReleaseOrGC;
2309 else if ( (flags & 32) == 32 )
2310 _file->_objConstraint = ld::File::objcConstraintRetainReleaseForSimulator;
2311 else
2312 _file->_objConstraint = ld::File::objcConstraintRetainRelease;
2313 _file->_swiftVersion = ((flags >> 8) & 0xFF);
2314 if ( sect->size() > 8 ) {
2315 warning("section %s/%s has unexpectedly large size %llu in %s",
2316 sect->segname(), Section<A>::makeSectionName(sect), sect->size(), _file->path());
2317 }
2318 }
2319 else {
2320 warning("can't parse %s/%s section in %s", sect->segname(), Section<A>::makeSectionName(sect), _file->path());
2321 }
2322 continue;
2323 }
2324 machOSects[count].sect = sect;
2325 switch ( sect->flags() & SECTION_TYPE ) {
2326 case S_SYMBOL_STUBS:
2327 if ( _stubsSectionNum == 0 ) {
2328 _stubsSectionNum = i+1;
2329 _stubsMachOSection = sect;
2330 }
2331 else
2332 assert(1 && "multiple S_SYMBOL_STUBS sections");
2333 case S_LAZY_SYMBOL_POINTERS:
2334 break;
2335 case S_4BYTE_LITERALS:
2336 totalSectionsSize += sizeof(Literal4Section<A>);
2337 machOSects[count++].type = sectionTypeLiteral4;
2338 break;
2339 case S_8BYTE_LITERALS:
2340 totalSectionsSize += sizeof(Literal8Section<A>);
2341 machOSects[count++].type = sectionTypeLiteral8;
2342 break;
2343 case S_16BYTE_LITERALS:
2344 totalSectionsSize += sizeof(Literal16Section<A>);
2345 machOSects[count++].type = sectionTypeLiteral16;
2346 break;
2347 case S_NON_LAZY_SYMBOL_POINTERS:
2348 totalSectionsSize += sizeof(NonLazyPointerSection<A>);
2349 machOSects[count++].type = sectionTypeNonLazy;
2350 break;
2351 case S_LITERAL_POINTERS:
2352 if ( (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__cls_refs") == 0) ) {
2353 totalSectionsSize += sizeof(Objc1ClassReferences<A>);
2354 machOSects[count++].type = sectionTypeObjC1ClassRefs;
2355 }
2356 else {
2357 totalSectionsSize += sizeof(PointerToCStringSection<A>);
2358 machOSects[count++].type = sectionTypeCStringPointer;
2359 }
2360 break;
2361 case S_CSTRING_LITERALS:
2362 totalSectionsSize += sizeof(CStringSection<A>);
2363 machOSects[count++].type = sectionTypeCString;
2364 break;
2365 case S_MOD_INIT_FUNC_POINTERS:
2366 case S_MOD_TERM_FUNC_POINTERS:
2367 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
2368 case S_INTERPOSING:
2369 case S_ZEROFILL:
2370 case S_REGULAR:
2371 case S_COALESCED:
2372 case S_THREAD_LOCAL_REGULAR:
2373 case S_THREAD_LOCAL_ZEROFILL:
2374 if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__eh_frame") == 0) ) {
2375 totalSectionsSize += sizeof(CFISection<A>);
2376 machOSects[count++].type = sectionTypeCFI;
2377 }
2378 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__cfstring") == 0) ) {
2379 totalSectionsSize += sizeof(CFStringSection<A>);
2380 machOSects[count++].type = sectionTypeCFString;
2381 }
2382 else if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__ustring") == 0) ) {
2383 totalSectionsSize += sizeof(UTF16StringSection<A>);
2384 machOSects[count++].type = sectionTypeUTF16Strings;
2385 }
2386 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strncmp(sect->sectname(), "__objc_classrefs", 16) == 0) ) {
2387 totalSectionsSize += sizeof(ObjC2ClassRefsSection<A>);
2388 machOSects[count++].type = sectionTypeObjC2ClassRefs;
2389 }
2390 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__objc_catlist") == 0) ) {
2391 totalSectionsSize += sizeof(ObjC2CategoryListSection<A>);
2392 machOSects[count++].type = typeObjC2CategoryList;
2393 }
2394 else if ( _AppleObjc && (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__class") == 0) ) {
2395 totalSectionsSize += sizeof(ObjC1ClassSection<A>);
2396 machOSects[count++].type = sectionTypeObjC1Classes;
2397 }
2398 else {
2399 totalSectionsSize += sizeof(SymboledSection<A>);
2400 machOSects[count++].type = sectionTypeSymboled;
2401 }
2402 break;
2403 case S_THREAD_LOCAL_VARIABLES:
2404 totalSectionsSize += sizeof(TLVDefsSection<A>);
2405 machOSects[count++].type = sectionTypeTLVDefs;
2406 break;
2407 case S_THREAD_LOCAL_VARIABLE_POINTERS:
2408 default:
2409 throwf("unknown section type %d", sect->flags() & SECTION_TYPE);
2410 }
2411 }
2412
2413 // sort by address (mach-o object files don't aways have sections sorted)
2414 ::qsort(machOSects, count, sizeof(MachOSectionAndSectionClass<P>), MachOSectionAndSectionClass<P>::sorter);
2415
2416 // we will synthesize a dummy Section<A> object for tentative definitions
2417 if ( _tentativeDefinitionCount > 0 ) {
2418 totalSectionsSize += sizeof(TentativeDefinitionSection<A>);
2419 machOSects[count++].type = sectionTypeTentativeDefinitions;
2420 }
2421
2422 // we will synthesize a dummy Section<A> object for Absolute symbols
2423 if ( _absoluteSymbolCount > 0 ) {
2424 totalSectionsSize += sizeof(AbsoluteSymbolSection<A>);
2425 machOSects[count++].type = sectionTypeAbsoluteSymbols;
2426 }
2427
2428 // allocate one block for all Section objects as well as pointers to each
2429 uint8_t* space = new uint8_t[totalSectionsSize+count*sizeof(Section<A>*)];
2430 _file->_sectionsArray = (Section<A>**)space;
2431 _file->_sectionsArrayCount = count;
2432 Section<A>** objects = _file->_sectionsArray;
2433 space += count*sizeof(Section<A>*);
2434 for (uint32_t i=0; i < count; ++i) {
2435 switch ( machOSects[i].type ) {
2436 case sectionTypeIgnore:
2437 break;
2438 case sectionTypeLiteral4:
2439 *objects++ = new (space) Literal4Section<A>(*this, *_file, machOSects[i].sect);
2440 space += sizeof(Literal4Section<A>);
2441 break;
2442 case sectionTypeLiteral8:
2443 *objects++ = new (space) Literal8Section<A>(*this, *_file, machOSects[i].sect);
2444 space += sizeof(Literal8Section<A>);
2445 break;
2446 case sectionTypeLiteral16:
2447 *objects++ = new (space) Literal16Section<A>(*this, *_file, machOSects[i].sect);
2448 space += sizeof(Literal16Section<A>);
2449 break;
2450 case sectionTypeNonLazy:
2451 *objects++ = new (space) NonLazyPointerSection<A>(*this, *_file, machOSects[i].sect);
2452 space += sizeof(NonLazyPointerSection<A>);
2453 break;
2454 case sectionTypeCFI:
2455 _EHFrameSection = new (space) CFISection<A>(*this, *_file, machOSects[i].sect);
2456 *objects++ = _EHFrameSection;
2457 space += sizeof(CFISection<A>);
2458 break;
2459 case sectionTypeCString:
2460 *objects++ = new (space) CStringSection<A>(*this, *_file, machOSects[i].sect);
2461 space += sizeof(CStringSection<A>);
2462 break;
2463 case sectionTypeCStringPointer:
2464 *objects++ = new (space) PointerToCStringSection<A>(*this, *_file, machOSects[i].sect);
2465 space += sizeof(PointerToCStringSection<A>);
2466 break;
2467 case sectionTypeObjC1ClassRefs:
2468 *objects++ = new (space) Objc1ClassReferences<A>(*this, *_file, machOSects[i].sect);
2469 space += sizeof(Objc1ClassReferences<A>);
2470 break;
2471 case sectionTypeUTF16Strings:
2472 *objects++ = new (space) UTF16StringSection<A>(*this, *_file, machOSects[i].sect);
2473 space += sizeof(UTF16StringSection<A>);
2474 break;
2475 case sectionTypeCFString:
2476 *objects++ = new (space) CFStringSection<A>(*this, *_file, machOSects[i].sect);
2477 space += sizeof(CFStringSection<A>);
2478 break;
2479 case sectionTypeObjC2ClassRefs:
2480 *objects++ = new (space) ObjC2ClassRefsSection<A>(*this, *_file, machOSects[i].sect);
2481 space += sizeof(ObjC2ClassRefsSection<A>);
2482 break;
2483 case typeObjC2CategoryList:
2484 *objects++ = new (space) ObjC2CategoryListSection<A>(*this, *_file, machOSects[i].sect);
2485 space += sizeof(ObjC2CategoryListSection<A>);
2486 break;
2487 case sectionTypeObjC1Classes:
2488 *objects++ = new (space) ObjC1ClassSection<A>(*this, *_file, machOSects[i].sect);
2489 space += sizeof(ObjC1ClassSection<A>);
2490 break;
2491 case sectionTypeSymboled:
2492 *objects++ = new (space) SymboledSection<A>(*this, *_file, machOSects[i].sect);
2493 space += sizeof(SymboledSection<A>);
2494 break;
2495 case sectionTypeTLVDefs:
2496 *objects++ = new (space) TLVDefsSection<A>(*this, *_file, machOSects[i].sect);
2497 space += sizeof(TLVDefsSection<A>);
2498 break;
2499 case sectionTypeCompactUnwind:
2500 _compactUnwindSection = new (space) CUSection<A>(*this, *_file, machOSects[i].sect);
2501 *objects++ = _compactUnwindSection;
2502 space += sizeof(CUSection<A>);
2503 break;
2504 case sectionTypeTentativeDefinitions:
2505 *objects++ = new (space) TentativeDefinitionSection<A>(*this, *_file);
2506 space += sizeof(TentativeDefinitionSection<A>);
2507 break;
2508 case sectionTypeAbsoluteSymbols:
2509 _absoluteSection = new (space) AbsoluteSymbolSection<A>(*this, *_file);
2510 *objects++ = _absoluteSection;
2511 space += sizeof(AbsoluteSymbolSection<A>);
2512 break;
2513 default:
2514 throw "internal error uknown SectionType";
2515 }
2516 }
2517 }
2518
2519
2520 template <typename A>
2521 Section<A>* Parser<A>::sectionForAddress(typename A::P::uint_t addr)
2522 {
2523 for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2524 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2525 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2526 if ( sect != NULL ) {
2527 if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
2528 return _file->_sectionsArray[i];
2529 }
2530 }
2531 }
2532 // not strictly in any section
2533 // may be in a zero length section
2534 for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2535 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2536 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2537 if ( sect != NULL ) {
2538 if ( (sect->addr() == addr) && (sect->size() == 0) ) {
2539 return _file->_sectionsArray[i];
2540 }
2541 }
2542 }
2543
2544 throwf("sectionForAddress(0x%llX) address not in any section", (uint64_t)addr);
2545 }
2546
2547 template <typename A>
2548 Section<A>* Parser<A>::sectionForNum(unsigned int num)
2549 {
2550 for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2551 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2552 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2553 if ( sect != NULL ) {
2554 if ( num == (unsigned int)((sect - _sectionsStart)+1) )
2555 return _file->_sectionsArray[i];
2556 }
2557 }
2558 throwf("sectionForNum(%u) section number not for any section", num);
2559 }
2560
2561 template <typename A>
2562 Atom<A>* Parser<A>::findAtomByAddress(pint_t addr)
2563 {
2564 Section<A>* section = this->sectionForAddress(addr);
2565 return section->findAtomByAddress(addr);
2566 }
2567
2568 template <typename A>
2569 Atom<A>* Parser<A>::findAtomByAddressOrNullIfStub(pint_t addr)
2570 {
2571 if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) )
2572 return NULL;
2573 return findAtomByAddress(addr);
2574 }
2575
2576 template <typename A>
2577 Atom<A>* Parser<A>::findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom)
2578 {
2579 if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2580 // target is a stub, remove indirection
2581 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2582 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2583 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2584 // can't be to external weak symbol
2585 assert( (this->combineFromSymbol(sym) != ld::Atom::combineByName) || (this->scopeFromSymbol(sym) != ld::Atom::scopeGlobal) );
2586 *offsetInAtom = 0;
2587 return this->findAtomByName(this->nameFromSymbol(sym));
2588 }
2589 Atom<A>* target = this->findAtomByAddress(addr);
2590 *offsetInAtom = addr - target->_objAddress;
2591 return target;
2592 }
2593
2594 template <typename A>
2595 Atom<A>* Parser<A>::findAtomByName(const char* name)
2596 {
2597 uint8_t* p = _file->_atomsArray;
2598 for(int i=_file->_atomsArrayCount; i > 0; --i) {
2599 Atom<A>* atom = (Atom<A>*)p;
2600 if ( strcmp(name, atom->name()) == 0 )
2601 return atom;
2602 p += sizeof(Atom<A>);
2603 }
2604 return NULL;
2605 }
2606
2607 template <typename A>
2608 void Parser<A>::findTargetFromAddress(pint_t addr, TargetDesc& target)
2609 {
2610 if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2611 // target is a stub, remove indirection
2612 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2613 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2614 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2615 target.atom = NULL;
2616 target.name = this->nameFromSymbol(sym);
2617 target.weakImport = this->weakImportFromSymbol(sym);
2618 target.addend = 0;
2619 return;
2620 }
2621 Section<A>* section = this->sectionForAddress(addr);
2622 target.atom = section->findAtomByAddress(addr);
2623 target.addend = addr - target.atom->_objAddress;
2624 target.weakImport = false;
2625 target.name = NULL;
2626 }
2627
2628 template <typename A>
2629 void Parser<A>::findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target)
2630 {
2631 findTargetFromAddress(baseAddr, target);
2632 target.addend = addr - target.atom->_objAddress;
2633 }
2634
2635 template <typename A>
2636 void Parser<A>::findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum, TargetDesc& target)
2637 {
2638 if ( sectNum == R_ABS ) {
2639 // target is absolute symbol that corresponds to addr
2640 if ( _absoluteSection != NULL ) {
2641 target.atom = _absoluteSection->findAbsAtomForValue(addr);
2642 if ( target.atom != NULL ) {
2643 target.name = NULL;
2644 target.weakImport = false;
2645 target.addend = 0;
2646 return;
2647 }
2648 }
2649 throwf("R_ABS reloc but no absolute symbol at target address");
2650 }
2651
2652 if ( hasStubsSection() && (stubsSectionNum() == sectNum) ) {
2653 // target is a stub, remove indirection
2654 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2655 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2656 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2657 // use direct reference when stub is to a static function
2658 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (this->nameFromSymbol(sym)[0] == 'L')) ) {
2659 this->findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
2660 }
2661 else {
2662 target.atom = NULL;
2663 target.name = this->nameFromSymbol(sym);
2664 target.weakImport = this->weakImportFromSymbol(sym);
2665 target.addend = 0;
2666 }
2667 return;
2668 }
2669 Section<A>* section = this->sectionForNum(sectNum);
2670 target.atom = section->findAtomByAddress(addr);
2671 if ( target.atom == NULL ) {
2672 typedef typename A::P::sint_t sint_t;
2673 sint_t a = (sint_t)addr;
2674 sint_t sectStart = (sint_t)(section->machoSection()->addr());
2675 sint_t sectEnd = sectStart + section->machoSection()->size();
2676 if ( a < sectStart ) {
2677 // target address is before start of section, so must be negative addend
2678 target.atom = section->findAtomByAddress(sectStart);
2679 target.addend = a - sectStart;
2680 target.weakImport = false;
2681 target.name = NULL;
2682 return;
2683 }
2684 else if ( a >= sectEnd ) {
2685 target.atom = section->findAtomByAddress(sectEnd-1);
2686 target.addend = a - sectEnd;
2687 target.weakImport = false;
2688 target.name = NULL;
2689 return;
2690 }
2691 }
2692 assert(target.atom != NULL);
2693 target.addend = addr - target.atom->_objAddress;
2694 target.weakImport = false;
2695 target.name = NULL;
2696 }
2697
2698 template <typename A>
2699 void Parser<A>::addDtraceExtraInfos(const SourceLocation& src, const char* providerName)
2700 {
2701 // for every ___dtrace_stability$* and ___dtrace_typedefs$* undefine with
2702 // a matching provider name, add a by-name kDtraceTypeReference at probe site
2703 const char* dollar = strchr(providerName, '$');
2704 if ( dollar != NULL ) {
2705 int providerNameLen = dollar-providerName+1;
2706 for ( std::vector<const char*>::iterator it = _dtraceProviderInfo.begin(); it != _dtraceProviderInfo.end(); ++it) {
2707 const char* typeDollar = strchr(*it, '$');
2708 if ( typeDollar != NULL ) {
2709 if ( strncmp(typeDollar+1, providerName, providerNameLen) == 0 ) {
2710 addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindDtraceExtra,false, *it);
2711 }
2712 }
2713 }
2714 }
2715 }
2716
2717 template <typename A>
2718 const char* Parser<A>::scanSymbolTableForAddress(uint64_t addr)
2719 {
2720 uint64_t closestSymAddr = 0;
2721 const char* closestSymName = NULL;
2722 for (uint32_t i=0; i < this->_symbolCount; ++i) {
2723 const macho_nlist<P>& sym = symbolFromIndex(i);
2724 // ignore stabs
2725 if ( (sym.n_type() & N_STAB) != 0 )
2726 continue;
2727
2728 // only look at definitions
2729 if ( (sym.n_type() & N_TYPE) != N_SECT )
2730 continue;
2731
2732 // return with exact match
2733 if ( sym.n_value() == addr ) {
2734 const char* name = nameFromSymbol(sym);
2735 if ( strncmp(name, "ltmp", 4) != 0 )
2736 return name;
2737 // treat 'ltmp*' labels as close match
2738 closestSymAddr = sym.n_value();
2739 closestSymName = name;
2740 }
2741
2742 // record closest seen so far
2743 if ( (sym.n_value() < addr) && ((sym.n_value() > closestSymAddr) || (closestSymName == NULL)) )
2744 closestSymName = nameFromSymbol(sym);
2745 }
2746
2747 return (closestSymName != NULL) ? closestSymName : "unknown";
2748 }
2749
2750
2751 template <typename A>
2752 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind setKind, const TargetDesc& target)
2753 {
2754 // some fixup pairs can be combined
2755 ld::Fixup::Cluster cl = ld::Fixup::k1of3;
2756 ld::Fixup::Kind firstKind = ld::Fixup::kindSetTargetAddress;
2757 bool combined = false;
2758 if ( target.addend == 0 ) {
2759 cl = ld::Fixup::k1of1;
2760 combined = true;
2761 switch ( setKind ) {
2762 case ld::Fixup::kindStoreLittleEndian32:
2763 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian32;
2764 break;
2765 case ld::Fixup::kindStoreLittleEndian64:
2766 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian64;
2767 break;
2768 case ld::Fixup::kindStoreBigEndian32:
2769 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian32;
2770 break;
2771 case ld::Fixup::kindStoreBigEndian64:
2772 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian64;
2773 break;
2774 case ld::Fixup::kindStoreX86BranchPCRel32:
2775 firstKind = ld::Fixup::kindStoreTargetAddressX86BranchPCRel32;
2776 break;
2777 case ld::Fixup::kindStoreX86PCRel32:
2778 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32;
2779 break;
2780 case ld::Fixup::kindStoreX86PCRel32GOTLoad:
2781 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32GOTLoad;
2782 break;
2783 case ld::Fixup::kindStoreX86PCRel32TLVLoad:
2784 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32TLVLoad;
2785 break;
2786 case ld::Fixup::kindStoreX86Abs32TLVLoad:
2787 firstKind = ld::Fixup::kindStoreTargetAddressX86Abs32TLVLoad;
2788 break;
2789 case ld::Fixup::kindStoreARMBranch24:
2790 firstKind = ld::Fixup::kindStoreTargetAddressARMBranch24;
2791 break;
2792 case ld::Fixup::kindStoreThumbBranch22:
2793 firstKind = ld::Fixup::kindStoreTargetAddressThumbBranch22;
2794 break;
2795 #if SUPPORT_ARCH_arm64
2796 case ld::Fixup::kindStoreARM64Branch26:
2797 firstKind = ld::Fixup::kindStoreTargetAddressARM64Branch26;
2798 break;
2799 case ld::Fixup::kindStoreARM64Page21:
2800 firstKind = ld::Fixup::kindStoreTargetAddressARM64Page21;
2801 break;
2802 case ld::Fixup::kindStoreARM64PageOff12:
2803 firstKind = ld::Fixup::kindStoreTargetAddressARM64PageOff12;
2804 break;
2805 case ld::Fixup::kindStoreARM64GOTLoadPage21:
2806 firstKind = ld::Fixup::kindStoreTargetAddressARM64GOTLoadPage21;
2807 break;
2808 case ld::Fixup::kindStoreARM64GOTLoadPageOff12:
2809 firstKind = ld::Fixup::kindStoreTargetAddressARM64GOTLoadPageOff12;
2810 break;
2811 case ld::Fixup::kindStoreARM64TLVPLoadPage21:
2812 firstKind = ld::Fixup::kindStoreTargetAddressARM64TLVPLoadPage21;
2813 break;
2814 case ld::Fixup::kindStoreARM64TLVPLoadPageOff12:
2815 firstKind = ld::Fixup::kindStoreTargetAddressARM64TLVPLoadPageOff12;
2816 break;
2817 #endif
2818 default:
2819 combined = false;
2820 cl = ld::Fixup::k1of2;
2821 break;
2822 }
2823 }
2824
2825 if ( target.atom != NULL ) {
2826 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2827 addFixup(src, cl, firstKind, target.atom);
2828 }
2829 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2830 addFixup(src, cl, firstKind, ld::Fixup::bindingByContentBound, target.atom);
2831 }
2832 else if ( (src.atom->section().type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
2833 // backing string in CFStrings should always be direct
2834 addFixup(src, cl, firstKind, target.atom);
2835 }
2836 else if ( (src.atom == target.atom) && (target.atom->combine() == ld::Atom::combineByName) ) {
2837 // reference to self should always be direct
2838 addFixup(src, cl, firstKind, target.atom);
2839 }
2840 else {
2841 // change direct fixup to by-name fixup
2842 addFixup(src, cl, firstKind, false, target.atom->name());
2843 }
2844 }
2845 else {
2846 addFixup(src, cl, firstKind, target.weakImport, target.name);
2847 }
2848 if ( target.addend == 0 ) {
2849 if ( ! combined )
2850 addFixup(src, ld::Fixup::k2of2, setKind);
2851 }
2852 else {
2853 addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, target.addend);
2854 addFixup(src, ld::Fixup::k3of3, setKind);
2855 }
2856 }
2857
2858 template <typename A>
2859 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase)
2860 {
2861 ld::Fixup::Cluster cl = (target.addend == 0) ? ld::Fixup::k1of4 : ld::Fixup::k1of5;
2862 if ( target.atom != NULL ) {
2863 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2864 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.atom);
2865 }
2866 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2867 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
2868 }
2869 else {
2870 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
2871 }
2872 }
2873 else {
2874 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.weakImport, target.name);
2875 }
2876 if ( target.addend == 0 ) {
2877 assert(picBase.atom != NULL);
2878 addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2879 addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, picBase.addend);
2880 addFixup(src, ld::Fixup::k4of4, kind);
2881 }
2882 else {
2883 addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
2884 addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2885 addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, picBase.addend);
2886 addFixup(src, ld::Fixup::k5of5, kind);
2887 }
2888 }
2889
2890
2891
2892 template <typename A>
2893 uint32_t TentativeDefinitionSection<A>::computeAtomCount(class Parser<A>& parser,
2894 struct Parser<A>::LabelAndCFIBreakIterator& it,
2895 const struct Parser<A>::CFI_CU_InfoArrays&)
2896 {
2897 return parser.tentativeDefinitionCount();
2898 }
2899
2900 template <typename A>
2901 uint32_t TentativeDefinitionSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2902 struct Parser<A>::LabelAndCFIBreakIterator& it,
2903 const struct Parser<A>::CFI_CU_InfoArrays&)
2904 {
2905 this->_beginAtoms = (Atom<A>*)p;
2906 uint32_t count = 0;
2907 for (uint32_t i=parser.undefinedStartIndex(); i < parser.undefinedEndIndex(); ++i) {
2908 const macho_nlist<P>& sym = parser.symbolFromIndex(i);
2909 if ( ((sym.n_type() & N_TYPE) == N_UNDF) && (sym.n_value() != 0) ) {
2910 uint64_t size = sym.n_value();
2911 uint8_t alignP2 = GET_COMM_ALIGN(sym.n_desc());
2912 if ( alignP2 == 0 ) {
2913 // common symbols align to their size
2914 // that is, a 4-byte common aligns to 4-bytes
2915 // if this size is not a power of two,
2916 // then round up to the next power of two
2917 alignP2 = 63 - (uint8_t)__builtin_clzll(size);
2918 if ( size != (1ULL << alignP2) )
2919 ++alignP2;
2920 }
2921 // limit alignment of extremely large commons to 2^15 bytes (8-page)
2922 if ( alignP2 > 15 )
2923 alignP2 = 15;
2924 Atom<A>* allocatedSpace = (Atom<A>*)p;
2925 new (allocatedSpace) Atom<A>(*this, parser.nameFromSymbol(sym), (pint_t)ULLONG_MAX, size,
2926 ld::Atom::definitionTentative, ld::Atom::combineByName,
2927 parser.scopeFromSymbol(sym), ld::Atom::typeZeroFill, ld::Atom::symbolTableIn,
2928 parser.dontDeadStripFromSymbol(sym), false, false, ld::Atom::Alignment(alignP2) );
2929 p += sizeof(Atom<A>);
2930 ++count;
2931 }
2932 }
2933 this->_endAtoms = (Atom<A>*)p;
2934 return count;
2935 }
2936
2937
2938 template <typename A>
2939 uint32_t AbsoluteSymbolSection<A>::computeAtomCount(class Parser<A>& parser,
2940 struct Parser<A>::LabelAndCFIBreakIterator& it,
2941 const struct Parser<A>::CFI_CU_InfoArrays&)
2942 {
2943 return parser.absoluteSymbolCount();
2944 }
2945
2946 template <typename A>
2947 uint32_t AbsoluteSymbolSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2948 struct Parser<A>::LabelAndCFIBreakIterator& it,
2949 const struct Parser<A>::CFI_CU_InfoArrays&)
2950 {
2951 this->_beginAtoms = (Atom<A>*)p;
2952 uint32_t count = 0;
2953 for (uint32_t i=0; i < parser.symbolCount(); ++i) {
2954 const macho_nlist<P>& sym = parser.symbolFromIndex(i);
2955 if ( (sym.n_type() & N_TYPE) != N_ABS )
2956 continue;
2957 const char* absName = parser.nameFromSymbol(sym);
2958 // ignore .objc_class_name_* symbols
2959 if ( strncmp(absName, ".objc_class_name_", 17) == 0 )
2960 continue;
2961 // ignore .objc_class_name_* symbols
2962 if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
2963 continue;
2964 // ignore empty *.eh symbols
2965 if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
2966 continue;
2967
2968 Atom<A>* allocatedSpace = (Atom<A>*)p;
2969 new (allocatedSpace) Atom<A>(*this, parser, sym, 0);
2970 p += sizeof(Atom<A>);
2971 ++count;
2972 }
2973 this->_endAtoms = (Atom<A>*)p;
2974 return count;
2975 }
2976
2977 template <typename A>
2978 Atom<A>* AbsoluteSymbolSection<A>::findAbsAtomForValue(typename A::P::uint_t value)
2979 {
2980 Atom<A>* end = this->_endAtoms;
2981 for(Atom<A>* p = this->_beginAtoms; p < end; ++p) {
2982 if ( p->_objAddress == value )
2983 return p;
2984 }
2985 return NULL;
2986 }
2987
2988
2989 template <typename A>
2990 uint32_t Parser<A>::indirectSymbol(uint32_t indirectIndex)
2991 {
2992 if ( indirectIndex >= _indirectTableCount )
2993 throw "indirect symbol index out of range";
2994 return E::get32(_indirectTable[indirectIndex]);
2995 }
2996
2997 template <typename A>
2998 const macho_nlist<typename A::P>& Parser<A>::symbolFromIndex(uint32_t index)
2999 {
3000 if ( index > _symbolCount )
3001 throw "symbol index out of range";
3002 return _symbols[index];
3003 }
3004
3005 template <typename A>
3006 const macho_section<typename A::P>* Parser<A>::machOSectionFromSectionIndex(uint32_t index)
3007 {
3008 if ( index >= _machOSectionsCount )
3009 throw "section index out of range";
3010 return &_sectionsStart[index];
3011 }
3012
3013 template <typename A>
3014 uint32_t Parser<A>::symbolIndexFromIndirectSectionAddress(pint_t addr, const macho_section<P>* sect)
3015 {
3016 uint32_t elementSize = 0;
3017 switch ( sect->flags() & SECTION_TYPE ) {
3018 case S_SYMBOL_STUBS:
3019 elementSize = sect->reserved2();
3020 break;
3021 case S_LAZY_SYMBOL_POINTERS:
3022 case S_NON_LAZY_SYMBOL_POINTERS:
3023 elementSize = sizeof(pint_t);
3024 break;
3025 default:
3026 throw "section does not use inirect symbol table";
3027 }
3028 uint32_t indexInSection = (addr - sect->addr()) / elementSize;
3029 uint32_t indexIntoIndirectTable = sect->reserved1() + indexInSection;
3030 return this->indirectSymbol(indexIntoIndirectTable);
3031 }
3032
3033
3034
3035 template <typename A>
3036 const char* Parser<A>::nameFromSymbol(const macho_nlist<P>& sym)
3037 {
3038 return &_strings[sym.n_strx()];
3039 }
3040
3041 template <typename A>
3042 ld::Atom::Scope Parser<A>::scopeFromSymbol(const macho_nlist<P>& sym)
3043 {
3044 if ( (sym.n_type() & N_EXT) == 0 )
3045 return ld::Atom::scopeTranslationUnit;
3046 else if ( (sym.n_type() & N_PEXT) != 0 )
3047 return ld::Atom::scopeLinkageUnit;
3048 else if ( this->nameFromSymbol(sym)[0] == 'l' ) // since all 'l' symbols will be remove, don't make them global
3049 return ld::Atom::scopeLinkageUnit;
3050 else
3051 return ld::Atom::scopeGlobal;
3052 }
3053
3054 template <typename A>
3055 ld::Atom::Definition Parser<A>::definitionFromSymbol(const macho_nlist<P>& sym)
3056 {
3057 switch ( sym.n_type() & N_TYPE ) {
3058 case N_ABS:
3059 return ld::Atom::definitionAbsolute;
3060 case N_SECT:
3061 return ld::Atom::definitionRegular;
3062 case N_UNDF:
3063 if ( sym.n_value() != 0 )
3064 return ld::Atom::definitionTentative;
3065 }
3066 throw "definitionFromSymbol() bad symbol";
3067 }
3068
3069 template <typename A>
3070 ld::Atom::Combine Parser<A>::combineFromSymbol(const macho_nlist<P>& sym)
3071 {
3072 if ( sym.n_desc() & N_WEAK_DEF )
3073 return ld::Atom::combineByName;
3074 else
3075 return ld::Atom::combineNever;
3076 }
3077
3078
3079 template <typename A>
3080 ld::Atom::SymbolTableInclusion Parser<A>::inclusionFromSymbol(const macho_nlist<P>& sym)
3081 {
3082 const char* symbolName = nameFromSymbol(sym);
3083 // labels beginning with 'l' (lowercase ell) are automatically removed in final linked images <rdar://problem/4571042>
3084 // labels beginning with 'L' should have been stripped by the assembler, so are stripped now
3085 if ( sym.n_desc() & REFERENCED_DYNAMICALLY )
3086 return ld::Atom::symbolTableInAndNeverStrip;
3087 else if ( symbolName[0] == 'l' )
3088 return ld::Atom::symbolTableNotInFinalLinkedImages;
3089 else if ( symbolName[0] == 'L' )
3090 return ld::Atom::symbolTableNotIn;
3091 else
3092 return ld::Atom::symbolTableIn;
3093 }
3094
3095 template <typename A>
3096 bool Parser<A>::dontDeadStripFromSymbol(const macho_nlist<P>& sym)
3097 {
3098 return ( (sym.n_desc() & (N_NO_DEAD_STRIP|REFERENCED_DYNAMICALLY)) != 0 );
3099 }
3100
3101 template <typename A>
3102 bool Parser<A>::isThumbFromSymbol(const macho_nlist<P>& sym)
3103 {
3104 return ( sym.n_desc() & N_ARM_THUMB_DEF );
3105 }
3106
3107 template <typename A>
3108 bool Parser<A>::weakImportFromSymbol(const macho_nlist<P>& sym)
3109 {
3110 return ( ((sym.n_type() & N_TYPE) == N_UNDF) && ((sym.n_desc() & N_WEAK_REF) != 0) );
3111 }
3112
3113 template <typename A>
3114 bool Parser<A>::resolverFromSymbol(const macho_nlist<P>& sym)
3115 {
3116 return ( sym.n_desc() & N_SYMBOL_RESOLVER );
3117 }
3118
3119 template <typename A>
3120 bool Parser<A>::altEntryFromSymbol(const macho_nlist<P>& sym)
3121 {
3122 return ( sym.n_desc() & N_ALT_ENTRY );
3123 }
3124
3125
/* Advance *OFFSET past one LEB128 value (signed or unsigned): every byte
   with the high bit set, plus the first byte without it.  Never moves
   past END; a value truncated by END leaves *OFFSET equal to END.  */
static void
skip_leb128 (const uint8_t ** offset, const uint8_t * end)
{
  const uint8_t * cursor = *offset;

  while (cursor != end)
    {
      const uint8_t byte = *cursor++;
      /* A clear high bit marks the final byte of the value.  */
      if (byte < 0x80)
	break;
    }
  *offset = cursor;
}
3135
/* Decode a ULEB128 starting at *OFFSET into a 64-bit word, advancing
   *OFFSET past the bytes consumed.  Returns (uint64_t)-1 if END is reached
   before the value is complete, or if the value does not fit in 64 bits;
   in the overflow case the remaining bytes of the uleb128 are still
   consumed.  */
static uint64_t
read_uleb128 (const uint8_t ** offset, const uint8_t * end)
{
  uint64_t value = 0;
  int shift = 0;

  for (;;)
    {
      if (*offset == end)
	return (uint64_t) -1;

      const uint8_t raw = *(*offset)++;
      const uint64_t payload = raw & 0x7f;

      /* The 7 payload bits must survive being shifted into place.  */
      const bool fits = (shift < 64) && (((payload << shift) >> shift) == payload);
      if (fits)
	{
	  value |= payload << shift;
	  shift += 7;
	}
      else
	value = (uint64_t) -1;

      /* A clear high bit marks the final byte of the value.  */
      if (raw < 0x80)
	break;
    }
  return value;
}
3159
3160
3161 /* Skip over a DWARF attribute of form FORM. */
3162 template <typename A>
3163 bool Parser<A>::skip_form(const uint8_t ** offset, const uint8_t * end, uint64_t form,
3164 uint8_t addr_size, bool dwarf64)
3165 {
3166 int64_t sz=0;
3167
3168 switch (form)
3169 {
3170 case DW_FORM_addr:
3171 sz = addr_size;
3172 break;
3173
3174 case DW_FORM_block2:
3175 if (end - *offset < 2)
3176 return false;
3177 sz = 2 + A::P::E::get16(*(uint16_t*)offset);
3178 break;
3179
3180 case DW_FORM_block4:
3181 if (end - *offset < 4)
3182 return false;
3183 sz = 2 + A::P::E::get32(*(uint32_t*)offset);
3184 break;
3185
3186 case DW_FORM_data2:
3187 case DW_FORM_ref2:
3188 sz = 2;
3189 break;
3190
3191 case DW_FORM_data4:
3192 case DW_FORM_ref4:
3193 sz = 4;
3194 break;
3195
3196 case DW_FORM_data8:
3197 case DW_FORM_ref8:
3198 sz = 8;
3199 break;
3200
3201 case DW_FORM_string:
3202 while (*offset != end && **offset)
3203 ++*offset;
3204 case DW_FORM_data1:
3205 case DW_FORM_flag:
3206 case DW_FORM_ref1:
3207 sz = 1;
3208 break;
3209
3210 case DW_FORM_block:
3211 sz = read_uleb128 (offset, end);
3212 break;
3213
3214 case DW_FORM_block1:
3215 if (*offset == end)
3216 return false;
3217 sz = 1 + **offset;
3218 break;
3219
3220 case DW_FORM_sdata:
3221 case DW_FORM_udata:
3222 case DW_FORM_ref_udata:
3223 skip_leb128 (offset, end);
3224 return true;
3225
3226 case DW_FORM_strp:
3227 case DW_FORM_ref_addr:
3228 sz = 4;
3229 break;
3230
3231 case DW_FORM_sec_offset:
3232 sz = sizeof(typename A::P::uint_t);
3233 break;
3234
3235 case DW_FORM_exprloc:
3236 sz = read_uleb128 (offset, end);
3237 break;
3238
3239 case DW_FORM_flag_present:
3240 sz = 0;
3241 break;
3242
3243 case DW_FORM_ref_sig8:
3244 sz = 8;
3245 break;
3246
3247 default:
3248 return false;
3249 }
3250 if (end - *offset < sz)
3251 return false;
3252 *offset += sz;
3253 return true;
3254 }
3255
3256
3257 template <typename A>
3258 const char* Parser<A>::getDwarfString(uint64_t form, const uint8_t*& di)
3259 {
3260 uint32_t offset;
3261 const char* dwarfStrings;
3262 const char* result = NULL;
3263 switch (form) {
3264 case DW_FORM_string:
3265 result = (const char*)di;
3266 di += strlen(result) + 1;
3267 break;
3268 case DW_FORM_strp:
3269 offset = E::get32(*((uint32_t*)di));
3270 dwarfStrings = (char*)_file->fileContent() + _file->_dwarfDebugStringSect->offset();
3271 if ( offset < _file->_dwarfDebugStringSect->size() )
3272 result = &dwarfStrings[offset];
3273 else
3274 warning("dwarf DW_FORM_strp (offset=0x%08X) is too big in %s", offset, this->_path);
3275 di += 4;
3276 break;
3277 default:
3278 warning("unknown dwarf string encoding (form=%lld) in %s", form, this->_path);
3279 break;
3280 }
3281 return result;
3282 }
3283
3284 template <typename A>
3285 uint64_t Parser<A>::getDwarfOffset(uint64_t form, const uint8_t*& di, bool dwarf64)
3286 {
3287 if ( form == DW_FORM_sec_offset )
3288 form = (dwarf64 ? DW_FORM_data8 : DW_FORM_data4);
3289 uint64_t result = -1;
3290 switch (form) {
3291 case DW_FORM_data4:
3292 result = A::P::E::get32(*(uint32_t*)di);
3293 di += 4;
3294 break;
3295 case DW_FORM_data8:
3296 result = A::P::E::get64(*(uint64_t*)di);
3297 di += 8;
3298 break;
3299 default:
3300 warning("unknown dwarf DW_FORM_ for DW_AT_stmt_list in %s", this->_path);
3301 }
3302 return result;
3303 }
3304
3305
3306 template <typename A>
3307 struct AtomAndLineInfo {
3308 Atom<A>* atom;
3309 ld::Atom::LineInfo info;
3310 };
3311
3312
3313 // <rdar://problem/5591394> Add support to ld64 for N_FUN stabs when used for symbolic constants
3314 // Returns whether a stabStr belonging to an N_FUN stab represents a
3315 // symbolic constant rather than a function
3316 template <typename A>
3317 bool Parser<A>::isConstFunStabs(const char *stabStr)
3318 {
3319 const char* colon;
3320 // N_FUN can be used for both constants and for functions. In case it's a constant,
3321 // the format of the stabs string is "symname:c=<value>;"
3322 // ':' cannot appear in the symbol name, except if it's an Objective-C method
3323 // (in which case the symbol name starts with + or -, and then it's definitely
3324 // not a constant)
3325 return (stabStr != NULL) && (stabStr[0] != '+') && (stabStr[0] != '-')
3326 && ((colon = strchr(stabStr, ':')) != NULL)
3327 && (colon[1] == 'c') && (colon[2] == '=');
3328 }
3329
3330
3331 template <typename A>
3332 void Parser<A>::parseDebugInfo()
3333 {
3334 // check for dwarf __debug_info section
3335 if ( _file->_dwarfDebugInfoSect == NULL ) {
3336 // if no DWARF debug info, look for stabs
3337 this->parseStabs();
3338 return;
3339 }
3340 if ( _file->_dwarfDebugInfoSect->size() == 0 )
3341 return;
3342
3343 uint64_t stmtList;
3344 const char* tuDir;
3345 const char* tuName;
3346 if ( !read_comp_unit(&tuName, &tuDir, &stmtList) ) {
3347 // if can't parse dwarf, warn and give up
3348 _file->_dwarfTranslationUnitPath = NULL;
3349 warning("can't parse dwarf compilation unit info in %s", _path);
3350 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoNone;
3351 return;
3352 }
3353 if ( (tuName != NULL) && (tuName[0] == '/') ) {
3354 _file->_dwarfTranslationUnitPath = tuName;
3355 }
3356 else if ( (tuDir != NULL) && (tuName != NULL) ) {
3357 asprintf((char**)&(_file->_dwarfTranslationUnitPath), "%s/%s", tuDir, tuName);
3358 }
3359 else if ( tuDir == NULL ) {
3360 _file->_dwarfTranslationUnitPath = tuName;
3361 }
3362 else {
3363 _file->_dwarfTranslationUnitPath = NULL;
3364 }
3365
3366 // add line number info to atoms from dwarf
3367 std::vector<AtomAndLineInfo<A> > entries;
3368 entries.reserve(64);
3369 if ( _file->_debugInfoKind == ld::relocatable::File::kDebugInfoDwarf ) {
3370 // file with just data will have no __debug_line info
3371 if ( (_file->_dwarfDebugLineSect != NULL) && (_file->_dwarfDebugLineSect->size() != 0) ) {
3372 // validate stmt_list
3373 if ( (stmtList != (uint64_t)-1) && (stmtList < _file->_dwarfDebugLineSect->size()) ) {
3374 const uint8_t* debug_line = (uint8_t*)_file->fileContent() + _file->_dwarfDebugLineSect->offset();
3375 struct line_reader_data* lines = line_open(&debug_line[stmtList],
3376 _file->_dwarfDebugLineSect->size() - stmtList, E::little_endian);
3377 struct line_info result;
3378 Atom<A>* curAtom = NULL;
3379 uint32_t curAtomOffset = 0;
3380 uint32_t curAtomAddress = 0;
3381 uint32_t curAtomSize = 0;
3382 std::map<uint32_t,const char*> dwarfIndexToFile;
3383 if ( lines != NULL ) {
3384 while ( line_next(lines, &result, line_stop_pc) ) {
3385 //fprintf(stderr, "curAtom=%p, result.pc=0x%llX, result.line=%llu, result.end_of_sequence=%d,"
3386 // " curAtomAddress=0x%X, curAtomSize=0x%X\n",
3387 // curAtom, result.pc, result.line, result.end_of_sequence, curAtomAddress, curAtomSize);
3388 // work around weird debug line table compiler generates if no functions in __text section
3389 if ( (curAtom == NULL) && (result.pc == 0) && result.end_of_sequence && (result.file == 1))
3390 continue;
3391 // for performance, see if in next pc is in current atom
3392 if ( (curAtom != NULL) && (curAtomAddress <= result.pc) && (result.pc < (curAtomAddress+curAtomSize)) ) {
3393 curAtomOffset = result.pc - curAtomAddress;
3394 }
3395 // or pc at end of current atom
3396 else if ( result.end_of_sequence && (curAtom != NULL) && (result.pc == (curAtomAddress+curAtomSize)) ) {
3397 curAtomOffset = result.pc - curAtomAddress;
3398 }
3399 // or only one function that is a one line function
3400 else if ( result.end_of_sequence && (curAtom == NULL) && (this->findAtomByAddress(0) != NULL) && (result.pc == this->findAtomByAddress(0)->size()) ) {
3401 curAtom = this->findAtomByAddress(0);
3402 curAtomOffset = result.pc - curAtom->objectAddress();
3403 curAtomAddress = curAtom->objectAddress();
3404 curAtomSize = curAtom->size();
3405 }
3406 else {
3407 // do slow look up of atom by address
3408 try {
3409 curAtom = this->findAtomByAddress(result.pc);
3410 }
3411 catch (...) {
3412 // in case of bug in debug info, don't abort link, just limp on
3413 curAtom = NULL;
3414 }
3415 if ( curAtom == NULL )
3416 break; // file has line info but no functions
3417 if ( result.end_of_sequence && (curAtomAddress+curAtomSize < result.pc) ) {
3418 // a one line function can be returned by line_next() as one entry with pc at end of blob
3419 // look for alt atom starting at end of previous atom
3420 uint32_t previousEnd = curAtomAddress+curAtomSize;
3421 Atom<A>* alt = this->findAtomByAddressOrNullIfStub(previousEnd);
3422 if ( alt == NULL )
3423 continue; // ignore spurious debug info for stubs
3424 if ( result.pc <= alt->objectAddress() + alt->size() ) {
3425 curAtom = alt;
3426 curAtomOffset = result.pc - alt->objectAddress();
3427 curAtomAddress = alt->objectAddress();
3428 curAtomSize = alt->size();
3429 }
3430 else {
3431 curAtomOffset = result.pc - curAtom->objectAddress();
3432 curAtomAddress = curAtom->objectAddress();
3433 curAtomSize = curAtom->size();
3434 }
3435 }
3436 else {
3437 curAtomOffset = result.pc - curAtom->objectAddress();
3438 curAtomAddress = curAtom->objectAddress();
3439 curAtomSize = curAtom->size();
3440 }
3441 }
3442 const char* filename;
3443 std::map<uint32_t,const char*>::iterator pos = dwarfIndexToFile.find(result.file);
3444 if ( pos == dwarfIndexToFile.end() ) {
3445 filename = line_file(lines, result.file);
3446 dwarfIndexToFile[result.file] = filename;
3447 }
3448 else {
3449 filename = pos->second;
3450 }
3451 // only record for ~8000 line info records per function
3452 if ( curAtom->roomForMoreLineInfoCount() ) {
3453 AtomAndLineInfo<A> entry;
3454 entry.atom = curAtom;
3455 entry.info.atomOffset = curAtomOffset;
3456 entry.info.fileName = filename;
3457 entry.info.lineNumber = result.line;
3458 //fprintf(stderr, "addr=0x%08llX, line=%lld, file=%s, atom=%s, atom.size=0x%X, end=%d\n",
3459 // result.pc, result.line, filename, curAtom->name(), curAtomSize, result.end_of_sequence);
3460 entries.push_back(entry);
3461 curAtom->incrementLineInfoCount();
3462 }
3463 if ( result.end_of_sequence ) {
3464 curAtom = NULL;
3465 }
3466 }
3467 line_free(lines);
3468 }
3469 }
3470 }
3471 }
3472
3473 // assign line info start offset for each atom
3474 uint8_t* p = _file->_atomsArray;
3475 uint32_t liOffset = 0;
3476 for(int i=_file->_atomsArrayCount; i > 0; --i) {
3477 Atom<A>* atom = (Atom<A>*)p;
3478 atom->_lineInfoStartIndex = liOffset;
3479 liOffset += atom->_lineInfoCount;
3480 atom->_lineInfoCount = 0;
3481 p += sizeof(Atom<A>);
3482 }
3483 assert(liOffset == entries.size());
3484 _file->_lineInfos.reserve(liOffset);
3485
3486 // copy each line info for each atom
3487 for (typename std::vector<AtomAndLineInfo<A> >::iterator it = entries.begin(); it != entries.end(); ++it) {
3488 uint32_t slot = it->atom->_lineInfoStartIndex + it->atom->_lineInfoCount;
3489 _file->_lineInfos[slot] = it->info;
3490 it->atom->_lineInfoCount++;
3491 }
3492
3493 // done with temp vector
3494 entries.clear();
3495 }
3496
3497 template <typename A>
3498 void Parser<A>::parseStabs()
3499 {
3500 // scan symbol table for stabs entries
3501 Atom<A>* currentAtom = NULL;
3502 pint_t currentAtomAddress = 0;
3503 enum { start, inBeginEnd, inFun } state = start;
3504 for (uint32_t symbolIndex = 0; symbolIndex < _symbolCount; ++symbolIndex ) {
3505 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
3506 bool useStab = true;
3507 uint8_t type = sym.n_type();
3508 const char* symString = (sym.n_strx() != 0) ? this->nameFromSymbol(sym) : NULL;
3509 if ( (type & N_STAB) != 0 ) {
3510 _file->_debugInfoKind = (_hasUUID ? ld::relocatable::File::kDebugInfoStabsUUID : ld::relocatable::File::kDebugInfoStabs);
3511 ld::relocatable::File::Stab stab;
3512 stab.atom = NULL;
3513 stab.type = type;
3514 stab.other = sym.n_sect();
3515 stab.desc = sym.n_desc();
3516 stab.value = sym.n_value();
3517 stab.string = NULL;
3518 switch (state) {
3519 case start:
3520 switch (type) {
3521 case N_BNSYM:
3522 // beginning of function block
3523 state = inBeginEnd;
3524 // fall into case to lookup atom by addresss
3525 case N_LCSYM:
3526 case N_STSYM:
3527 currentAtomAddress = sym.n_value();
3528 currentAtom = this->findAtomByAddress(currentAtomAddress);
3529 if ( currentAtom != NULL ) {
3530 stab.atom = currentAtom;
3531 stab.string = symString;
3532 }
3533 else {
3534 fprintf(stderr, "can't find atom for stabs BNSYM at %08llX in %s",
3535 (uint64_t)sym.n_value(), _path);
3536 }
3537 break;
3538 case N_SO:
3539 case N_OSO:
3540 case N_OPT:
3541 case N_LSYM:
3542 case N_RSYM:
3543 case N_PSYM:
3544 case N_AST:
3545 // not associated with an atom, just copy
3546 stab.string = symString;
3547 break;
3548 case N_GSYM:
3549 {
3550 // n_value field is NOT atom address ;-(
3551 // need to find atom by name match
3552 const char* colon = strchr(symString, ':');
3553 if ( colon != NULL ) {
3554 // build underscore leading name
3555 int nameLen = colon - symString;
3556 char symName[nameLen+2];
3557 strlcpy(&symName[1], symString, nameLen+1);
3558 symName[0] = '_';
3559 symName[nameLen+1] = '\0';
3560 currentAtom = this->findAtomByName(symName);
3561 if ( currentAtom != NULL ) {
3562 stab.atom = currentAtom;
3563 stab.string = symString;
3564 }
3565 }
3566 else {
3567 // might be a debug-note without trailing :G()
3568 currentAtom = this->findAtomByName(symString);
3569 if ( currentAtom != NULL ) {
3570 stab.atom = currentAtom;
3571 stab.string = symString;
3572 }
3573 }
3574 if ( stab.atom == NULL ) {
3575 // ld_classic added bogus GSYM stabs for old style dtrace probes
3576 if ( (strncmp(symString, "__dtrace_probe$", 15) != 0) )
3577 warning("can't find atom for N_GSYM stabs %s in %s", symString, _path);
3578 useStab = false;
3579 }
3580 break;
3581 }
3582 case N_FUN:
3583 if ( isConstFunStabs(symString) ) {
3584 // constant not associated with a function
3585 stab.string = symString;
3586 }
3587 else {
3588 // old style stabs without BNSYM
3589 state = inFun;
3590 currentAtomAddress = sym.n_value();
3591 currentAtom = this->findAtomByAddress(currentAtomAddress);
3592 if ( currentAtom != NULL ) {
3593 stab.atom = currentAtom;
3594 stab.string = symString;
3595 }
3596 else {
3597 warning("can't find atom for stabs FUN at %08llX in %s",
3598 (uint64_t)currentAtomAddress, _path);
3599 }
3600 }
3601 break;
3602 case N_SOL:
3603 case N_SLINE:
3604 stab.string = symString;
3605 // old stabs
3606 break;
3607 case N_BINCL:
3608 case N_EINCL:
3609 case N_EXCL:
3610 stab.string = symString;
3611 // -gfull built .o file
3612 break;
3613 default:
3614 warning("unknown stabs type 0x%X in %s", type, _path);
3615 }
3616 break;
3617 case inBeginEnd:
3618 stab.atom = currentAtom;
3619 switch (type) {
3620 case N_ENSYM:
3621 state = start;
3622 currentAtom = NULL;
3623 break;
3624 case N_LCSYM:
3625 case N_STSYM:
3626 {
3627 Atom<A>* nestedAtom = this->findAtomByAddress(sym.n_value());
3628 if ( nestedAtom != NULL ) {
3629 stab.atom = nestedAtom;
3630 stab.string = symString;
3631 }
3632 else {
3633 warning("can't find atom for stabs 0x%X at %08llX in %s",
3634 type, (uint64_t)sym.n_value(), _path);
3635 }
3636 break;
3637 }
3638 case N_LBRAC:
3639 case N_RBRAC:
3640 case N_SLINE:
3641 // adjust value to be offset in atom
3642 stab.value -= currentAtomAddress;
3643 default:
3644 stab.string = symString;
3645 break;
3646 }
3647 break;
3648 case inFun:
3649 switch (type) {
3650 case N_FUN:
3651 if ( isConstFunStabs(symString) ) {
3652 stab.atom = currentAtom;
3653 stab.string = symString;
3654 }
3655 else {
3656 if ( sym.n_sect() != 0 ) {
3657 // found another start stab, must be really old stabs...
3658 currentAtomAddress = sym.n_value();
3659 currentAtom = this->findAtomByAddress(currentAtomAddress);
3660 if ( currentAtom != NULL ) {
3661 stab.atom = currentAtom;
3662 stab.string = symString;
3663 }
3664 else {
3665 warning("can't find atom for stabs FUN at %08llX in %s",
3666 (uint64_t)currentAtomAddress, _path);
3667 }
3668 }
3669 else {
3670 // found ending stab, switch back to start state
3671 stab.string = symString;
3672 stab.atom = currentAtom;
3673 state = start;
3674 currentAtom = NULL;
3675 }
3676 }
3677 break;
3678 case N_LBRAC:
3679 case N_RBRAC:
3680 case N_SLINE:
3681 // adjust value to be offset in atom
3682 stab.value -= currentAtomAddress;
3683 stab.atom = currentAtom;
3684 break;
3685 case N_SO:
3686 stab.string = symString;
3687 state = start;
3688 break;
3689 default:
3690 stab.atom = currentAtom;
3691 stab.string = symString;
3692 break;
3693 }
3694 break;
3695 }
3696 // add to list of stabs for this .o file
3697 if ( useStab )
3698 _file->_stabs.push_back(stab);
3699 }
3700 }
3701 }
3702
3703
3704
3705 // Look at the compilation unit DIE and determine
3706 // its NAME, compilation directory (in COMP_DIR) and its
3707 // line number information offset (in STMT_LIST). NAME and COMP_DIR
3708 // may be NULL (especially COMP_DIR) if they are not in the .o file;
3709 // STMT_LIST will be (uint64_t) -1.
3710 //
3711 // At present this assumes that there's only one compilation unit DIE.
3712 //
3713 template <typename A>
3714 bool Parser<A>::read_comp_unit(const char ** name, const char ** comp_dir,
3715 uint64_t *stmt_list)
3716 {
3717 const uint8_t * debug_info;
3718 const uint8_t * debug_abbrev;
3719 const uint8_t * di;
3720 const uint8_t * da;
3721 const uint8_t * end;
3722 const uint8_t * enda;
3723 uint64_t sz;
3724 uint16_t vers;
3725 uint64_t abbrev_base;
3726 uint64_t abbrev;
3727 uint8_t address_size;
3728 bool dwarf64;
3729
3730 *name = NULL;
3731 *comp_dir = NULL;
3732 *stmt_list = (uint64_t) -1;
3733
3734 if ( (_file->_dwarfDebugInfoSect == NULL) || (_file->_dwarfDebugAbbrevSect == NULL) )
3735 return false;
3736
3737 debug_info = (uint8_t*)_file->fileContent() + _file->_dwarfDebugInfoSect->offset();
3738 debug_abbrev = (uint8_t*)_file->fileContent() + _file->_dwarfDebugAbbrevSect->offset();
3739 di = debug_info;
3740
3741 if (_file->_dwarfDebugInfoSect->size() < 12)
3742 /* Too small to be a real debug_info section. */
3743 return false;
3744 sz = A::P::E::get32(*(uint32_t*)di);
3745 di += 4;
3746 dwarf64 = sz == 0xffffffff;
3747 if (dwarf64)
3748 sz = A::P::E::get64(*(uint64_t*)di), di += 8;
3749 else if (sz > 0xffffff00)
3750 /* Unknown dwarf format. */
3751 return false;
3752
3753 /* Verify claimed size. */
3754 if (sz + (di - debug_info) > _file->_dwarfDebugInfoSect->size() || sz <= (dwarf64 ? 23 : 11))
3755 return false;
3756
3757 vers = A::P::E::get16(*(uint16_t*)di);
3758 if (vers < 2 || vers > 4)
3759 /* DWARF version wrong for this code.
3760 Chances are we could continue anyway, but we don't know for sure. */
3761 return false;
3762 di += 2;
3763
3764 /* Find the debug_abbrev section. */
3765 abbrev_base = dwarf64 ? A::P::E::get64(*(uint64_t*)di) : A::P::E::get32(*(uint32_t*)di);
3766 di += dwarf64 ? 8 : 4;
3767
3768 if (abbrev_base > _file->_dwarfDebugAbbrevSect->size())
3769 return false;
3770 da = debug_abbrev + abbrev_base;
3771 enda = debug_abbrev + _file->_dwarfDebugAbbrevSect->size();
3772
3773 address_size = *di++;
3774
3775 /* Find the abbrev number we're looking for. */
3776 end = di + sz;
3777 abbrev = read_uleb128 (&di, end);
3778 if (abbrev == (uint64_t) -1)
3779 return false;
3780
3781 /* Skip through the debug_abbrev section looking for that abbrev. */
3782 for (;;)
3783 {
3784 uint64_t this_abbrev = read_uleb128 (&da, enda);
3785 uint64_t attr;
3786
3787 if (this_abbrev == abbrev)
3788 /* This is almost always taken. */
3789 break;
3790 skip_leb128 (&da, enda); /* Skip the tag. */
3791 if (da == enda)
3792 return false;
3793 da++; /* Skip the DW_CHILDREN_* value. */
3794
3795 do {
3796 attr = read_uleb128 (&da, enda);
3797 skip_leb128 (&da, enda);
3798 } while (attr != 0 && attr != (uint64_t) -1);
3799 if (attr != 0)
3800 return false;
3801 }
3802
3803 /* Check that the abbrev is one for a DW_TAG_compile_unit. */
3804 if (read_uleb128 (&da, enda) != DW_TAG_compile_unit)
3805 return false;
3806 if (da == enda)
3807 return false;
3808 da++; /* Skip the DW_CHILDREN_* value. */
3809
3810 /* Now, go through the DIE looking for DW_AT_name,
3811 DW_AT_comp_dir, and DW_AT_stmt_list. */
3812 for (;;)
3813 {
3814 uint64_t attr = read_uleb128 (&da, enda);
3815 uint64_t form = read_uleb128 (&da, enda);
3816
3817 if (attr == (uint64_t) -1)
3818 return false;
3819 else if (attr == 0)
3820 return true;
3821 if (form == DW_FORM_indirect)
3822 form = read_uleb128 (&di, end);
3823
3824 switch (attr) {
3825 case DW_AT_name:
3826 *name = getDwarfString(form, di);
3827 break;
3828 case DW_AT_comp_dir:
3829 *comp_dir = getDwarfString(form, di);
3830 break;
3831 case DW_AT_stmt_list:
3832 *stmt_list = getDwarfOffset(form, di, dwarf64);
3833 break;
3834 default:
3835 if (! skip_form (&di, end, form, address_size, dwarf64))
3836 return false;
3837 }
3838 }
3839 }
3840
3841
3842
3843 template <typename A>
3844 File<A>::~File()
3845 {
3846 free(_sectionsArray);
3847 free(_atomsArray);
3848 }
3849
3850 template <typename A>
3851 const char* File<A>::translationUnitSource() const
3852 {
3853 return _dwarfTranslationUnitPath;
3854 }
3855
3856
3857
3858 template <typename A>
3859 bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
3860 {
3861 handler.doFile(*this);
3862 uint8_t* p = _atomsArray;
3863 for(int i=_atomsArrayCount; i > 0; --i) {
3864 handler.doAtom(*((Atom<A>*)p));
3865 p += sizeof(Atom<A>);
3866 }
3867 p = _aliasAtomsArray;
3868 for(int i=_aliasAtomsArrayCount; i > 0; --i) {
3869 handler.doAtom(*((AliasAtom*)p));
3870 p += sizeof(AliasAtom);
3871 }
3872
3873 return (_atomsArrayCount != 0) || (_aliasAtomsArrayCount != 0);
3874 }
3875
3876 template <typename A>
3877 const char* Section<A>::makeSegmentName(const macho_section<typename A::P>* sect)
3878 {
3879 // mach-o section record only has room for 16-byte seg/sect names
3880 // so a 16-byte name has no trailing zero
3881 const char* name = sect->segname();
3882 if ( strlen(name) < 16 )
3883 return name;
3884 char* tmp = new char[17];
3885 strlcpy(tmp, name, 17);
3886 return tmp;
3887 }
3888
3889 template <typename A>
3890 const char* Section<A>::makeSectionName(const macho_section<typename A::P>* sect)
3891 {
3892 const char* name = sect->sectname();
3893 if ( strlen(name) < 16 )
3894 return name;
3895
3896 // special case common long section names so we don't have to malloc
3897 if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3898 return "__objc_classrefs";
3899 if ( strncmp(sect->sectname(), "__objc_classlist", 16) == 0 )
3900 return "__objc_classlist";
3901 if ( strncmp(sect->sectname(), "__objc_nlclslist", 16) == 0 )
3902 return "__objc_nlclslist";
3903 if ( strncmp(sect->sectname(), "__objc_nlcatlist", 16) == 0 )
3904 return "__objc_nlcatlist";
3905 if ( strncmp(sect->sectname(), "__objc_protolist", 16) == 0 )
3906 return "__objc_protolist";
3907 if ( strncmp(sect->sectname(), "__objc_protorefs", 16) == 0 )
3908 return "__objc_protorefs";
3909 if ( strncmp(sect->sectname(), "__objc_superrefs", 16) == 0 )
3910 return "__objc_superrefs";
3911 if ( strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0 )
3912 return "__objc_imageinfo";
3913 if ( strncmp(sect->sectname(), "__objc_stringobj", 16) == 0 )
3914 return "__objc_stringobj";
3915 if ( strncmp(sect->sectname(), "__gcc_except_tab", 16) == 0 )
3916 return "__gcc_except_tab";
3917
3918 char* tmp = new char[17];
3919 strlcpy(tmp, name, 17);
3920 return tmp;
3921 }
3922
3923 template <typename A>
3924 bool Section<A>::readable(const macho_section<typename A::P>* sect)
3925 {
3926 return true;
3927 }
3928
3929 template <typename A>
3930 bool Section<A>::writable(const macho_section<typename A::P>* sect)
3931 {
3932 // mach-o .o files do not contain segment permissions
3933 // we just know TEXT is special
3934 return ( strcmp(sect->segname(), "__TEXT") != 0 );
3935 }
3936
3937 template <typename A>
3938 bool Section<A>::exectuable(const macho_section<typename A::P>* sect)
3939 {
3940 // mach-o .o files do not contain segment permissions
3941 // we just know TEXT is special
3942 return ( strcmp(sect->segname(), "__TEXT") == 0 );
3943 }
3944
3945
3946 template <typename A>
3947 ld::Section::Type Section<A>::sectionType(const macho_section<typename A::P>* sect)
3948 {
3949 switch ( sect->flags() & SECTION_TYPE ) {
3950 case S_ZEROFILL:
3951 return ld::Section::typeZeroFill;
3952 case S_CSTRING_LITERALS:
3953 if ( (strcmp(sect->sectname(), "__cstring") == 0) && (strcmp(sect->segname(), "__TEXT") == 0) )
3954 return ld::Section::typeCString;
3955 else
3956 return ld::Section::typeNonStdCString;
3957 case S_4BYTE_LITERALS:
3958 return ld::Section::typeLiteral4;
3959 case S_8BYTE_LITERALS:
3960 return ld::Section::typeLiteral8;
3961 case S_LITERAL_POINTERS:
3962 return ld::Section::typeCStringPointer;
3963 case S_NON_LAZY_SYMBOL_POINTERS:
3964 return ld::Section::typeNonLazyPointer;
3965 case S_LAZY_SYMBOL_POINTERS:
3966 return ld::Section::typeLazyPointer;
3967 case S_SYMBOL_STUBS:
3968 return ld::Section::typeStub;
3969 case S_MOD_INIT_FUNC_POINTERS:
3970 return ld::Section::typeInitializerPointers;
3971 case S_MOD_TERM_FUNC_POINTERS:
3972 return ld::Section::typeTerminatorPointers;
3973 case S_INTERPOSING:
3974 return ld::Section::typeUnclassified;
3975 case S_16BYTE_LITERALS:
3976 return ld::Section::typeLiteral16;
3977 case S_REGULAR:
3978 case S_COALESCED:
3979 if ( sect->flags() & S_ATTR_PURE_INSTRUCTIONS ) {
3980 return ld::Section::typeCode;
3981 }
3982 else if ( strcmp(sect->segname(), "__TEXT") == 0 ) {
3983 if ( strcmp(sect->sectname(), "__eh_frame") == 0 )
3984 return ld::Section::typeCFI;
3985 else if ( strcmp(sect->sectname(), "__ustring") == 0 )
3986 return ld::Section::typeUTF16Strings;
3987 else if ( strcmp(sect->sectname(), "__textcoal_nt") == 0 )
3988 return ld::Section::typeCode;
3989 else if ( strcmp(sect->sectname(), "__StaticInit") == 0 )
3990 return ld::Section::typeCode;
3991 else if ( strcmp(sect->sectname(), "__constructor") == 0 )
3992 return ld::Section::typeInitializerPointers;
3993 }
3994 else if ( strcmp(sect->segname(), "__DATA") == 0 ) {
3995 if ( strcmp(sect->sectname(), "__cfstring") == 0 )
3996 return ld::Section::typeCFString;
3997 else if ( strcmp(sect->sectname(), "__dyld") == 0 )
3998 return ld::Section::typeDyldInfo;
3999 else if ( strcmp(sect->sectname(), "__program_vars") == 0 )
4000 return ld::Section::typeDyldInfo;
4001 else if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
4002 return ld::Section::typeObjCClassRefs;
4003 else if ( strcmp(sect->sectname(), "__objc_catlist") == 0 )
4004 return ld::Section::typeObjC2CategoryList;
4005 }
4006 else if ( strcmp(sect->segname(), "__OBJC") == 0 ) {
4007 if ( strcmp(sect->sectname(), "__class") == 0 )
4008 return ld::Section::typeObjC1Classes;
4009 }
4010 break;
4011 case S_THREAD_LOCAL_REGULAR:
4012 return ld::Section::typeTLVInitialValues;
4013 case S_THREAD_LOCAL_ZEROFILL:
4014 return ld::Section::typeTLVZeroFill;
4015 case S_THREAD_LOCAL_VARIABLES:
4016 return ld::Section::typeTLVDefs;
4017 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
4018 return ld::Section::typeTLVInitializerPointers;
4019 }
4020 return ld::Section::typeUnclassified;
4021 }
4022
4023
4024 template <typename A>
4025 Atom<A>* Section<A>::findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end)
4026 {
4027 // do a binary search of atom array
4028 uint32_t atomCount = end - start;
4029 Atom<A>* base = start;
4030 for (uint32_t n = atomCount; n > 0; n /= 2) {
4031 Atom<A>* pivot = &base[n/2];
4032 pint_t atomStartAddr = pivot->_objAddress;
4033 pint_t atomEndAddr = atomStartAddr + pivot->_size;
4034 if ( atomStartAddr <= addr ) {
4035 // address in normal atom
4036 if (addr < atomEndAddr)
4037 return pivot;
4038 // address in "end" label (but not in alias)
4039 if ( (pivot->_size == 0) && (addr == atomEndAddr) && !pivot->isAlias() )
4040 return pivot;
4041 }
4042 if ( addr >= atomEndAddr ) {
4043 // key > pivot
4044 // move base to atom after pivot
4045 base = &pivot[1];
4046 --n;
4047 }
4048 else {
4049 // key < pivot
4050 // keep same base
4051 }
4052 }
4053 return NULL;
4054 }
4055
4056 template <typename A>
4057 ld::Atom::Alignment Section<A>::alignmentForAddress(pint_t addr)
4058 {
4059 const uint32_t sectionAlignment = this->_machOSection->align();
4060 uint32_t modulus = (addr % (1 << sectionAlignment));
4061 if ( modulus > 0xFFFF )
4062 warning("alignment for symbol at address 0x%08llX in %s exceeds 2^16", (uint64_t)addr, this->file().path());
4063 return ld::Atom::Alignment(sectionAlignment, modulus);
4064 }
4065
4066 template <typename A>
4067 uint32_t Section<A>::sectionNum(class Parser<A>& parser) const
4068 {
4069 if ( _machOSection == NULL )
4070 return 0;
4071 else
4072 return 1 + (this->_machOSection - parser.firstMachOSection());
4073 }
4074
4075 // arm does not have zero cost exceptions
4076 template <>
4077 uint32_t CFISection<arm>::cfiCount(Parser<arm>& parser)
4078 {
4079 if ( parser.armUsesZeroCostExceptions() ) {
4080 // create ObjectAddressSpace object for use by libunwind
4081 OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
4082 return libunwind::CFI_Parser<OAS>::getCFICount(oas,
4083 this->_machOSection->addr(), this->_machOSection->size());
4084 }
4085 return 0;
4086 }
4087
4088 template <typename A>
4089 uint32_t CFISection<A>::cfiCount(Parser<A>& parser)
4090 {
4091 // create ObjectAddressSpace object for use by libunwind
4092 OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
4093 return libunwind::CFI_Parser<OAS>::getCFICount(oas,
4094 this->_machOSection->addr(), this->_machOSection->size());
4095 }
4096
4097 template <typename A>
4098 void CFISection<A>::warnFunc(void* ref, uint64_t funcAddr, const char* msg)
4099 {
4100 Parser<A>* parser = (Parser<A>*)ref;
4101 if ( ! parser->warnUnwindConversionProblems() )
4102 return;
4103 if ( funcAddr != CFI_INVALID_ADDRESS ) {
4104 // atoms are not constructed yet, so scan symbol table for labels
4105 const char* name = parser->scanSymbolTableForAddress(funcAddr);
4106 warning("could not create compact unwind for %s: %s", name, msg);
4107 }
4108 else {
4109 warning("could not create compact unwind: %s", msg);
4110 }
4111 }
4112
4113 template <>
4114 bool CFISection<x86_64>::needsRelocating()
4115 {
4116 return true;
4117 }
4118
4119 template <>
4120 bool CFISection<arm64>::needsRelocating()
4121 {
4122 return true;
4123 }
4124
4125 template <typename A>
4126 bool CFISection<A>::needsRelocating()
4127 {
4128 return false;
4129 }
4130
4131 template <>
4132 void CFISection<x86_64>::cfiParse(class Parser<x86_64>& parser, uint8_t* buffer,
4133 libunwind::CFI_Atom_Info<CFISection<x86_64>::OAS>::CFI_Atom_Info cfiArray[],
4134 uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
4135 {
4136 // copy __eh_frame data to buffer
4137 memcpy(buffer, file().fileContent() + this->_machOSection->offset(), this->_machOSection->size());
4138
4139 // and apply relocations
4140 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + this->_machOSection->reloff());
4141 const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4142 for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4143 uint64_t value = 0;
4144 switch ( reloc->r_type() ) {
4145 case X86_64_RELOC_SUBTRACTOR:
4146 value = 0 - parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4147 ++reloc;
4148 if ( reloc->r_extern() )
4149 value += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4150 break;
4151 case X86_64_RELOC_UNSIGNED:
4152 value = parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4153 break;
4154 case X86_64_RELOC_GOT:
4155 // this is used for the reference to the personality function in CIEs
4156 // store the symbol number of the personality function for later use as a Fixup
4157 value = reloc->r_symbolnum();
4158 break;
4159 default:
4160 fprintf(stderr, "CFISection::cfiParse() unexpected relocation type at r_address=0x%08X\n", reloc->r_address());
4161 break;
4162 }
4163 uint64_t* p64;
4164 uint32_t* p32;
4165 switch ( reloc->r_length() ) {
4166 case 3:
4167 p64 = (uint64_t*)&buffer[reloc->r_address()];
4168 E::set64(*p64, value + E::get64(*p64));
4169 break;
4170 case 2:
4171 p32 = (uint32_t*)&buffer[reloc->r_address()];
4172 E::set32(*p32, value + E::get32(*p32));
4173 break;
4174 default:
4175 fprintf(stderr, "CFISection::cfiParse() unexpected relocation size at r_address=0x%08X\n", reloc->r_address());
4176 break;
4177 }
4178 }
4179
4180 // create ObjectAddressSpace object for use by libunwind
4181 OAS oas(*this, buffer);
4182
4183 // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
4184 const char* msg;
4185 msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86_64>::parseCFIs(
4186 oas, this->_machOSection->addr(), this->_machOSection->size(),
4187 cuStarts, cuCount, parser.keepDwarfUnwind(), parser.forceDwarfConversion(), parser.neverConvertDwarf(),
4188 cfiArray, count, (void*)&parser, warnFunc);
4189 if ( msg != NULL )
4190 throwf("malformed __eh_frame section: %s", msg);
4191 }
4192
4193 template <>
4194 void CFISection<x86>::cfiParse(class Parser<x86>& parser, uint8_t* buffer,
4195 libunwind::CFI_Atom_Info<CFISection<x86>::OAS>::CFI_Atom_Info cfiArray[],
4196 uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
4197 {
4198 // create ObjectAddressSpace object for use by libunwind
4199 OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
4200
4201 // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
4202 const char* msg;
4203 msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86>::parseCFIs(
4204 oas, this->_machOSection->addr(), this->_machOSection->size(),
4205 cuStarts, cuCount, parser.keepDwarfUnwind(), parser.forceDwarfConversion(), parser.neverConvertDwarf(),
4206 cfiArray, count, (void*)&parser, warnFunc);
4207 if ( msg != NULL )
4208 throwf("malformed __eh_frame section: %s", msg);
4209 }
4210
4211
4212
4213
4214 template <>
4215 void CFISection<arm>::cfiParse(class Parser<arm>& parser, uint8_t* buffer,
4216 libunwind::CFI_Atom_Info<CFISection<arm>::OAS>::CFI_Atom_Info cfiArray[],
4217 uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
4218 {
4219 if ( !parser.armUsesZeroCostExceptions() ) {
4220 // most arm do not use zero cost exceptions
4221 assert(count == 0);
4222 return;
4223 }
4224 // create ObjectAddressSpace object for use by libunwind
4225 OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
4226
4227 // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
4228 const char* msg;
4229 msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_arm>::parseCFIs(
4230 oas, this->_machOSection->addr(), this->_machOSection->size(),
4231 cuStarts, cuCount, parser.keepDwarfUnwind(), parser.forceDwarfConversion(), parser.neverConvertDwarf(),
4232 cfiArray, count, (void*)&parser, warnFunc);
4233 if ( msg != NULL )
4234 throwf("malformed __eh_frame section: %s", msg);
4235 }
4236
4237
4238
4239
4240 template <>
4241 void CFISection<arm64>::cfiParse(class Parser<arm64>& parser, uint8_t* buffer,
4242 libunwind::CFI_Atom_Info<CFISection<arm64>::OAS>::CFI_Atom_Info cfiArray[],
4243 uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
4244 {
4245 // copy __eh_frame data to buffer
4246 memcpy(buffer, file().fileContent() + this->_machOSection->offset(), this->_machOSection->size());
4247
4248 // and apply relocations
4249 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + this->_machOSection->reloff());
4250 const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4251 for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4252 uint64_t* p64 = (uint64_t*)&buffer[reloc->r_address()];
4253 uint32_t* p32 = (uint32_t*)&buffer[reloc->r_address()];
4254 uint32_t addend32 = E::get32(*p32);
4255 uint64_t addend64 = E::get64(*p64);
4256 uint64_t value = 0;
4257 switch ( reloc->r_type() ) {
4258 case ARM64_RELOC_SUBTRACTOR:
4259 value = 0 - parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4260 ++reloc;
4261 if ( reloc->r_extern() )
4262 value += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4263 break;
4264 case ARM64_RELOC_UNSIGNED:
4265 value = parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4266 break;
4267 case ARM64_RELOC_POINTER_TO_GOT:
4268 // this is used for the reference to the personality function in CIEs
4269 // store the symbol number of the personality function for later use as a Fixup
4270 value = reloc->r_symbolnum();
4271 addend32 = 0;
4272 addend64 = 0;
4273 break;
4274 default:
4275 fprintf(stderr, "CFISection::cfiParse() unexpected relocation type at r_address=0x%08X\n", reloc->r_address());
4276 break;
4277 }
4278 switch ( reloc->r_length() ) {
4279 case 3:
4280 E::set64(*p64, value + addend64);
4281 break;
4282 case 2:
4283 E::set32(*p32, value + addend32);
4284 break;
4285 default:
4286 fprintf(stderr, "CFISection::cfiParse() unexpected relocation size at r_address=0x%08X\n", reloc->r_address());
4287 break;
4288 }
4289 }
4290
4291
4292 // create ObjectAddressSpace object for use by libunwind
4293 OAS oas(*this, buffer);
4294
4295 // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
4296 const char* msg;
4297 msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_arm64>::parseCFIs(
4298 oas, this->_machOSection->addr(), this->_machOSection->size(),
4299 cuStarts, cuCount, parser.keepDwarfUnwind(), parser.forceDwarfConversion(), parser.neverConvertDwarf(),
4300 cfiArray, count, (void*)&parser, warnFunc);
4301 if ( msg != NULL )
4302 throwf("malformed __eh_frame section: %s", msg);
4303 }
4304
4305
4306 template <typename A>
4307 uint32_t CFISection<A>::computeAtomCount(class Parser<A>& parser,
4308 struct Parser<A>::LabelAndCFIBreakIterator& it,
4309 const struct Parser<A>::CFI_CU_InfoArrays& cfis)
4310 {
4311 return cfis.cfiCount;
4312 }
4313
4314
4315
4316 template <typename A>
4317 uint32_t CFISection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4318 struct Parser<A>::LabelAndCFIBreakIterator& it,
4319 const struct Parser<A>::CFI_CU_InfoArrays& cfis)
4320 {
4321 this->_beginAtoms = (Atom<A>*)p;
4322 // walk CFI_Atom_Info array and create atom for each entry
4323 const CFI_Atom_Info* start = &cfis.cfiArray[0];
4324 const CFI_Atom_Info* end = &cfis.cfiArray[cfis.cfiCount];
4325 for(const CFI_Atom_Info* a=start; a < end; ++a) {
4326 Atom<A>* space = (Atom<A>*)p;
4327 new (space) Atom<A>(*this, (a->isCIE ? "CIE" : "FDE"), a->address, a->size,
4328 ld::Atom::definitionRegular, ld::Atom::combineNever, ld::Atom::scopeTranslationUnit,
4329 ld::Atom::typeCFI, ld::Atom::symbolTableNotInFinalLinkedImages,
4330 false, false, false, ld::Atom::Alignment(0));
4331 p += sizeof(Atom<A>);
4332 }
4333 this->_endAtoms = (Atom<A>*)p;
4334 return cfis.cfiCount;
4335 }
4336
4337
4338 template <> bool CFISection<x86_64>::bigEndian() { return false; }
4339 template <> bool CFISection<x86>::bigEndian() { return false; }
4340 template <> bool CFISection<arm>::bigEndian() { return false; }
4341 template <> bool CFISection<arm64>::bigEndian() { return false; }
4342
4343
4344 template <>
4345 void CFISection<x86_64>::addCiePersonalityFixups(class Parser<x86_64>& parser, const CFI_Atom_Info* cieInfo)
4346 {
4347 uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
4348 if ( personalityEncoding == 0x9B ) {
4349 // compiler always produces X86_64_RELOC_GOT with addend of 4 to personality function
4350 // CFISection<x86_64>::cfiParse() set targetAddress to be symbolIndex + 4 + addressInCIE
4351 uint32_t symbolIndex = cieInfo->u.cieInfo.personality.targetAddress - 4
4352 - cieInfo->address - cieInfo->u.cieInfo.personality.offsetInCFI;
4353 const macho_nlist<P>& sym = parser.symbolFromIndex(symbolIndex);
4354 const char* personalityName = parser.nameFromSymbol(sym);
4355
4356 Atom<x86_64>* cieAtom = this->findAtomByAddress(cieInfo->address);
4357 Parser<x86_64>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
4358 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, personalityName);
4359 parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, 4);
4360 parser.addFixup(src, ld::Fixup::k3of3, ld::Fixup::kindStoreX86PCRel32GOT);
4361 }
4362 else if ( personalityEncoding != 0 ) {
4363 throwf("unsupported address encoding (%02X) of personality function in CIE",
4364 personalityEncoding);
4365 }
4366 }
4367
4368 template <>
4369 void CFISection<x86>::addCiePersonalityFixups(class Parser<x86>& parser, const CFI_Atom_Info* cieInfo)
4370 {
4371 uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
4372 if ( (personalityEncoding == 0x9B) || (personalityEncoding == 0x90) ) {
4373 uint32_t offsetInCFI = cieInfo->u.cieInfo.personality.offsetInCFI;
4374 uint32_t nlpAddr = cieInfo->u.cieInfo.personality.targetAddress;
4375 Atom<x86>* cieAtom = this->findAtomByAddress(cieInfo->address);
4376 Atom<x86>* nlpAtom = parser.findAtomByAddress(nlpAddr);
4377 assert(nlpAtom->contentType() == ld::Atom::typeNonLazyPointer);
4378 Parser<x86>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
4379
4380 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, nlpAtom);
4381 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
4382 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, offsetInCFI);
4383 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
4384 }
4385 else if ( personalityEncoding != 0 ) {
4386 throwf("unsupported address encoding (%02X) of personality function in CIE", personalityEncoding);
4387 }
4388 }
4389
#if SUPPORT_ARCH_arm64
// Adds the fixups that bind a CIE's personality pointer to the personality
// function (arm64).  Only encoding 0x9B is supported; an encoding of 0 adds
// nothing, and anything else is reported through throwf().
template <>
void CFISection<arm64>::addCiePersonalityFixups(class Parser<arm64>& parser, const CFI_Atom_Info* cieInfo)
{
	const uint8_t encoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
	if ( encoding == 0 )
		return;
	if ( encoding != 0x9B ) {
		throwf("unsupported address encoding (%02X) of personality function in CIE",
				encoding);
	}

	// CFISection<arm64>::cfiParse() stored the personality's symbol index at the
	// ARM64_RELOC_POINTER_TO_GOT location, which left targetAddress as
	// symbolIndex + addressInCIE; undo that to get the symbol index back.
	uint32_t symbolIndex = cieInfo->u.cieInfo.personality.targetAddress
								- cieInfo->address - cieInfo->u.cieInfo.personality.offsetInCFI;
	const char* personalityName = parser.nameFromSymbol(parser.symbolFromIndex(symbolIndex));

	// fixups live at the personality field inside the CIE atom
	Atom<arm64>* cieAtom = this->findAtomByAddress(cieInfo->address);
	Parser<arm64>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
	parser.addFixup(src, ld::Fixup::k1of2, ld::Fixup::kindSetTargetAddress, false, personalityName);
	parser.addFixup(src, ld::Fixup::k2of2, ld::Fixup::kindStoreARM64PCRelToGOT);
}
#endif
4414
4415 template <>
4416 void CFISection<arm>::addCiePersonalityFixups(class Parser<arm>& parser, const CFI_Atom_Info* cieInfo)
4417 {
4418 uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
4419 if ( (personalityEncoding == 0x9B) || (personalityEncoding == 0x90) ) {
4420 uint32_t offsetInCFI = cieInfo->u.cieInfo.personality.offsetInCFI;
4421 uint32_t nlpAddr = cieInfo->u.cieInfo.personality.targetAddress;
4422 Atom<arm>* cieAtom = this->findAtomByAddress(cieInfo->address);
4423 Atom<arm>* nlpAtom = parser.findAtomByAddress(nlpAddr);
4424 assert(nlpAtom->contentType() == ld::Atom::typeNonLazyPointer);
4425 Parser<arm>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
4426
4427 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, nlpAtom);
4428 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
4429 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, offsetInCFI);
4430 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
4431 }
4432 else if ( personalityEncoding != 0 ) {
4433 throwf("unsupported address encoding (%02X) of personality function in CIE", personalityEncoding);
4434 }
4435 }
4436
4437
4438
4439 template <typename A>
4440 void CFISection<A>::addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo)
4441 {
4442 assert(0 && "addCiePersonalityFixups() not implemented for arch");
4443 }
4444
4445 template <typename A>
4446 void CFISection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cfis)
4447 {
4448 ld::Fixup::Kind store32 = bigEndian() ? ld::Fixup::kindStoreBigEndian32 : ld::Fixup::kindStoreLittleEndian32;
4449 ld::Fixup::Kind store64 = bigEndian() ? ld::Fixup::kindStoreBigEndian64 : ld::Fixup::kindStoreLittleEndian64;
4450
4451 // add all references for FDEs, including implicit group references
4452 const CFI_Atom_Info* end = &cfis.cfiArray[cfis.cfiCount];
4453 for(const CFI_Atom_Info* p = &cfis.cfiArray[0]; p < end; ++p) {
4454 if ( p->isCIE ) {
4455 // add reference to personality function if used
4456 if ( p->u.cieInfo.personality.targetAddress != CFI_INVALID_ADDRESS ) {
4457 this->addCiePersonalityFixups(parser, p);
4458 }
4459 }
4460 else {
4461 // find FDE Atom
4462 Atom<A>* fdeAtom = this->findAtomByAddress(p->address);
4463 // find function Atom
4464 Atom<A>* functionAtom = parser.findAtomByAddress(p->u.fdeInfo.function.targetAddress);
4465 // find CIE Atom
4466 Atom<A>* cieAtom = this->findAtomByAddress(p->u.fdeInfo.cie.targetAddress);
4467 // find LSDA Atom
4468 Atom<A>* lsdaAtom = NULL;
4469 if ( p->u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS ) {
4470 lsdaAtom = parser.findAtomByAddress(p->u.fdeInfo.lsda.targetAddress);
4471 }
4472 // add reference from FDE to CIE (always 32-bit pc-rel)
4473 typename Parser<A>::SourceLocation fdeToCieSrc(fdeAtom, p->u.fdeInfo.cie.offsetInCFI);
4474 parser.addFixup(fdeToCieSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, fdeAtom);
4475 parser.addFixup(fdeToCieSrc, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, p->u.fdeInfo.cie.offsetInCFI);
4476 parser.addFixup(fdeToCieSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
4477 parser.addFixup(fdeToCieSrc, ld::Fixup::k4of4, store32, cieAtom);
4478
4479 // add reference from FDE to function
4480 typename Parser<A>::SourceLocation fdeToFuncSrc(fdeAtom, p->u.fdeInfo.function.offsetInCFI);
4481 switch (p->u.fdeInfo.function.encodingOfTargetAddress) {
4482 case DW_EH_PE_pcrel|DW_EH_PE_ptr:
4483 if ( sizeof(typename A::P::uint_t) == 8 ) {
4484 parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
4485 parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4486 parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
4487 parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store64);
4488 break;
4489 }
4490 // else fall into 32-bit case
4491 case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
4492 parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
4493 parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4494 parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
4495 parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store32);
4496 break;
4497 default:
4498 throw "unsupported encoding in FDE of pointer to function";
4499 }
4500
4501 // add reference from FDE to LSDA
4502 typename Parser<A>::SourceLocation fdeToLsdaSrc(fdeAtom, p->u.fdeInfo.lsda.offsetInCFI);
4503 if ( lsdaAtom != NULL ) {
4504 switch (p->u.fdeInfo.lsda.encodingOfTargetAddress) {
4505 case DW_EH_PE_pcrel|DW_EH_PE_ptr:
4506 if ( sizeof(typename A::P::uint_t) == 8 ) {
4507 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
4508 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4509 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
4510 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store64);
4511 break;
4512 }
4513 // else fall into 32-bit case
4514 case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
4515 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
4516 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4517 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
4518 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store32);
4519 break;
4520 default:
4521 throw "unsupported encoding in FDE of pointer to LSDA";
4522 }
4523 }
4524
4525 // FDE is in group lead by function atom
4526 typename Parser<A>::SourceLocation fdeSrc(functionAtom,0);
4527 parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateFDE, fdeAtom);
4528
4529 // LSDA is in group lead by function atom
4530 if ( lsdaAtom != NULL ) {
4531 parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, lsdaAtom);
4532 }
4533 }
4534 }
4535 }
4536
4537
4538
4539
4540 template <typename A>
4541 const void* CFISection<A>::OAS::mappedAddress(pint_t addr)
4542 {
4543 if ( (_ehFrameStartAddr <= addr) && (addr < _ehFrameEndAddr) )
4544 return &_ehFrameContent[addr-_ehFrameStartAddr];
4545 else {
4546 // requested bytes are not in __eh_frame section
4547 // this can occur when examining the instruction bytes in the __text
4548 File<A>& file = _ehFrameSection.file();
4549 for (uint32_t i=0; i < file._sectionsArrayCount; ++i ) {
4550 const macho_section<typename A::P>* sect = file._sectionsArray[i]->machoSection();
4551 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
4552 if ( sect != NULL ) {
4553 if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
4554 return file.fileContent() + sect->offset() + addr - sect->addr();
4555 }
4556 }
4557 }
4558 throwf("__eh_frame parsing problem. Can't find target of reference to address 0x%08llX", (uint64_t)addr);
4559 }
4560 }
4561
4562
4563 template <typename A>
4564 uint64_t CFISection<A>::OAS::getULEB128(pint_t& logicalAddr, pint_t end)
4565 {
4566 uintptr_t size = (end - logicalAddr);
4567 libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4568 libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4569 uint64_t result = libunwind::LocalAddressSpace::getULEB128(laddr, laddr+size);
4570 logicalAddr += (laddr-sladdr);
4571 return result;
4572 }
4573
4574 template <typename A>
4575 int64_t CFISection<A>::OAS::getSLEB128(pint_t& logicalAddr, pint_t end)
4576 {
4577 uintptr_t size = (end - logicalAddr);
4578 libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4579 libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4580 int64_t result = libunwind::LocalAddressSpace::getSLEB128(laddr, laddr+size);
4581 logicalAddr += (laddr-sladdr);
4582 return result;
4583 }
4584
4585 template <typename A>
4586 typename A::P::uint_t CFISection<A>::OAS::getEncodedP(pint_t& addr, pint_t end, uint8_t encoding)
4587 {
4588 pint_t startAddr = addr;
4589 pint_t p = addr;
4590 pint_t result;
4591
4592 // first get value
4593 switch (encoding & 0x0F) {
4594 case DW_EH_PE_ptr:
4595 result = getP(addr);
4596 p += sizeof(pint_t);
4597 addr = (pint_t)p;
4598 break;
4599 case DW_EH_PE_uleb128:
4600 result = getULEB128(addr, end);
4601 break;
4602 case DW_EH_PE_udata2:
4603 result = get16(addr);
4604 p += 2;
4605 addr = (pint_t)p;
4606 break;
4607 case DW_EH_PE_udata4:
4608 result = get32(addr);
4609 p += 4;
4610 addr = (pint_t)p;
4611 break;
4612 case DW_EH_PE_udata8:
4613 result = get64(addr);
4614 p += 8;
4615 addr = (pint_t)p;
4616 break;
4617 case DW_EH_PE_sleb128:
4618 result = getSLEB128(addr, end);
4619 break;
4620 case DW_EH_PE_sdata2:
4621 result = (int16_t)get16(addr);
4622 p += 2;
4623 addr = (pint_t)p;
4624 break;
4625 case DW_EH_PE_sdata4:
4626 result = (int32_t)get32(addr);
4627 p += 4;
4628 addr = (pint_t)p;
4629 break;
4630 case DW_EH_PE_sdata8:
4631 result = get64(addr);
4632 p += 8;
4633 addr = (pint_t)p;
4634 break;
4635 default:
4636 throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4637 }
4638
4639 // then add relative offset
4640 switch ( encoding & 0x70 ) {
4641 case DW_EH_PE_absptr:
4642 // do nothing
4643 break;
4644 case DW_EH_PE_pcrel:
4645 result += startAddr;
4646 break;
4647 case DW_EH_PE_textrel:
4648 throw "DW_EH_PE_textrel pointer encoding not supported";
4649 break;
4650 case DW_EH_PE_datarel:
4651 throw "DW_EH_PE_datarel pointer encoding not supported";
4652 break;
4653 case DW_EH_PE_funcrel:
4654 throw "DW_EH_PE_funcrel pointer encoding not supported";
4655 break;
4656 case DW_EH_PE_aligned:
4657 throw "DW_EH_PE_aligned pointer encoding not supported";
4658 break;
4659 default:
4660 throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4661 break;
4662 }
4663
4664 // Note: DW_EH_PE_indirect is only used in CIEs to refernce the personality pointer
4665 // When parsing .o files that pointer contains zero, so we don't to return that.
4666 // Instead we skip the dereference and return the address of the pointer.
4667 // if ( encoding & DW_EH_PE_indirect )
4668 // result = getP(result);
4669
4670 return result;
4671 }
4672
4673 template <>
4674 const char* CUSection<x86_64>::personalityName(class Parser<x86_64>& parser, const macho_relocation_info<x86_64::P>* reloc)
4675 {
4676 if ( reloc->r_extern() ) {
4677 assert((reloc->r_type() == X86_64_RELOC_UNSIGNED) && "wrong reloc type on personality column in __compact_unwind section");
4678 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4679 return parser.nameFromSymbol(sym);
4680 }
4681 else {
4682 const pint_t* content = (pint_t*)(this->file().fileContent() + this->_machOSection->offset() + reloc->r_address());
4683 pint_t personalityAddr = *content;
4684 assert((parser.sectionForAddress(personalityAddr)->type() == ld::Section::typeCode) && "personality column in __compact_unwind section is not pointer to function");
4685 // atoms may not be constructed yet, so scan symbol table for labels
4686 const char* name = parser.scanSymbolTableForAddress(personalityAddr);
4687 return name;
4688 }
4689 }
4690
4691 template <>
4692 const char* CUSection<x86>::personalityName(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
4693 {
4694 if ( reloc->r_extern() ) {
4695 assert((reloc->r_type() == GENERIC_RELOC_VANILLA) && "wrong reloc type on personality column in __compact_unwind section");
4696 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4697 return parser.nameFromSymbol(sym);
4698 }
4699 else {
4700 // support __LD, __compact_unwind personality entries which are pointer to personality non-lazy pointer
4701 const pint_t* content = (pint_t*)(this->file().fileContent() + this->_machOSection->offset() + reloc->r_address());
4702 pint_t nlPointerAddr = *content;
4703 Section<x86>* nlSection = parser.sectionForAddress(nlPointerAddr);
4704 if ( nlSection->type() == ld::Section::typeCode ) {
4705 // personality function is defined in this .o file, so this is a direct reference to it
4706 // atoms may not be constructed yet, so scan symbol table for labels
4707 const char* name = parser.scanSymbolTableForAddress(nlPointerAddr);
4708 return name;
4709 }
4710 else {
4711 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(nlPointerAddr, nlSection->machoSection());
4712 const macho_nlist<P>& nlSymbol = parser.symbolFromIndex(symIndex);
4713 return parser.nameFromSymbol(nlSymbol);
4714 }
4715 }
4716 }
4717
#if SUPPORT_ARCH_arm64
// Returns the name of the personality function referenced by a relocation
// on the personality column of a __compact_unwind entry (arm64).
// An extern relocation names the symbol directly; for a local relocation the
// section content holds the function's address, which is resolved to a name
// by scanning the symbol table.
template <>
const char* CUSection<arm64>::personalityName(class Parser<arm64>& parser, const macho_relocation_info<arm64::P>* reloc)
{
	if ( reloc->r_extern() ) {
		assert((reloc->r_type() == ARM64_RELOC_UNSIGNED) && "wrong reloc type on personality column in __compact_unwind section");
		return parser.nameFromSymbol(parser.symbolFromIndex(reloc->r_symbolnum()));
	}

	// local relocation: the pointer stored in the entry is the personality's address
	const uint8_t* const sectionContent = this->file().fileContent() + this->_machOSection->offset();
	const pint_t* content = (pint_t*)(sectionContent + reloc->r_address());
	pint_t personalityAddr = *content;
	Section<arm64>* personalitySection = parser.sectionForAddress(personalityAddr);
	assert((personalitySection->type() == ld::Section::typeCode) && "personality column in __compact_unwind section is not pointer to function");
	// atoms may not be constructed yet, so scan symbol table for labels
	return parser.scanSymbolTableForAddress(personalityAddr);
}
#endif
4738
#if SUPPORT_ARCH_arm_any
// Returns the name of the personality function referenced by a relocation
// on the personality column of a __compact_unwind entry (32-bit ARM).
// An extern relocation names the symbol directly.  For a local relocation the
// stored address is either the personality function itself (when it lies in
// a code section) or a non-lazy pointer whose indirect symbol is the
// personality function.
template <>
const char* CUSection<arm>::personalityName(class Parser<arm>& parser, const macho_relocation_info<arm::P>* reloc)
{
	if ( reloc->r_extern() ) {
		assert((reloc->r_type() == ARM_RELOC_VANILLA) && "wrong reloc type on personality column in __compact_unwind section");
		return parser.nameFromSymbol(parser.symbolFromIndex(reloc->r_symbolnum()));
	}

	// support __LD, __compact_unwind personality entries which are pointer to personality non-lazy pointer
	const uint8_t* const sectionContent = this->file().fileContent() + this->_machOSection->offset();
	const pint_t* content = (pint_t*)(sectionContent + reloc->r_address());
	pint_t nlPointerAddr = *content;
	Section<arm>* nlSection = parser.sectionForAddress(nlPointerAddr);
	if ( nlSection->type() != ld::Section::typeCode ) {
		// address is a non-lazy pointer; its indirect symbol table entry names the personality
		uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(nlPointerAddr, nlSection->machoSection());
		return parser.nameFromSymbol(parser.symbolFromIndex(symIndex));
	}

	// personality function is defined in this .o file, so this is a direct reference to it
	// atoms may not be constructed yet, so scan symbol table for labels
	return parser.scanSymbolTableForAddress(nlPointerAddr);
}
#endif
4767
4768
4769 template <typename A>
4770 const char* CUSection<A>::personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
4771 {
4772 return NULL;
4773 }
4774
4775 template <>
4776 bool CUSection<x86>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
4777 {
4778 return ((enc & UNWIND_X86_MODE_MASK) == UNWIND_X86_MODE_DWARF);
4779 }
4780
4781 template <>
4782 bool CUSection<x86_64>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
4783 {
4784 return ((enc & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_DWARF);
4785 }
4786
#if SUPPORT_ARCH_arm_any
// True when the mode bits of a 32-bit ARM compact unwind encoding select
// the DWARF mode.
template <>
bool CUSection<arm>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
{
	const compact_unwind_encoding_t mode = (enc & UNWIND_ARM_MODE_MASK);
	return ( mode == UNWIND_ARM_MODE_DWARF );
}
#endif
4794
#if SUPPORT_ARCH_arm64
// True when the mode bits of an arm64 compact unwind encoding select the
// DWARF mode.
template <>
bool CUSection<arm64>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
{
	const compact_unwind_encoding_t mode = (enc & UNWIND_ARM64_MODE_MASK);
	return ( mode == UNWIND_ARM64_MODE_DWARF );
}
#endif
4802
4803 template <typename A>
4804 int CUSection<A>::infoSorter(const void* l, const void* r)
4805 {
4806 // sort references by symbol index, then address
4807 const Info* left = (Info*)l;
4808 const Info* right = (Info*)r;
4809 if ( left->functionSymbolIndex == right->functionSymbolIndex )
4810 return (left->functionStartAddress - right->functionStartAddress);
4811 else
4812 return (left->functionSymbolIndex - right->functionSymbolIndex);
4813 }
4814
4815 template <typename A>
4816 void CUSection<A>::parse(class Parser<A>& parser, uint32_t cnt, Info array[])
4817 {
4818 // walk section content and copy to Info array
4819 const macho_compact_unwind_entry<P>* const entries = (macho_compact_unwind_entry<P>*)(this->file().fileContent() + this->_machOSection->offset());
4820 for (uint32_t i=0; i < cnt; ++i) {
4821 Info* info = &array[i];
4822 const macho_compact_unwind_entry<P>* entry = &entries[i];
4823 info->functionStartAddress = entry->codeStart();
4824 info->functionSymbolIndex = 0xFFFFFFFF;
4825 info->rangeLength = entry->codeLen();
4826 info->compactUnwindInfo = entry->compactUnwindInfo();
4827 info->personality = NULL;
4828 info->lsdaAddress = entry->lsda();
4829 info->function = NULL;
4830 info->lsda = NULL;
4831 if ( (info->compactUnwindInfo & UNWIND_PERSONALITY_MASK) != 0 )
4832 warning("no bits should be set in UNWIND_PERSONALITY_MASK of compact unwind encoding in __LD,__compact_unwind section");
4833 if ( info->lsdaAddress != 0 ) {
4834 info->compactUnwindInfo |= UNWIND_HAS_LSDA;
4835 }
4836 }
4837
4838 // scan relocs, extern relocs are needed for personality references (possibly for function/lsda refs??)
4839 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(this->file().fileContent() + this->_machOSection->reloff());
4840 const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4841 for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4842 if ( reloc->r_extern() ) {
4843 // only expect external relocs on some colummns
4844 if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::personalityFieldOffset() ) {
4845 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4846 array[entryIndex].personality = this->personalityName(parser, reloc);
4847 }
4848 else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::lsdaFieldOffset() ) {
4849 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4850 const macho_nlist<P>& lsdaSym = parser.symbolFromIndex(reloc->r_symbolnum());
4851 if ( (lsdaSym.n_type() & N_TYPE) == N_SECT )
4852 array[entryIndex].lsdaAddress = lsdaSym.n_value();
4853 else
4854 warning("unexpected extern relocation to lsda in __compact_unwind section");
4855 }
4856 else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::codeStartFieldOffset() ) {
4857 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4858 array[entryIndex].functionSymbolIndex = reloc->r_symbolnum();
4859 array[entryIndex].functionStartAddress += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4860 }
4861 else {
4862 warning("unexpected extern relocation in __compact_unwind section");
4863 }
4864 }
4865 else {
4866 if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::personalityFieldOffset() ) {
4867 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4868 array[entryIndex].personality = this->personalityName(parser, reloc);
4869 }
4870 }
4871 }
4872
4873 // sort array by function start address so unwind infos will be contiguous for a given function
4874 ::qsort(array, cnt, sizeof(Info), infoSorter);
4875 }
4876
4877 template <typename A>
4878 uint32_t CUSection<A>::count()
4879 {
4880 const macho_section<P>* machoSect = this->machoSection();
4881 if ( (machoSect->size() % sizeof(macho_compact_unwind_entry<P>)) != 0 )
4882 throw "malformed __LD,__compact_unwind section, bad length";
4883
4884 return machoSect->size() / sizeof(macho_compact_unwind_entry<P>);
4885 }
4886
4887 template <typename A>
4888 void CUSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cus)
4889 {
4890 Info* const arrayStart = cus.cuArray;
4891 Info* const arrayEnd = &cus.cuArray[cus.cuCount];
4892 for (Info* info=arrayStart; info < arrayEnd; ++info) {
4893 // find function atom from address
4894 info->function = parser.findAtomByAddress(info->functionStartAddress);
4895 // find lsda atom from address
4896 if ( info->lsdaAddress != 0 ) {
4897 info->lsda = parser.findAtomByAddress(info->lsdaAddress);
4898 // add lsda subordinate
4899 typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4900 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, info->lsda);
4901 }
4902 if ( info->personality != NULL ) {
4903 // add personality subordinate
4904 typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4905 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinatePersonality, false, info->personality);
4906 }
4907 }
4908
4909 }
4910
4911 template <typename A>
4912 SymboledSection<A>::SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
4913 : Section<A>(f, s), _type(ld::Atom::typeUnclassified)
4914 {
4915 switch ( s->flags() & SECTION_TYPE ) {
4916 case S_ZEROFILL:
4917 _type = ld::Atom::typeZeroFill;
4918 break;
4919 case S_MOD_INIT_FUNC_POINTERS:
4920 _type = ld::Atom::typeInitializerPointers;
4921 break;
4922 case S_MOD_TERM_FUNC_POINTERS:
4923 _type = ld::Atom::typeTerminatorPointers;
4924 break;
4925 case S_THREAD_LOCAL_VARIABLES:
4926 _type = ld::Atom::typeTLV;
4927 break;
4928 case S_THREAD_LOCAL_ZEROFILL:
4929 _type = ld::Atom::typeTLVZeroFill;
4930 break;
4931 case S_THREAD_LOCAL_REGULAR:
4932 _type = ld::Atom::typeTLVInitialValue;
4933 break;
4934 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
4935 _type = ld::Atom::typeTLVInitializerPointers;
4936 break;
4937 case S_REGULAR:
4938 if ( strncmp(s->sectname(), "__gcc_except_tab", 16) == 0 )
4939 _type = ld::Atom::typeLSDA;
4940 else if ( this->type() == ld::Section::typeInitializerPointers )
4941 _type = ld::Atom::typeInitializerPointers;
4942 break;
4943 }
4944 }
4945
4946
4947 template <typename A>
4948 bool SymboledSection<A>::dontDeadStrip()
4949 {
4950 switch ( _type ) {
4951 case ld::Atom::typeInitializerPointers:
4952 case ld::Atom::typeTerminatorPointers:
4953 return true;
4954 default:
4955 // model an object file without MH_SUBSECTIONS_VIA_SYMBOLS as one in which nothing can be dead stripped
4956 if ( ! this->_file.canScatterAtoms() )
4957 return true;
4958 // call inherited
4959 return Section<A>::dontDeadStrip();
4960 }
4961 return false;
4962 }
4963
4964
4965 template <typename A>
4966 uint32_t SymboledSection<A>::computeAtomCount(class Parser<A>& parser,
4967 struct Parser<A>::LabelAndCFIBreakIterator& it,
4968 const struct Parser<A>::CFI_CU_InfoArrays&)
4969 {
4970 const pint_t startAddr = this->_machOSection->addr();
4971 const pint_t endAddr = startAddr + this->_machOSection->size();
4972 const uint32_t sectNum = this->sectionNum(parser);
4973
4974 uint32_t count = 0;
4975 pint_t addr;
4976 pint_t size;
4977 const macho_nlist<P>* sym;
4978 while ( it.next(parser, *this, sectNum, startAddr, endAddr, &addr, &size, &sym) ) {
4979 ++count;
4980 }
4981 //fprintf(stderr, "computeAtomCount(%s,%s) => %d\n", this->segmentName(), this->sectionName(), count);
4982 return count;
4983 }
4984
4985 template <typename A>
4986 uint32_t SymboledSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4987 struct Parser<A>::LabelAndCFIBreakIterator& it,
4988 const struct Parser<A>::CFI_CU_InfoArrays&)
4989 {
4990 this->_beginAtoms = (Atom<A>*)p;
4991
4992 //fprintf(stderr, "SymboledSection::appendAtoms() in section %s\n", this->_machOSection->sectname());
4993 const pint_t startAddr = this->_machOSection->addr();
4994 const pint_t endAddr = startAddr + this->_machOSection->size();
4995 const uint32_t sectNum = this->sectionNum(parser);
4996
4997 uint32_t count = 0;
4998 pint_t addr;
4999 pint_t size;
5000 const macho_nlist<P>* label;
5001 while ( it.next(parser, *this, sectNum, startAddr, endAddr, &addr, &size, &label) ) {
5002 Atom<A>* allocatedSpace = (Atom<A>*)p;
5003 // is break because of label or CFI?
5004 if ( label != NULL ) {
5005 // The size is computed based on the address of the next label (or the end of the section for the last label)
5006 // If there are two labels at the same address, we want them one to be an alias of the other.
5007 // If the label is at the end of a section, it is has zero size, but is not an alias
5008 const bool isAlias = ( (size == 0) && (addr < endAddr) );
5009 new (allocatedSpace) Atom<A>(*this, parser, *label, size, isAlias);
5010 if ( isAlias )
5011 this->_hasAliases = true;
5012 if ( parser.altEntryFromSymbol(*label) )
5013 this->_altEntries.insert(allocatedSpace);
5014 }
5015 else {
5016 ld::Atom::SymbolTableInclusion inclusion = ld::Atom::symbolTableNotIn;
5017 ld::Atom::ContentType ctype = this->contentType();
5018 if ( ctype == ld::Atom::typeLSDA )
5019 inclusion = ld::Atom::symbolTableInWithRandomAutoStripLabel;
5020 new (allocatedSpace) Atom<A>(*this, "anon", addr, size, ld::Atom::definitionRegular, ld::Atom::combineNever,
5021 ld::Atom::scopeTranslationUnit, ctype, inclusion,
5022 this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
5023 }
5024 p += sizeof(Atom<A>);
5025 ++count;
5026 }
5027
5028 this->_endAtoms = (Atom<A>*)p;
5029 return count;
5030 }
5031
5032
5033 template <>
5034 ld::Atom::SymbolTableInclusion ImplicitSizeSection<arm64>::symbolTableInclusion()
5035 {
5036 return ld::Atom::symbolTableInWithRandomAutoStripLabel;
5037 }
5038
5039 template <typename A>
5040 ld::Atom::SymbolTableInclusion ImplicitSizeSection<A>::symbolTableInclusion()
5041 {
5042 return ld::Atom::symbolTableNotIn;
5043 }
5044
5045
5046 template <typename A>
5047 uint32_t ImplicitSizeSection<A>::computeAtomCount(class Parser<A>& parser,
5048 struct Parser<A>::LabelAndCFIBreakIterator& it,
5049 const struct Parser<A>::CFI_CU_InfoArrays&)
5050 {
5051 uint32_t count = 0;
5052 const macho_section<P>* sect = this->machoSection();
5053 const pint_t startAddr = sect->addr();
5054 const pint_t endAddr = startAddr + sect->size();
5055 for (pint_t addr = startAddr; addr < endAddr; addr += elementSizeAtAddress(addr) ) {
5056 if ( useElementAt(parser, it, addr) )
5057 ++count;
5058 }
5059 if ( it.fileHasOverlappingSymbols && (sect->size() != 0) && (this->combine(parser, startAddr) == ld::Atom::combineByNameAndContent) ) {
5060 // if there are multiple labels in this section for the same address, then clone them into multi atoms
5061 pint_t prevSymbolAddr = (pint_t)(-1);
5062 uint8_t prevSymbolSectNum = 0;
5063 bool prevIgnore = false;
5064 for(uint32_t i=0; i < it.sortedSymbolCount; ++i) {
5065 const macho_nlist<P>& sym = parser.symbolFromIndex(it.sortedSymbolIndexes[i]);
5066 const pint_t symbolAddr = sym.n_value();
5067 const uint8_t symbolSectNum = sym.n_sect();
5068 const bool ignore = this->ignoreLabel(parser.nameFromSymbol(sym));
5069 if ( !ignore && !prevIgnore && (symbolAddr == prevSymbolAddr) && (prevSymbolSectNum == symbolSectNum) && (symbolSectNum == this->sectionNum(parser)) ) {
5070 ++count;
5071 }
5072 prevSymbolAddr = symbolAddr;
5073 prevSymbolSectNum = symbolSectNum;
5074 prevIgnore = ignore;
5075 }
5076 }
5077 return count;
5078 }
5079
5080 template <typename A>
5081 uint32_t ImplicitSizeSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
5082 struct Parser<A>::LabelAndCFIBreakIterator& it,
5083 const struct Parser<A>::CFI_CU_InfoArrays&)
5084 {
5085 this->_beginAtoms = (Atom<A>*)p;
5086
5087 const macho_section<P>* sect = this->machoSection();
5088 const pint_t startAddr = sect->addr();
5089 const pint_t endAddr = startAddr + sect->size();
5090 const uint32_t sectNum = this->sectionNum(parser);
5091 //fprintf(stderr, "ImplicitSizeSection::appendAtoms() in section %s\n", sect->sectname());
5092 uint32_t count = 0;
5093 pint_t foundAddr;
5094 pint_t size;
5095 const macho_nlist<P>* foundLabel;
5096 Atom<A>* allocatedSpace;
5097 while ( it.next(parser, *this, sectNum, startAddr, endAddr, &foundAddr, &size, &foundLabel) ) {
5098 if ( foundLabel != NULL ) {
5099 bool skip = false;
5100 pint_t labeledAtomSize = this->elementSizeAtAddress(foundAddr);
5101 allocatedSpace = (Atom<A>*)p;
5102 if ( this->ignoreLabel(parser.nameFromSymbol(*foundLabel)) ) {
5103 if ( size == 0 ) {
5104 // <rdar://problem/10018737>
5105 // a size of zero means there is another label at same location
5106 // and we are supposed to ignore this label
5107 skip = true;
5108 }
5109 else {
5110 //fprintf(stderr, " 0x%08llX make annon, size=%lld\n", (uint64_t)foundAddr, (uint64_t)size);
5111 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, foundAddr), foundAddr,
5112 this->elementSizeAtAddress(foundAddr), this->definition(),
5113 this->combine(parser, foundAddr), this->scopeAtAddress(parser, foundAddr),
5114 this->contentType(), this->symbolTableInclusion(),
5115 this->dontDeadStrip(), false, false, this->alignmentForAddress(foundAddr));
5116 }
5117 }
5118 else {
5119 // make named atom for label
5120 //fprintf(stderr, " 0x%08llX make labeled\n", (uint64_t)foundAddr);
5121 new (allocatedSpace) Atom<A>(*this, parser, *foundLabel, labeledAtomSize);
5122 }
5123 if ( !skip ) {
5124 ++count;
5125 p += sizeof(Atom<A>);
5126 foundAddr += labeledAtomSize;
5127 size -= labeledAtomSize;
5128 }
5129 }
5130 // some number of anonymous atoms
5131 for (pint_t addr = foundAddr; addr < (foundAddr+size); addr += elementSizeAtAddress(addr) ) {
5132 // make anon atoms for area before label
5133 if ( this->useElementAt(parser, it, addr) ) {
5134 //fprintf(stderr, " 0x%08llX make annon, size=%lld\n", (uint64_t)addr, (uint64_t)elementSizeAtAddress(addr));
5135 allocatedSpace = (Atom<A>*)p;
5136 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, addr), addr, this->elementSizeAtAddress(addr),
5137 this->definition(), this->combine(parser, addr), this->scopeAtAddress(parser, addr),
5138 this->contentType(), this->symbolTableInclusion(),
5139 this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
5140 ++count;
5141 p += sizeof(Atom<A>);
5142 }
5143 }
5144 }
5145
5146 this->_endAtoms = (Atom<A>*)p;
5147
5148 return count;
5149 }
5150
5151 template <typename A>
5152 bool Literal4Section<A>::ignoreLabel(const char* label) const
5153 {
5154 return (label[0] == 'L') || (label[0] == 'l');
5155 }
5156
5157 template <typename A>
5158 unsigned long Literal4Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5159 {
5160 const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
5161 return *literalContent;
5162 }
5163
5164 template <typename A>
5165 bool Literal4Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5166 const ld::IndirectBindingTable& ind) const
5167 {
5168 assert(this->type() == rhs.section().type());
5169 const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
5170
5171 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5172 assert(rhsAtom != NULL);
5173 if ( rhsAtom != NULL ) {
5174 const uint32_t* rhsLiteralContent = (uint32_t*)rhsAtom->contentPointer();
5175 return (*literalContent == *rhsLiteralContent);
5176 }
5177 return false;
5178 }
5179
5180
5181 template <typename A>
5182 bool Literal8Section<A>::ignoreLabel(const char* label) const
5183 {
5184 return (label[0] == 'L') || (label[0] == 'l');
5185 }
5186
5187 template <typename A>
5188 unsigned long Literal8Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5189 {
5190 #if __LP64__
5191 const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
5192 return *literalContent;
5193 #else
5194 unsigned long hash = 5381;
5195 const uint8_t* byteContent = atom->contentPointer();
5196 for (int i=0; i < 8; ++i) {
5197 hash = hash * 33 + byteContent[i];
5198 }
5199 return hash;
5200 #endif
5201 }
5202
5203 template <typename A>
5204 bool Literal8Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5205 const ld::IndirectBindingTable& ind) const
5206 {
5207 if ( rhs.section().type() != ld::Section::typeLiteral8 )
5208 return false;
5209 assert(this->type() == rhs.section().type());
5210 const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
5211
5212 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5213 assert(rhsAtom != NULL);
5214 if ( rhsAtom != NULL ) {
5215 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
5216 return (*literalContent == *rhsLiteralContent);
5217 }
5218 return false;
5219 }
5220
5221 template <typename A>
5222 bool Literal16Section<A>::ignoreLabel(const char* label) const
5223 {
5224 return (label[0] == 'L') || (label[0] == 'l');
5225 }
5226
5227 template <typename A>
5228 unsigned long Literal16Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5229 {
5230 unsigned long hash = 5381;
5231 const uint8_t* byteContent = atom->contentPointer();
5232 for (int i=0; i < 16; ++i) {
5233 hash = hash * 33 + byteContent[i];
5234 }
5235 return hash;
5236 }
5237
5238 template <typename A>
5239 bool Literal16Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5240 const ld::IndirectBindingTable& ind) const
5241 {
5242 if ( rhs.section().type() != ld::Section::typeLiteral16 )
5243 return false;
5244 assert(this->type() == rhs.section().type());
5245 const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
5246
5247 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5248 assert(rhsAtom != NULL);
5249 if ( rhsAtom != NULL ) {
5250 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
5251 return ((literalContent[0] == rhsLiteralContent[0]) && (literalContent[1] == rhsLiteralContent[1]));
5252 }
5253 return false;
5254 }
5255
5256
5257
5258 template <typename A>
5259 typename A::P::uint_t CStringSection<A>::elementSizeAtAddress(pint_t addr)
5260 {
5261 const macho_section<P>* sect = this->machoSection();
5262 const char* stringContent = (char*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
5263 return strlen(stringContent) + 1;
5264 }
5265
5266 template <typename A>
5267 bool CStringSection<A>::useElementAt(Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
5268 {
5269 return true;
5270 }
5271
5272 template <typename A>
5273 bool CStringSection<A>::ignoreLabel(const char* label) const
5274 {
5275 return (label[0] == 'L') || (label[0] == 'l');
5276 }
5277
5278
5279 template <typename A>
5280 Atom<A>* CStringSection<A>::findAtomByAddress(pint_t addr)
5281 {
5282 Atom<A>* result = this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms);
5283 return result;
5284 }
5285
5286 template <typename A>
5287 unsigned long CStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5288 {
5289 unsigned long hash = 5381;
5290 const char* stringContent = (char*)atom->contentPointer();
5291 for (const char* s = stringContent; *s != '\0'; ++s) {
5292 hash = hash * 33 + *s;
5293 }
5294 return hash;
5295 }
5296
5297
5298 template <typename A>
5299 bool CStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5300 const ld::IndirectBindingTable& ind) const
5301 {
5302 if ( rhs.section().type() != ld::Section::typeCString )
5303 return false;
5304 assert(this->type() == rhs.section().type());
5305 assert(strcmp(this->sectionName(), rhs.section().sectionName())== 0);
5306 assert(strcmp(this->segmentName(), rhs.section().segmentName())== 0);
5307 const char* stringContent = (char*)atom->contentPointer();
5308
5309 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5310 assert(rhsAtom != NULL);
5311 if ( rhsAtom != NULL ) {
5312 if ( atom->_size != rhsAtom->_size )
5313 return false;
5314 const char* rhsStringContent = (char*)rhsAtom->contentPointer();
5315 return (strcmp(stringContent, rhsStringContent) == 0);
5316 }
5317 return false;
5318 }
5319
5320
5321 template <>
5322 ld::Fixup::Kind NonLazyPointerSection<x86>::fixupKind()
5323 {
5324 return ld::Fixup::kindStoreLittleEndian32;
5325 }
5326
5327 template <>
5328 ld::Fixup::Kind NonLazyPointerSection<arm>::fixupKind()
5329 {
5330 return ld::Fixup::kindStoreLittleEndian32;
5331 }
5332
5333 template <>
5334 ld::Fixup::Kind NonLazyPointerSection<arm64>::fixupKind()
5335 {
5336 return ld::Fixup::kindStoreLittleEndian64;
5337 }
5338
5339
5340 template <>
5341 void NonLazyPointerSection<x86_64>::makeFixups(class Parser<x86_64>& parser, const struct Parser<x86_64>::CFI_CU_InfoArrays&)
5342 {
5343 assert(0 && "x86_64 should not have non-lazy-pointer sections in .o files");
5344 }
5345
5346 template <typename A>
5347 void NonLazyPointerSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
5348 {
5349 // add references for each NLP atom based on indirect symbol table
5350 const macho_section<P>* sect = this->machoSection();
5351 const pint_t endAddr = sect->addr() + sect->size();
5352 for( pint_t addr = sect->addr(); addr < endAddr; addr += sizeof(pint_t)) {
5353 typename Parser<A>::SourceLocation src;
5354 typename Parser<A>::TargetDesc target;
5355 src.atom = this->findAtomByAddress(addr);
5356 src.offsetInAtom = 0;
5357 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
5358 target.atom = NULL;
5359 target.name = NULL;
5360 target.weakImport = false;
5361 target.addend = 0;
5362 if ( symIndex == INDIRECT_SYMBOL_LOCAL ) {
5363 // use direct reference for local symbols
5364 const pint_t* nlpContent = (pint_t*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
5365 pint_t targetAddr = P::getP(*nlpContent);
5366 target.atom = parser.findAtomByAddress(targetAddr);
5367 target.weakImport = false;
5368 target.addend = (targetAddr - target.atom->objectAddress());
5369 // <rdar://problem/8385011> if pointer to thumb function, mask of thumb bit (not an addend of +1)
5370 if ( target.atom->isThumb() )
5371 target.addend &= (-2);
5372 assert(src.atom->combine() == ld::Atom::combineNever);
5373 }
5374 else {
5375 const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
5376 // use direct reference for local symbols
5377 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
5378 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5379 assert(src.atom->combine() == ld::Atom::combineNever);
5380 }
5381 else {
5382 target.name = parser.nameFromSymbol(sym);
5383 target.weakImport = parser.weakImportFromSymbol(sym);
5384 assert(src.atom->combine() == ld::Atom::combineByNameAndReferences);
5385 }
5386 }
5387 parser.addFixups(src, this->fixupKind(), target);
5388 }
5389 }
5390
5391 template <typename A>
5392 ld::Atom::Combine NonLazyPointerSection<A>::combine(Parser<A>& parser, pint_t addr)
5393 {
5394 const macho_section<P>* sect = this->machoSection();
5395 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
5396 if ( symIndex == INDIRECT_SYMBOL_LOCAL)
5397 return ld::Atom::combineNever;
5398
5399 // don't coalesce non-lazy-pointers to local symbols
5400 const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
5401 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) )
5402 return ld::Atom::combineNever;
5403
5404 return ld::Atom::combineByNameAndReferences;
5405 }
5406
5407 template <typename A>
5408 const char* NonLazyPointerSection<A>::targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind)
5409 {
5410 assert(atom->combine() == ld::Atom::combineByNameAndReferences);
5411 assert(atom->fixupCount() == 1);
5412 ld::Fixup::iterator fit = atom->fixupsBegin();
5413 const char* name = NULL;
5414 switch ( fit->binding ) {
5415 case ld::Fixup::bindingByNameUnbound:
5416 name = fit->u.name;
5417 break;
5418 case ld::Fixup::bindingByContentBound:
5419 name = fit->u.target->name();
5420 break;
5421 case ld::Fixup::bindingsIndirectlyBound:
5422 name = ind.indirectName(fit->u.bindingIndex);
5423 break;
5424 default:
5425 assert(0);
5426 }
5427 assert(name != NULL);
5428 return name;
5429 }
5430
5431 template <typename A>
5432 unsigned long NonLazyPointerSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5433 {
5434 assert(atom->combine() == ld::Atom::combineByNameAndReferences);
5435 unsigned long hash = 9508;
5436 for (const char* s = this->targetName(atom, ind); *s != '\0'; ++s) {
5437 hash = hash * 33 + *s;
5438 }
5439 return hash;
5440 }
5441
5442 template <typename A>
5443 bool NonLazyPointerSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5444 const ld::IndirectBindingTable& indirectBindingTable) const
5445 {
5446 if ( rhs.section().type() != ld::Section::typeNonLazyPointer )
5447 return false;
5448 assert(this->type() == rhs.section().type());
5449 // there can be many non-lazy pointer in different section names
5450 // we only want to coalesce in same section name
5451 if ( *this != rhs.section() )
5452 return false;
5453 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5454 assert(rhsAtom != NULL);
5455 const char* thisName = this->targetName(atom, indirectBindingTable);
5456 const char* rhsName = this->targetName(rhsAtom, indirectBindingTable);
5457 return (strcmp(thisName, rhsName) == 0);
5458 }
5459
5460 template <typename A>
5461 ld::Atom::Scope NonLazyPointerSection<A>::scopeAtAddress(Parser<A>& parser, pint_t addr)
5462 {
5463 const macho_section<P>* sect = this->machoSection();
5464 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
5465 if ( symIndex == INDIRECT_SYMBOL_LOCAL)
5466 return ld::Atom::scopeTranslationUnit;
5467 else
5468 return ld::Atom::scopeLinkageUnit;
5469 }
5470
5471 template <typename A>
5472 const uint8_t* CFStringSection<A>::targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
5473 ContentType* ct, unsigned int* count)
5474 {
5475 *ct = contentUnknown;
5476 for (ld::Fixup::iterator fit=atom->fixupsBegin(), end=atom->fixupsEnd(); fit != end; ++fit) {
5477 const ld::Atom* targetAtom = NULL;
5478 switch ( fit->binding ) {
5479 case ld::Fixup::bindingByNameUnbound:
5480 // ignore reference to ___CFConstantStringClassReference
5481 // we are just looking for reference to backing string data
5482 assert(fit->offsetInAtom == 0);
5483 assert(strcmp(fit->u.name, "___CFConstantStringClassReference") == 0);
5484 break;
5485 case ld::Fixup::bindingDirectlyBound:
5486 case ld::Fixup::bindingByContentBound:
5487 targetAtom = fit->u.target;
5488 break;
5489 case ld::Fixup::bindingsIndirectlyBound:
5490 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5491 break;
5492 default:
5493 assert(0 && "bad binding type");
5494 }
5495 assert(targetAtom != NULL);
5496 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5497 if ( targetAtom->section().type() == ld::Section::typeCString ) {
5498 *ct = contentUTF8;
5499 *count = targetAtom->size();
5500 }
5501 else if ( targetAtom->section().type() == ld::Section::typeUTF16Strings ) {
5502 *ct = contentUTF16;
5503 *count = (targetAtom->size()+1)/2; // round up incase of buggy compiler that has only one trailing zero byte
5504 }
5505 else {
5506 *ct = contentUnknown;
5507 *count = 0;
5508 return NULL;
5509 }
5510 return target->contentPointer();
5511 }
5512 assert(0);
5513 return NULL;
5514 }
5515
5516 template <typename A>
5517 unsigned long CFStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5518 {
5519 // base hash of CFString on hash of cstring it wraps
5520 ContentType cType;
5521 unsigned long hash;
5522 unsigned int charCount;
5523 const uint8_t* content = this->targetContent(atom, ind, &cType, &charCount);
5524 switch ( cType ) {
5525 case contentUTF8:
5526 hash = 9408;
5527 for (const char* s = (char*)content; *s != '\0'; ++s) {
5528 hash = hash * 33 + *s;
5529 }
5530 return hash;
5531 case contentUTF16:
5532 hash = 407955;
5533 --charCount; // don't add last 0x0000 to hash because some buggy compilers only have trailing single byte
5534 for (const uint16_t* s = (uint16_t*)content; charCount > 0; ++s, --charCount) {
5535 hash = hash * 1025 + *s;
5536 }
5537 return hash;
5538 case contentUnknown:
5539 // <rdar://problem/14134211> For malformed CFStrings, hash to address of atom so they have unique hashes
5540 return ULONG_MAX - (unsigned long)(atom);
5541 }
5542 return 0;
5543 }
5544
5545
5546 template <typename A>
5547 bool CFStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5548 const ld::IndirectBindingTable& indirectBindingTable) const
5549 {
5550 if ( atom == &rhs )
5551 return true;
5552 if ( rhs.section().type() != ld::Section::typeCFString)
5553 return false;
5554 assert(this->type() == rhs.section().type());
5555 assert(strcmp(this->sectionName(), "__cfstring") == 0);
5556
5557 ContentType thisType;
5558 unsigned int charCount;
5559 const uint8_t* cstringContent = this->targetContent(atom, indirectBindingTable, &thisType, &charCount);
5560 ContentType rhsType;
5561 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5562 assert(rhsAtom != NULL);
5563 unsigned int rhsCharCount;
5564 const uint8_t* rhsStringContent = this->targetContent(rhsAtom, indirectBindingTable, &rhsType, &rhsCharCount);
5565
5566 if ( thisType != rhsType )
5567 return false;
5568
5569 if ( thisType == contentUnknown )
5570 return false;
5571
5572 if ( rhsType == contentUnknown )
5573 return false;
5574
5575 // no need to compare content of pointers are already the same
5576 if ( cstringContent == rhsStringContent )
5577 return true;
5578
5579 // no need to compare content if size is different
5580 if ( charCount != rhsCharCount )
5581 return false;
5582
5583 switch ( thisType ) {
5584 case contentUTF8:
5585 return (strcmp((char*)cstringContent, (char*)rhsStringContent) == 0);
5586 case contentUTF16:
5587 {
5588 const uint16_t* cstringContent16 = (uint16_t*)cstringContent;
5589 const uint16_t* rhsStringContent16 = (uint16_t*)rhsStringContent;
5590 for (unsigned int i = 0; i < charCount; ++i) {
5591 if ( cstringContent16[i] != rhsStringContent16[i] )
5592 return false;
5593 }
5594 return true;
5595 }
5596 case contentUnknown:
5597 return false;
5598 }
5599 return false;
5600 }
5601
5602
5603 template <typename A>
5604 typename A::P::uint_t ObjC1ClassSection<A>::elementSizeAtAddress(pint_t addr)
5605 {
5606 // nominal size for each class is 48 bytes, but sometimes the compiler
5607 // over aligns and there is padding after class data
5608 const macho_section<P>* sct = this->machoSection();
5609 uint32_t align = 1 << sct->align();
5610 uint32_t size = ((12 * sizeof(pint_t)) + align-1) & (-align);
5611 return size;
5612 }
5613
5614 template <typename A>
5615 const char* ObjC1ClassSection<A>::unlabeledAtomName(Parser<A>& parser, pint_t addr)
5616 {
5617 // 8-bytes into class object is pointer to class name
5618 const macho_section<P>* sct = this->machoSection();
5619 uint32_t classObjcFileOffset = sct->offset() - sct->addr() + addr;
5620 const uint8_t* mappedFileContent = this->file().fileContent();
5621 pint_t nameAddr = P::getP(*((pint_t*)(mappedFileContent+classObjcFileOffset+2*sizeof(pint_t))));
5622
5623 // find section containing string address to get string bytes
5624 const macho_section<P>* const sections = parser.firstMachOSection();
5625 const uint32_t sectionCount = parser.machOSectionCount();
5626 for (uint32_t i=0; i < sectionCount; ++i) {
5627 const macho_section<P>* aSect = &sections[i];
5628 if ( (aSect->addr() <= nameAddr) && (nameAddr < (aSect->addr()+aSect->size())) ) {
5629 assert((aSect->flags() & SECTION_TYPE) == S_CSTRING_LITERALS);
5630 uint32_t nameFileOffset = aSect->offset() - aSect->addr() + nameAddr;
5631 const char* name = (char*)mappedFileContent + nameFileOffset;
5632 // spin through symbol table to find absolute symbol corresponding to this class
5633 for (uint32_t s=0; s < parser.symbolCount(); ++s) {
5634 const macho_nlist<P>& sym = parser.symbolFromIndex(s);
5635 if ( (sym.n_type() & N_TYPE) != N_ABS )
5636 continue;
5637 const char* absName = parser.nameFromSymbol(sym);
5638 if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
5639 if ( strcmp(&absName[17], name) == 0 )
5640 return absName;
5641 }
5642 }
5643 assert(0 && "obj class name not found in symbol table");
5644 }
5645 }
5646 assert(0 && "obj class name not found");
5647 return "unknown objc class";
5648 }
5649
5650
5651 template <typename A>
5652 const char* ObjC2ClassRefsSection<A>::targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5653 {
5654 assert(atom->fixupCount() == 1);
5655 ld::Fixup::iterator fit = atom->fixupsBegin();
5656 const char* className = NULL;
5657 switch ( fit->binding ) {
5658 case ld::Fixup::bindingByNameUnbound:
5659 className = fit->u.name;
5660 break;
5661 case ld::Fixup::bindingDirectlyBound:
5662 case ld::Fixup::bindingByContentBound:
5663 className = fit->u.target->name();
5664 break;
5665 case ld::Fixup::bindingsIndirectlyBound:
5666 className = ind.indirectName(fit->u.bindingIndex);
5667 break;
5668 default:
5669 assert(0 && "unsupported binding in objc2 class ref section");
5670 }
5671 assert(className != NULL);
5672 return className;
5673 }
5674
5675
5676 template <typename A>
5677 unsigned long ObjC2ClassRefsSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5678 {
5679 unsigned long hash = 978;
5680 for (const char* s = targetClassName(atom, ind); *s != '\0'; ++s) {
5681 hash = hash * 33 + *s;
5682 }
5683 return hash;
5684 }
5685
5686 template <typename A>
5687 bool ObjC2ClassRefsSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5688 const ld::IndirectBindingTable& indirectBindingTable) const
5689 {
5690 assert(this->type() == rhs.section().type());
5691 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5692 assert(rhsAtom != NULL);
5693 const char* thisClassName = targetClassName(atom, indirectBindingTable);
5694 const char* rhsClassName = targetClassName(rhsAtom, indirectBindingTable);
5695 return (strcmp(thisClassName, rhsClassName) == 0);
5696 }
5697
5698
5699 template <typename A>
5700 const char* Objc1ClassReferences<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5701 {
5702 assert(atom->fixupCount() == 2);
5703 ld::Fixup::iterator fit = atom->fixupsBegin();
5704 if ( fit->kind == ld::Fixup::kindSetTargetAddress )
5705 ++fit;
5706 const ld::Atom* targetAtom = NULL;
5707 switch ( fit->binding ) {
5708 case ld::Fixup::bindingByContentBound:
5709 targetAtom = fit->u.target;
5710 break;
5711 case ld::Fixup::bindingsIndirectlyBound:
5712 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5713 if ( targetAtom == NULL ) {
5714 fprintf(stderr, "missing target named %s\n", ind.indirectName(fit->u.bindingIndex));
5715 }
5716 break;
5717 default:
5718 assert(0);
5719 }
5720 assert(targetAtom != NULL);
5721 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5722 assert(target != NULL);
5723 return (char*)target->contentPointer();
5724 }
5725
5726
5727 template <typename A>
5728 const char* PointerToCStringSection<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5729 {
5730 assert(atom->fixupCount() == 1);
5731 ld::Fixup::iterator fit = atom->fixupsBegin();
5732 const ld::Atom* targetAtom = NULL;
5733 switch ( fit->binding ) {
5734 case ld::Fixup::bindingByContentBound:
5735 targetAtom = fit->u.target;
5736 break;
5737 case ld::Fixup::bindingsIndirectlyBound:
5738 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5739 break;
5740 case ld::Fixup::bindingDirectlyBound:
5741 targetAtom = fit->u.target;
5742 break;
5743 default:
5744 assert(0 && "unsupported reference to selector");
5745 }
5746 assert(targetAtom != NULL);
5747 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5748 assert(target != NULL);
5749 assert(target->contentType() == ld::Atom::typeCString);
5750 return (char*)target->contentPointer();
5751 }
5752
5753 template <typename A>
5754 unsigned long PointerToCStringSection<A>::contentHash(const class Atom<A>* atom,
5755 const ld::IndirectBindingTable& indirectBindingTable) const
5756 {
5757 // make hash from section name and target cstring name
5758 unsigned long hash = 123;
5759 for (const char* s = this->sectionName(); *s != '\0'; ++s) {
5760 hash = hash * 33 + *s;
5761 }
5762 for (const char* s = this->targetCString(atom, indirectBindingTable); *s != '\0'; ++s) {
5763 hash = hash * 33 + *s;
5764 }
5765 return hash;
5766 }
5767
5768 template <typename A>
5769 bool PointerToCStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5770 const ld::IndirectBindingTable& indirectBindingTable) const
5771 {
5772 assert(this->type() == rhs.section().type());
5773 // there can be pointers-to-cstrings in different section names
5774 // we only want to coalesce in same section name
5775 if ( *this != rhs.section() )
5776 return false;
5777
5778 // get string content for this
5779 const char* cstringContent = this->targetCString(atom, indirectBindingTable);
5780 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5781 assert(rhsAtom != NULL);
5782 const char* rhsCstringContent = this->targetCString(rhsAtom, indirectBindingTable);
5783
5784 assert(cstringContent != NULL);
5785 assert(rhsCstringContent != NULL);
5786 return (strcmp(cstringContent, rhsCstringContent) == 0);
5787 }
5788
5789
5790
5791 template <typename A>
5792 unsigned long UTF16StringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5793 {
5794 unsigned long hash = 5381;
5795 const uint16_t* stringContent = (uint16_t*)atom->contentPointer();
5796 // some buggy compilers end utf16 data with single byte, so don't use last word in hash computation
5797 unsigned int count = (atom->size()/2) - 1;
5798 for (const uint16_t* s = stringContent; count > 0; ++s, --count) {
5799 hash = hash * 33 + *s;
5800 }
5801 return hash;
5802 }
5803
5804 template <typename A>
5805 bool UTF16StringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5806 const ld::IndirectBindingTable& ind) const
5807 {
5808 if ( rhs.section().type() != ld::Section::typeUTF16Strings )
5809 return false;
5810 assert(0);
5811 return false;
5812 }
5813
5814
5815
5816
5817
5818
5819
5820 template <>
5821 uint32_t Section<x86_64>::x86_64PcRelOffset(uint8_t r_type)
5822 {
5823 switch ( r_type ) {
5824 case X86_64_RELOC_SIGNED:
5825 return 4;
5826 case X86_64_RELOC_SIGNED_1:
5827 return 5;
5828 case X86_64_RELOC_SIGNED_2:
5829 return 6;
5830 case X86_64_RELOC_SIGNED_4:
5831 return 8;
5832 }
5833 return 0;
5834 }
5835
5836
// Converts one x86_64 relocation entry of this section into ld::Fixup(s) attached to
// the atom that contains the relocated bytes.
//   parser - parser for the object file; used to look up symbols/atoms and to record fixups
//   reloc  - the relocation entry to process; reloc[1] is also read (see nextReloc)
// Returns true only for X86_64_RELOC_SUBTRACTOR, where the following
// X86_64_RELOC_UNSIGNED entry is processed here as well (presumably so the caller
// skips it); returns false for every other relocation type.
// Throws a const char* (or calls throwf) for unsupported flag/length combinations
// and for unknown relocation types.
5837 template <>
5838 bool Section<x86_64>::addRelocFixup(class Parser<x86_64>& parser, const macho_relocation_info<P>* reloc)
5839 {
5840 const macho_section<P>* sect = this->machoSection();
// section address plus the relocation's r_address
5841 uint64_t srcAddr = sect->addr() + reloc->r_address();
5842 Parser<x86_64>::SourceLocation src;
5843 Parser<x86_64>::TargetDesc target;
5844 Parser<x86_64>::TargetDesc toTarget;
5845 src.atom = this->findAtomByAddress(srcAddr);
5846 src.offsetInAtom = srcAddr - src.atom->_objAddress;
// pointer to the bytes being relocated inside the mapped file content
5847 const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5848 uint64_t contentValue = 0;
// NOTE(review): the entry after reloc is addressed unconditionally; it is only
// dereferenced in the X86_64_RELOC_SUBTRACTOR case. No bounds check is visible here.
5849 const macho_relocation_info<x86_64::P>* nextReloc = &reloc[1];
5850 bool result = false;
5851 bool useDirectBinding;
// read the value currently stored at the fixup location; r_length 0/1/2/3 selects a
// 1/2/4/8 byte read, and the 2- and 4-byte reads are sign-extended
5852 switch ( reloc->r_length() ) {
5853 case 0:
5854 contentValue = *fixUpPtr;
5855 break;
5856 case 1:
5857 contentValue = (int64_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5858 break;
5859 case 2:
5860 contentValue = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5861 break;
5862 case 3:
5863 contentValue = E::get64(*((uint64_t*)fixUpPtr));
5864 break;
5865 }
5866 target.atom = NULL;
5867 target.name = NULL;
5868 target.weakImport = false;
5869 target.addend = 0;
// extern relocations name a symbol (r_symbolnum is a symbol index); the stored value
// is used as the addend
5870 if ( reloc->r_extern() ) {
5871 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5872 // use direct reference for local symbols
5873 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5874 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5875 target.addend += contentValue;
5876 }
5877 else {
5878 target.name = parser.nameFromSymbol(sym);
5879 target.weakImport = parser.weakImportFromSymbol(sym);
5880 target.addend = contentValue;
5881 }
5882 // cfstrings should always use direct reference to backing store
5883 if ( (this->type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
5884 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5885 target.addend = contentValue;
5886 }
5887 }
// non-extern relocations: the stored value (adjusted for pc-relative encodings) is
// looked up as an address, with r_symbolnum passed as the section number
5888 else {
5889 if ( reloc->r_pcrel() )
5890 contentValue += srcAddr + x86_64PcRelOffset(reloc->r_type());
5891 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5892 }
// choose the fixup kind from the relocation type, validating r_pcrel/r_length/r_extern
5893 switch ( reloc->r_type() ) {
5894 case X86_64_RELOC_UNSIGNED:
5895 if ( reloc->r_pcrel() )
5896 throw "pcrel and X86_64_RELOC_UNSIGNED not supported";
5897 switch ( reloc->r_length() ) {
5898 case 0:
5899 case 1:
5900 throw "length < 2 and X86_64_RELOC_UNSIGNED not supported";
5901 case 2:
5902 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5903 break;
5904 case 3:
5905 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian64, target);
5906 break;
5907 }
5908 break;
5909 case X86_64_RELOC_SIGNED:
5910 case X86_64_RELOC_SIGNED_1:
5911 case X86_64_RELOC_SIGNED_2:
5912 case X86_64_RELOC_SIGNED_4:
5913 if ( ! reloc->r_pcrel() )
5914 throw "not pcrel and X86_64_RELOC_SIGNED* not supported";
5915 if ( reloc->r_length() != 2 )
5916 throw "length != 2 and X86_64_RELOC_SIGNED* not supported";
// for the _1/_2/_4 variants an extern relocation gets 1/2/4 added to its addend
5917 switch ( reloc->r_type() ) {
5918 case X86_64_RELOC_SIGNED:
5919 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5920 break;
5921 case X86_64_RELOC_SIGNED_1:
5922 if ( reloc->r_extern() )
5923 target.addend += 1;
5924 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_1, target);
5925 break;
5926 case X86_64_RELOC_SIGNED_2:
5927 if ( reloc->r_extern() )
5928 target.addend += 2;
5929 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_2, target);
5930 break;
5931 case X86_64_RELOC_SIGNED_4:
5932 if ( reloc->r_extern() )
5933 target.addend += 4;
5934 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_4, target);
5935 break;
5936 }
5937 break;
5938 case X86_64_RELOC_BRANCH:
5939 if ( ! reloc->r_pcrel() )
5940 throw "not pcrel and X86_64_RELOC_BRANCH not supported";
5941 switch ( reloc->r_length() ) {
5942 case 2:
// branches to names starting with "___dtrace_probe$" (16 chars) or
// "___dtrace_isenabled$" (20 chars) get a dtrace-specific fixup; the rest of the
// name after the prefix is handed to addDtraceExtraInfos
5943 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5944 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5945 parser.addDtraceExtraInfos(src, &target.name[16]);
5946 }
5947 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5948 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5949 parser.addDtraceExtraInfos(src, &target.name[20]);
5950 }
5951 else {
5952 parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel32, target);
5953 }
5954 break;
5955 case 0:
5956 parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel8, target);
5957 break;
5958 default:
5959 throwf("length=%d and X86_64_RELOC_BRANCH not supported", reloc->r_length());
5960 }
5961 break;
5962 case X86_64_RELOC_GOT:
5963 if ( ! reloc->r_extern() )
5964 throw "not extern and X86_64_RELOC_GOT not supported";
5965 if ( ! reloc->r_pcrel() )
5966 throw "not pcrel and X86_64_RELOC_GOT not supported";
5967 if ( reloc->r_length() != 2 )
5968 throw "length != 2 and X86_64_RELOC_GOT not supported";
5969 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOT, target);
5970 break;
5971 case X86_64_RELOC_GOT_LOAD:
5972 if ( ! reloc->r_extern() )
5973 throw "not extern and X86_64_RELOC_GOT_LOAD not supported";
5974 if ( ! reloc->r_pcrel() )
5975 throw "not pcrel and X86_64_RELOC_GOT_LOAD not supported";
5976 if ( reloc->r_length() != 2 )
5977 throw "length != 2 and X86_64_RELOC_GOT_LOAD not supported";
5978 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOTLoad, target);
5979 break;
// SUBTRACTOR + UNSIGNED pair: `target` (from this entry) is the symbol that gets
// subtracted, `toTarget` (from the next entry) is the symbol it is subtracted from
5980 case X86_64_RELOC_SUBTRACTOR:
5981 if ( reloc->r_pcrel() )
5982 throw "X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5983 if ( reloc->r_length() < 2 )
5984 throw "X86_64_RELOC_SUBTRACTOR must have r_length of 2 or 3";
5985 if ( !reloc->r_extern() )
5986 throw "X86_64_RELOC_SUBTRACTOR must have r_extern=1";
5987 if ( nextReloc->r_type() != X86_64_RELOC_UNSIGNED )
5988 throw "X86_64_RELOC_SUBTRACTOR must be followed by X86_64_RELOC_UNSIGNED";
// the next entry is consumed here, so report that to the caller
5989 result = true;
5990 if ( nextReloc->r_pcrel() )
5991 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5992 if ( nextReloc->r_length() != reloc->r_length() )
5993 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR must have same r_length";
5994 if ( nextReloc->r_extern() ) {
5995 const macho_nlist<P>& sym = parser.symbolFromIndex(nextReloc->r_symbolnum());
5996 // use direct reference for local symbols
5997 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5998 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), toTarget);
5999 toTarget.addend = contentValue;
6000 useDirectBinding = true;
6001 }
6002 else {
6003 toTarget.name = parser.nameFromSymbol(sym);
6004 toTarget.weakImport = parser.weakImportFromSymbol(sym);
6005 toTarget.addend = contentValue;
6006 useDirectBinding = false;
6007 }
6008 }
6009 else {
6010 parser.findTargetFromAddressAndSectionNum(contentValue, nextReloc->r_symbolnum(), toTarget);
// NOTE(review): assumes the lookup above always leaves toTarget.atom non-NULL - confirm
6011 useDirectBinding = (toTarget.atom->scope() == ld::Atom::scopeTranslationUnit);
6012 }
// emit a four-fixup chain: set toTarget address, add its addend, subtract target
// address, then store as 32 or 64 bits depending on r_length
6013 if ( useDirectBinding )
6014 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.atom);
6015 else
6016 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.weakImport, toTarget.name);
6017 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, toTarget.addend);
6018 if ( target.atom == NULL )
6019 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, false, target.name);
6020 else
6021 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, target.atom);
6022 if ( reloc->r_length() == 2 )
6023 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
6024 else
6025 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian64);
6026 break;
6027 case X86_64_RELOC_TLV:
6028 if ( ! reloc->r_extern() )
6029 throw "not extern and X86_64_RELOC_TLV not supported";
6030 if ( ! reloc->r_pcrel() )
6031 throw "not pcrel and X86_64_RELOC_TLV not supported";
6032 if ( reloc->r_length() != 2 )
6033 throw "length != 2 and X86_64_RELOC_TLV not supported";
6034 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
6035 break;
6036 default:
6037 throwf("unknown relocation type %d", reloc->r_type());
6038 }
6039 return result;
6040 }
6041
6042
6043
// Converts one i386 relocation entry of this section into ld::Fixup(s) attached to
// the atom that contains the relocated bytes.
//   parser - parser for the object file; used to look up symbols/atoms and to record fixups
//   reloc  - the relocation entry; treated as a scattered entry when the R_SCATTERED
//            bit is set in r_address(). In the scattered path reloc[1] is also read.
// Returns false for every non-scattered entry. For a scattered entry, returns true
// only when the following entry is a non-scattered GENERIC_RELOC_PAIR.
// Throws a const char* (or calls throwf) for unsupported lengths/types.
6044 template <>
6045 bool Section<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<P>* reloc)
6046 {
6047 const macho_section<P>* sect = this->machoSection();
6048 uint32_t srcAddr;
6049 const uint8_t* fixUpPtr;
6050 uint32_t contentValue = 0;
6051 ld::Fixup::Kind kind = ld::Fixup::kindNone;
6052 Parser<x86>::SourceLocation src;
6053 Parser<x86>::TargetDesc target;
6054
// ---- plain (non-scattered) relocation ----
6055 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
6056 srcAddr = sect->addr() + reloc->r_address();
6057 src.atom = this->findAtomByAddress(srcAddr);
6058 src.offsetInAtom = srcAddr - src.atom->_objAddress;
6059 fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6060 switch ( reloc->r_type() ) {
6061 case GENERIC_RELOC_VANILLA:
// read the stored value (1- and 2-byte values are sign-extended) and pick the store
// kind; for pc-relative entries the fixup address plus the field size is added
6062 switch ( reloc->r_length() ) {
6063 case 0:
6064 contentValue = (int32_t)(int8_t)*fixUpPtr;
6065 if ( reloc->r_pcrel() ) {
6066 kind = ld::Fixup::kindStoreX86BranchPCRel8;
6067 contentValue += srcAddr + sizeof(uint8_t);
6068 }
6069 else
6070 throw "r_length=0 and r_pcrel=0 not supported";
6071 break;
6072 case 1:
6073 contentValue = (int32_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
6074 if ( reloc->r_pcrel() ) {
6075 kind = ld::Fixup::kindStoreX86PCRel16;
6076 contentValue += srcAddr + sizeof(uint16_t);
6077 }
6078 else
6079 kind = ld::Fixup::kindStoreLittleEndian16;
6080 break;
6081 case 2:
6082 contentValue = E::get32(*((uint32_t*)fixUpPtr));
6083 if ( reloc->r_pcrel() ) {
6084 kind = ld::Fixup::kindStoreX86BranchPCRel32;
6085 contentValue += srcAddr + sizeof(uint32_t);
6086 }
6087 else
6088 kind = ld::Fixup::kindStoreLittleEndian32;
6089 break;
6090 case 3:
6091 throw "r_length=3 not supported";
6092 }
// extern: bind by symbol name with the computed value as addend;
// otherwise look the computed value up as an address (r_symbolnum is the section number)
6093 if ( reloc->r_extern() ) {
6094 target.atom = NULL;
6095 const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
6096 target.name = parser.nameFromSymbol(targetSymbol);
6097 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
6098 target.addend = (int32_t)contentValue;
6099 }
6100 else {
6101 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
6102 }
// 32-bit pc-relative branches to names starting with "___dtrace_probe$" (16 chars) or
// "___dtrace_isenabled$" (20 chars) get a dtrace-specific fixup instead of a normal one
6103 if ( (kind == ld::Fixup::kindStoreX86BranchPCRel32) && (target.name != NULL) ) {
6104 if ( strncmp(target.name, "___dtrace_probe$", 16) == 0 ) {
6105 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
6106 parser.addDtraceExtraInfos(src, &target.name[16]);
6107 return false;
6108 }
6109 else if ( strncmp(target.name, "___dtrace_isenabled$", 20) == 0 ) {
6110 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
6111 parser.addDtraceExtraInfos(src, &target.name[20]);
6112 return false;
6113 }
6114 }
6115 parser.addFixups(src, kind, target);
6116 return false;
6117 break;
6118 case GENERIC_RLEOC_TLV:
6119 {
6120 if ( !reloc->r_extern() )
6121 throw "r_extern=0 and r_type=GENERIC_RLEOC_TLV not supported";
6122 if ( reloc->r_length() != 2 )
6123 throw "r_length!=2 and r_type=GENERIC_RLEOC_TLV not supported";
6124 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
6125 // use direct reference for local symbols
6126 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
6127 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
6128 }
6129 else {
6130 target.atom = NULL;
6131 target.name = parser.nameFromSymbol(sym);
6132 target.weakImport = parser.weakImportFromSymbol(sym);
6133 }
// the addend is the sign-extended 32-bit value stored at the fixup location
6134 target.addend = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
6135 if ( reloc->r_pcrel() ) {
6136 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
6137 }
6138 else {
6139 parser.addFixups(src, ld::Fixup::kindStoreX86Abs32TLVLoad, target);
6140 }
6141 return false;
6142 }
6143 break;
6144 default:
6145 throwf("unsupported i386 relocation type (%d)", reloc->r_type());
6146 }
6147 }
6148 else {
6149 // scattered relocation
6150 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
6151 srcAddr = sect->addr() + sreloc->r_address();
6152 src.atom = this->findAtomByAddress(srcAddr);
6153 assert(src.atom != NULL);
6154 src.offsetInAtom = srcAddr - src.atom->_objAddress;
6155 fixUpPtr = file().fileContent() + sect->offset() + sreloc->r_address();
6156 uint32_t relocValue = sreloc->r_value();
6157 bool result = false;
6158 // file format allows pair to be scattered or not
// NOTE(review): the following entry is read unconditionally; no bounds check is visible here
6159 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
6160 const macho_relocation_info<P>* nextReloc = &reloc[1];
6161 bool nextRelocIsPair = false;
// NOTE(review): nextRelocAddress is assigned below but never read in this function
6162 uint32_t nextRelocAddress = 0;
6163 uint32_t nextRelocValue = 0;
6164 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
6165 if ( nextReloc->r_type() == GENERIC_RELOC_PAIR ) {
6166 nextRelocIsPair = true;
6167 nextRelocAddress = nextReloc->r_address();
6168 result = true; // iterator should skip next reloc, since we've consumed it here
6169 }
6170 }
6171 else {
// NOTE(review): unlike the non-scattered branch above, result is not set to true
// here, so a scattered PAIR is not reported as consumed - confirm this is intended
6172 if ( nextSReloc->r_type() == GENERIC_RELOC_PAIR ) {
6173 nextRelocIsPair = true;
6174 nextRelocAddress = nextSReloc->r_address();
6175 nextRelocValue = nextSReloc->r_value();
6176 }
6177 }
// NOTE(review): this switch has no default case; scattered entries of any other type
// (including GENERIC_RELOC_PAIR) produce no fixups
6178 switch (sreloc->r_type()) {
6179 case GENERIC_RELOC_VANILLA:
6180 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
// NOTE(review): target.atom is dereferenced below without a NULL check - confirm
// findAtomByAddress cannot return NULL here
6181 target.atom = parser.findAtomByAddress(relocValue);
6182 if ( sreloc->r_pcrel() ) {
// addend = (fixup address + field size + stored value) - r_value
// NOTE(review): the 1-byte stored value is added without sign extension, and the
// 2-byte one has no explicit sign-extending cast, unlike the non-scattered path above
// - confirm intended
6183 switch ( sreloc->r_length() ) {
6184 case 0:
6185 contentValue = srcAddr + 1 + *fixUpPtr;
6186 target.addend = (int32_t)contentValue - (int32_t)relocValue;
6187 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel8, target);
6188 break;
6189 case 1:
6190 contentValue = srcAddr + 2 + LittleEndian::get16(*((uint16_t*)fixUpPtr));
6191 target.addend = (int32_t)contentValue - (int32_t)relocValue;
6192 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel16, target);
6193 break;
6194 case 2:
6195 contentValue = srcAddr + 4 + LittleEndian::get32(*((uint32_t*)fixUpPtr));
6196 target.addend = (int32_t)contentValue - (int32_t)relocValue;
6197 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
6198 break;
6199 case 3:
6200 throw "unsupported r_length=3 for scattered pc-rel vanilla reloc";
6201 break;
6202 }
6203 }
6204 else {
6205 if ( sreloc->r_length() != 2 )
6206 throwf("unsupported r_length=%d for scattered vanilla reloc", sreloc->r_length());
6207 contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
// addend is the stored value relative to the start of the target atom
6208 target.addend = (int32_t)contentValue - (int32_t)(target.atom->objectAddress());
6209 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
6210 }
6211 break;
// difference of two addresses: r_value of this entry (target) minus r_value of the
// following PAIR entry (from)
6212 case GENERIC_RELOC_SECTDIFF:
6213 case GENERIC_RELOC_LOCAL_SECTDIFF:
6214 {
6215 if ( !nextRelocIsPair )
6216 throw "GENERIC_RELOC_SECTDIFF missing following pair";
6217 switch ( sreloc->r_length() ) {
6218 case 0:
6219 case 3:
6220 throw "bad length for GENERIC_RELOC_SECTDIFF";
6221 case 1:
6222 contentValue = (int32_t)(int16_t)LittleEndian::get16(*((uint16_t*)fixUpPtr));
6223 kind = ld::Fixup::kindStoreLittleEndian16;
6224 break;
6225 case 2:
6226 contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
6227 kind = ld::Fixup::kindStoreLittleEndian32;
6228 break;
6229 }
// NOTE(review): nextRelocValue is only filled in when the PAIR is scattered; with a
// non-scattered PAIR it is still 0 here - confirm that case cannot occur for SECTDIFF
6230 Atom<x86>* fromAtom = parser.findAtomByAddress(nextRelocValue);
6231 uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
6232 parser.findTargetFromAddress(sreloc->r_value(), target);
6233 // check for addend encoded in the section content
6234 int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
// Both branches emit the same five-fixup chain (set target, add addend, subtract
// fromAtom, subtract addend, store). They differ only in where the extra addend goes:
// a negative addend is folded into the subtracted addend (offsetInFrom-addend),
// a non-negative one is added to target.addend.
6235 if ( addend < 0 ) {
6236 // switch binding base on coalescing
6237 if ( target.atom == NULL ) {
6238 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
6239 }
6240 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
6241 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
6242 }
6243 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
6244 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
6245 }
6246 else {
6247 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
6248 }
6249 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
6250 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6251 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
6252 parser.addFixup(src, ld::Fixup::k5of5, kind);
6253 }
6254 else {
6255 // switch binding base on coalescing
6256 if ( target.atom == NULL ) {
6257 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
6258 }
6259 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
6260 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
6261 }
6262 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
6263 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
6264 }
6265 else {
6266 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
6267 }
6268 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend+addend);
6269 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6270 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
6271 parser.addFixup(src, ld::Fixup::k5of5, kind);
6272 }
6273 }
6274 break;
6275 }
6276 return result;
6277 }
6278 }
6279
6280
6281
6282
6283
6284 #if SUPPORT_ARCH_arm_any
6285 template <>
6286 bool Section<arm>::addRelocFixup(class Parser<arm>& parser, const macho_relocation_info<P>* reloc)
6287 {
6288 const macho_section<P>* sect = this->machoSection();
6289 bool result = false;
6290 uint32_t srcAddr;
6291 uint32_t dstAddr;
6292 uint32_t* fixUpPtr;
6293 int32_t displacement = 0;
6294 uint32_t instruction = 0;
6295 pint_t contentValue = 0;
6296 Parser<arm>::SourceLocation src;
6297 Parser<arm>::TargetDesc target;
6298 const macho_relocation_info<P>* nextReloc;
6299
6300 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
6301 bool externSymbolIsThumbDef = false;
6302 srcAddr = sect->addr() + reloc->r_address();
6303 src.atom = this->findAtomByAddress(srcAddr);
6304 src.offsetInAtom = srcAddr - src.atom->_objAddress;
6305 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + reloc->r_address());
6306 if ( reloc->r_type() != ARM_RELOC_PAIR )
6307 instruction = LittleEndian::get32(*fixUpPtr);
6308 if ( reloc->r_extern() ) {
6309 const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
6310 // use direct reference for local symbols
6311 if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) && (((targetSymbol.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(targetSymbol)[0] == 'L')) ) {
6312 parser.findTargetFromAddressAndSectionNum(targetSymbol.n_value(), targetSymbol.n_sect(), target);
6313 }
6314 else {
6315 target.atom = NULL;
6316 target.name = parser.nameFromSymbol(targetSymbol);
6317 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
6318 if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) && (targetSymbol.n_desc() & N_ARM_THUMB_DEF) )
6319 externSymbolIsThumbDef = true;
6320 }
6321 }
6322 switch ( reloc->r_type() ) {
6323 case ARM_RELOC_BR24:
6324 // Sign-extend displacement
6325 displacement = (instruction & 0x00FFFFFF) << 2;
6326 if ( (displacement & 0x02000000) != 0 )
6327 displacement |= 0xFC000000;
6328 // The pc added will be +8 from the pc
6329 displacement += 8;
6330 // If this is BLX add H << 1
6331 if ((instruction & 0xFE000000) == 0xFA000000)
6332 displacement += ((instruction & 0x01000000) >> 23);
6333 if ( reloc->r_extern() ) {
6334 dstAddr = srcAddr + displacement;
6335 // <rdar://problem/16652542> support large .o files
6336 if ( srcAddr > 0x2000000 ) {
6337 dstAddr -= ((srcAddr + 0x1FFFFFF) & 0xFC000000);
6338 }
6339 target.addend = dstAddr;
6340 if ( externSymbolIsThumbDef )
6341 target.addend &= -2; // remove thumb bit
6342 }
6343 else {
6344 dstAddr = srcAddr + displacement;
6345 parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
6346 }
6347 // special case "calls" for dtrace
6348 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
6349 parser.addFixup(src, ld::Fixup::k1of1,
6350 ld::Fixup::kindStoreARMDtraceCallSiteNop, false, target.name);
6351 parser.addDtraceExtraInfos(src, &target.name[16]);
6352 }
6353 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
6354 parser.addFixup(src, ld::Fixup::k1of1,
6355 ld::Fixup::kindStoreARMDtraceIsEnableSiteClear, false, target.name);
6356 parser.addDtraceExtraInfos(src, &target.name[20]);
6357 }
6358 else {
6359 parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
6360 }
6361 break;
6362 case ARM_THUMB_RELOC_BR22:
6363 // thumb2 added two more bits to displacement, complicating the displacement decoding
6364 {
6365 uint32_t s = (instruction >> 10) & 0x1;
6366 uint32_t j1 = (instruction >> 29) & 0x1;
6367 uint32_t j2 = (instruction >> 27) & 0x1;
6368 uint32_t imm10 = instruction & 0x3FF;
6369 uint32_t imm11 = (instruction >> 16) & 0x7FF;
6370 uint32_t i1 = (j1 == s);
6371 uint32_t i2 = (j2 == s);
6372 uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
6373 int32_t sdis = dis;
6374 if ( s )
6375 sdis |= 0xFE000000;
6376 displacement = sdis;
6377 }
6378 // The pc added will be +4 from the pc
6379 displacement += 4;
6380 // If the instruction was blx, force the low 2 bits to be clear
6381 dstAddr = srcAddr + displacement;
6382 if ((instruction & 0xD0000000) == 0xC0000000)
6383 dstAddr &= 0xFFFFFFFC;
6384
6385 if ( reloc->r_extern() ) {
6386 // <rdar://problem/16652542> support large .o files
6387 if ( srcAddr > 0x1000000 ) {
6388 dstAddr -= ((srcAddr + 0xFFFFFF) & 0xFE000000);
6389 }
6390 target.addend = (int64_t)(int32_t)dstAddr;
6391 }
6392 else {
6393 parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
6394 }
6395 // special case "calls" for dtrace
6396 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
6397 parser.addFixup(src, ld::Fixup::k1of1,
6398 ld::Fixup::kindStoreThumbDtraceCallSiteNop, false, target.name);
6399 parser.addDtraceExtraInfos(src, &target.name[16]);
6400 }
6401 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
6402 parser.addFixup(src, ld::Fixup::k1of1,
6403 ld::Fixup::kindStoreThumbDtraceIsEnableSiteClear, false, target.name);
6404 parser.addDtraceExtraInfos(src, &target.name[20]);
6405 }
6406 else {
6407 parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
6408 }
6409 break;
6410 case ARM_RELOC_VANILLA:
6411 if ( reloc->r_length() != 2 )
6412 throw "bad length for ARM_RELOC_VANILLA";
6413 contentValue = LittleEndian::get32(*fixUpPtr);
6414 if ( reloc->r_extern() ) {
6415 target.addend = (int32_t)contentValue;
6416 if ( externSymbolIsThumbDef )
6417 target.addend &= -2; // remove thumb bit
6418 }
6419 else {
6420 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
6421 // possible non-extern relocation turned into by-name ref because target is a weak-def
6422 if ( target.atom != NULL ) {
6423 if ( target.atom->isThumb() )
6424 target.addend &= -2; // remove thumb bit
6425 // if reference to LSDA, add group subordinate fixup
6426 if ( target.atom->contentType() == ld::Atom::typeLSDA ) {
6427 Parser<arm>::SourceLocation src2;
6428 src2.atom = src.atom;
6429 src2.offsetInAtom = 0;
6430 parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, target.atom);
6431 }
6432 }
6433 }
6434 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
6435 break;
6436 case ARM_THUMB_32BIT_BRANCH:
6437 // silently ignore old unnecessary reloc
6438 break;
6439 case ARM_RELOC_HALF:
6440 nextReloc = &reloc[1];
6441 if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
6442 uint32_t instruction16;
6443 uint32_t other16 = (nextReloc->r_address() & 0xFFFF);
6444 bool isThumb;
6445 if ( reloc->r_length() & 2 ) {
6446 isThumb = true;
6447 uint32_t i = ((instruction & 0x00000400) >> 10);
6448 uint32_t imm4 = (instruction & 0x0000000F);
6449 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
6450 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
6451 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
6452 }
6453 else {
6454 isThumb = false;
6455 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
6456 uint32_t imm12 = (instruction & 0x00000FFF);
6457 instruction16 = (imm4 << 12) | imm12;
6458 }
6459 if ( reloc->r_length() & 1 ) {
6460 // high 16
6461 dstAddr = ((instruction16 << 16) | other16);
6462 if ( reloc->r_extern() ) {
6463 target.addend = dstAddr;
6464 if ( externSymbolIsThumbDef )
6465 target.addend &= -2; // remove thumb bit
6466 }
6467 else {
6468 parser.findTargetFromAddress(dstAddr, target);
6469 if ( target.atom->isThumb() )
6470 target.addend &= (-2); // remove thumb bit
6471 }
6472 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16), target);
6473 }
6474 else {
6475 // low 16
6476 dstAddr = (other16 << 16) | instruction16;
6477 if ( reloc->r_extern() ) {
6478 target.addend = dstAddr;
6479 if ( externSymbolIsThumbDef )
6480 target.addend &= -2; // remove thumb bit
6481 }
6482 else {
6483 parser.findTargetFromAddress(dstAddr, target);
6484 if ( target.atom->isThumb() )
6485 target.addend &= (-2); // remove thumb bit
6486 }
6487 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16), target);
6488 }
6489 result = true;
6490 }
6491 else
6492 throw "for ARM_RELOC_HALF, next reloc is not ARM_RELOC_PAIR";
6493 break;
6494 default:
6495 throwf("unknown relocation type %d", reloc->r_type());
6496 break;
6497 }
6498 }
6499 else {
6500 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
6501 // file format allows pair to be scattered or not
6502 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
6503 nextReloc = &reloc[1];
6504 srcAddr = sect->addr() + sreloc->r_address();
6505 dstAddr = sreloc->r_value();
6506 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + sreloc->r_address());
6507 instruction = LittleEndian::get32(*fixUpPtr);
6508 src.atom = this->findAtomByAddress(srcAddr);
6509 src.offsetInAtom = srcAddr - src.atom->_objAddress;
6510 bool nextRelocIsPair = false;
6511 uint32_t nextRelocAddress = 0;
6512 uint32_t nextRelocValue = 0;
6513 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
6514 if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
6515 nextRelocIsPair = true;
6516 nextRelocAddress = nextReloc->r_address();
6517 result = true;
6518 }
6519 }
6520 else {
6521 if ( nextSReloc->r_type() == ARM_RELOC_PAIR ) {
6522 nextRelocIsPair = true;
6523 nextRelocAddress = nextSReloc->r_address();
6524 nextRelocValue = nextSReloc->r_value();
6525 result = true;
6526 }
6527 }
6528 switch ( sreloc->r_type() ) {
6529 case ARM_RELOC_VANILLA:
6530 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
6531 if ( sreloc->r_length() != 2 )
6532 throw "bad length for ARM_RELOC_VANILLA";
6533 target.atom = parser.findAtomByAddress(sreloc->r_value());
6534 if ( target.atom == NULL )
6535 throwf("bad r_value (0x%08X) for ARM_RELOC_VANILLA\n", sreloc->r_value());
6536 contentValue = LittleEndian::get32(*fixUpPtr);
6537 target.addend = contentValue - target.atom->_objAddress;
6538 if ( target.atom->isThumb() )
6539 target.addend &= -2; // remove thumb bit
6540 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
6541 break;
6542 case ARM_RELOC_BR24:
6543 // Sign-extend displacement
6544 displacement = (instruction & 0x00FFFFFF) << 2;
6545 if ( (displacement & 0x02000000) != 0 )
6546 displacement |= 0xFC000000;
6547 // The pc added will be +8 from the pc
6548 displacement += 8;
6549 // If this is BLX add H << 1
6550 if ((instruction & 0xFE000000) == 0xFA000000)
6551 displacement += ((instruction & 0x01000000) >> 23);
6552 target.atom = parser.findAtomByAddress(sreloc->r_value());
6553 target.addend = (int64_t)(srcAddr + displacement) - (int64_t)(target.atom->_objAddress);
6554 parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
6555 break;
6556 case ARM_THUMB_RELOC_BR22:
6557 // thumb2 added two more bits to displacement, complicating the displacement decoding
6558 {
6559 uint32_t s = (instruction >> 10) & 0x1;
6560 uint32_t j1 = (instruction >> 29) & 0x1;
6561 uint32_t j2 = (instruction >> 27) & 0x1;
6562 uint32_t imm10 = instruction & 0x3FF;
6563 uint32_t imm11 = (instruction >> 16) & 0x7FF;
6564 uint32_t i1 = (j1 == s);
6565 uint32_t i2 = (j2 == s);
6566 uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
6567 int32_t sdis = dis;
6568 if ( s )
6569 sdis |= 0xFE000000;
6570 displacement = sdis;
6571 }
6572 // The pc added will be +4 from the pc
6573 displacement += 4;
6574 dstAddr = srcAddr+displacement;
6575 // If the instruction was blx, force the low 2 bits to be clear
6576 if ((instruction & 0xF8000000) == 0xE8000000)
6577 dstAddr &= 0xFFFFFFFC;
6578 target.atom = parser.findAtomByAddress(sreloc->r_value());
6579 target.addend = dstAddr - target.atom->_objAddress;
6580 parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
6581 break;
6582 case ARM_RELOC_SECTDIFF:
6583 case ARM_RELOC_LOCAL_SECTDIFF:
6584 {
6585 if ( ! nextRelocIsPair )
6586 throw "ARM_RELOC_SECTDIFF missing following pair";
6587 if ( sreloc->r_length() != 2 )
6588 throw "bad length for ARM_RELOC_SECTDIFF";
6589 contentValue = LittleEndian::get32(*fixUpPtr);
6590 Atom<arm>* fromAtom = parser.findAtomByAddress(nextRelocValue);
6591 uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
6592 uint32_t offsetInTarget;
6593 Atom<arm>* targetAtom = parser.findAtomByAddressOrLocalTargetOfStub(sreloc->r_value(), &offsetInTarget);
6594 // check for addend encoded in the section content
6595 int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
6596 if ( targetAtom->isThumb() )
6597 addend &= -2; // remove thumb bit
6598 // if reference to LSDA, add group subordinate fixup
6599 if ( targetAtom->contentType() == ld::Atom::typeLSDA ) {
6600 Parser<arm>::SourceLocation src2;
6601 src2.atom = src.atom;
6602 src2.offsetInAtom = 0;
6603 parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, targetAtom);
6604 }
6605 if ( addend < 0 ) {
6606 // switch binding base on coalescing
6607 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
6608 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
6609 }
6610 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
6611 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
6612 }
6613 else {
6614 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
6615 }
6616 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, offsetInTarget);
6617 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6618 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
6619 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
6620 }
6621 else {
6622 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
6623 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
6624 }
6625 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
6626 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
6627 }
6628 else {
6629 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
6630 }
6631 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)(offsetInTarget+addend));
6632 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6633 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
6634 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
6635 }
6636 }
6637 break;
6638 case ARM_RELOC_HALF_SECTDIFF:
6639 if ( nextRelocIsPair ) {
6640 instruction = LittleEndian::get32(*fixUpPtr);
6641 Atom<arm>* fromAtom = parser.findAtomByAddress(nextRelocValue);
6642 uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
6643 Atom<arm>* targetAtom = parser.findAtomByAddress(sreloc->r_value());
6644 uint32_t offsetInTarget = sreloc->r_value() - targetAtom->_objAddress;
6645 uint32_t instruction16;
6646 uint32_t other16 = (nextRelocAddress & 0xFFFF);
6647 bool isThumb;
6648 if ( sreloc->r_length() & 2 ) {
6649 isThumb = true;
6650 uint32_t i = ((instruction & 0x00000400) >> 10);
6651 uint32_t imm4 = (instruction & 0x0000000F);
6652 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
6653 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
6654 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
6655 }
6656 else {
6657 isThumb = false;
6658 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
6659 uint32_t imm12 = (instruction & 0x00000FFF);
6660 instruction16 = (imm4 << 12) | imm12;
6661 }
6662 if ( sreloc->r_length() & 1 )
6663 dstAddr = ((instruction16 << 16) | other16);
6664 else
6665 dstAddr = (other16 << 16) | instruction16;
6666 if ( targetAtom->isThumb() )
6667 dstAddr &= (-2); // remove thumb bit
6668 int32_t addend = dstAddr - (sreloc->r_value() - nextRelocValue);
6669 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
6670 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
6671 }
6672 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
6673 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
6674 }
6675 else {
6676 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
6677 }
6678 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)offsetInTarget+addend);
6679 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6680 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
6681 if ( sreloc->r_length() & 1 ) {
6682 // high 16
6683 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
6684 }
6685 else {
6686 // low 16
6687 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
6688 }
6689 result = true;
6690 }
6691 else
6692 throw "ARM_RELOC_HALF_SECTDIFF reloc missing following pair";
6693 break;
6694 case ARM_RELOC_HALF:
6695 if ( nextRelocIsPair ) {
6696 instruction = LittleEndian::get32(*fixUpPtr);
6697 Atom<arm>* targetAtom = parser.findAtomByAddress(sreloc->r_value());
6698 uint32_t instruction16;
6699 uint32_t other16 = (nextRelocAddress & 0xFFFF);
6700 bool isThumb;
6701 if ( sreloc->r_length() & 2 ) {
6702 isThumb = true;
6703 uint32_t i = ((instruction & 0x00000400) >> 10);
6704 uint32_t imm4 = (instruction & 0x0000000F);
6705 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
6706 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
6707 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
6708 }
6709 else {
6710 isThumb = false;
6711 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
6712 uint32_t imm12 = (instruction & 0x00000FFF);
6713 instruction16 = (imm4 << 12) | imm12;
6714 }
6715 if ( sreloc->r_length() & 1 )
6716 dstAddr = ((instruction16 << 16) | other16);
6717 else
6718 dstAddr = (other16 << 16) | instruction16;
6719 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
6720 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, targetAtom);
6721 }
6722 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
6723 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
6724 }
6725 else {
6726 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
6727 }
6728 parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, dstAddr - targetAtom->_objAddress);
6729 if ( sreloc->r_length() & 1 ) {
6730 // high 16
6731 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
6732 }
6733 else {
6734 // low 16
6735 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
6736 }
6737 result = true;
6738 }
6739 else
6740 throw "scattered ARM_RELOC_HALF reloc missing following pair";
6741 break;
6742 default:
6743 throwf("unknown ARM scattered relocation type %d", sreloc->r_type());
6744 }
6745 }
6746 return result;
6747 }
6748 #endif
6749
6750
6751 #if SUPPORT_ARCH_arm64
6752 template <>
6753 bool Section<arm64>::addRelocFixup(class Parser<arm64>& parser, const macho_relocation_info<P>* reloc)
6754 {
6755 bool result = false;
6756 Parser<arm64>::SourceLocation src;
6757 Parser<arm64>::TargetDesc target = { NULL, NULL, false, 0 };
6758 Parser<arm64>::TargetDesc toTarget;
6759 int32_t prefixRelocAddend = 0;
6760 if ( reloc->r_type() == ARM64_RELOC_ADDEND ) {
6761 uint32_t rawAddend = reloc->r_symbolnum();
6762 prefixRelocAddend = rawAddend;
6763 if ( rawAddend & 0x00800000 )
6764 prefixRelocAddend |= 0xFF000000; // sign extend 24-bit signed int to 32-bits
6765 uint32_t addendAddress = reloc->r_address();
6766 ++reloc; //advance to next reloc record
6767 result = true;
6768 if ( reloc->r_address() != addendAddress )
6769 throw "ARM64_RELOC_ADDEND r_address does not match next reloc's r_address";
6770 }
6771 const macho_section<P>* sect = this->machoSection();
6772 uint64_t srcAddr = sect->addr() + reloc->r_address();
6773 src.atom = this->findAtomByAddress(srcAddr);
6774 src.offsetInAtom = srcAddr - src.atom->_objAddress;
6775 const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6776 uint64_t contentValue = 0;
6777 const macho_relocation_info<arm64::P>* nextReloc = &reloc[1];
6778 bool useDirectBinding;
6779 uint32_t instruction;
6780 uint32_t encodedAddend;
6781 switch ( reloc->r_length() ) {
6782 case 0:
6783 contentValue = *fixUpPtr;
6784 break;
6785 case 1:
6786 contentValue = (int64_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
6787 break;
6788 case 2:
6789 contentValue = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
6790 break;
6791 case 3:
6792 contentValue = E::get64(*((uint64_t*)fixUpPtr));
6793 break;
6794 }
6795 if ( reloc->r_extern() ) {
6796 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
6797 const char* symbolName = parser.nameFromSymbol(sym);
6798 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (symbolName[0] == 'L') || (symbolName[0] == 'l')) ) {
6799 // use direct reference for local symbols
6800 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
6801 //target.addend += contentValue;
6802 }
6803 else if ( ((sym.n_type() & N_TYPE) == N_SECT) && (src.atom->_objAddress <= sym.n_value()) && (sym.n_value() < (src.atom->_objAddress+src.atom->size())) ) {
6804 // <rdar://problem/13700961> spurious warning when weak function has reference to itself
6805 // use direct reference when atom targets itself
6806 target.atom = src.atom;
6807 target.name = NULL;
6808 }
6809 else {
6810 target.name = symbolName;
6811 target.weakImport = parser.weakImportFromSymbol(sym);
6812 //target.addend = contentValue;
6813 }
6814 // cfstrings should always use direct reference to backing store
6815 if ( (this->type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
6816 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
6817 //target.addend = contentValue;
6818 }
6819 }
6820 else {
6821 if ( reloc->r_pcrel() )
6822 contentValue += srcAddr;
6823 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
6824 }
6825 switch ( reloc->r_type() ) {
6826 case ARM64_RELOC_UNSIGNED:
6827 if ( reloc->r_pcrel() )
6828 throw "pcrel and ARM64_RELOC_UNSIGNED not supported";
6829 target.addend = contentValue;
6830 switch ( reloc->r_length() ) {
6831 case 0:
6832 case 1:
6833 throw "length < 2 and ARM64_RELOC_UNSIGNED not supported";
6834 case 2:
6835 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
6836 break;
6837 case 3:
6838 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian64, target);
6839 break;
6840 }
6841 break;
6842 case ARM64_RELOC_BRANCH26:
6843 if ( ! reloc->r_pcrel() )
6844 throw "not pcrel and ARM64_RELOC_BRANCH26 not supported";
6845 if ( ! reloc->r_extern() )
6846 throw "r_extern == 0 and ARM64_RELOC_BRANCH26 not supported";
6847 if ( reloc->r_length() != 2 )
6848 throw "r_length != 2 and ARM64_RELOC_BRANCH26 not supported";
6849 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
6850 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreARM64DtraceCallSiteNop, false, target.name);
6851 parser.addDtraceExtraInfos(src, &target.name[16]);
6852 }
6853 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
6854 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreARM64DtraceIsEnableSiteClear, false, target.name);
6855 parser.addDtraceExtraInfos(src, &target.name[20]);
6856 }
6857 else {
6858 target.addend = prefixRelocAddend;
6859 instruction = contentValue;
6860 encodedAddend = (instruction & 0x03FFFFFF) << 2;
6861 if ( encodedAddend != 0 ) {
6862 if ( prefixRelocAddend == 0 ) {
6863 warning("branch26 instruction at 0x%08X has embedded addend. ARM64_RELOC_ADDEND should be used instead", reloc->r_address());
6864 target.addend = encodedAddend;
6865 }
6866 else {
6867 throwf("branch26 instruction at 0x%08X has embedded addend and ARM64_RELOC_ADDEND also used", reloc->r_address());
6868 }
6869 }
6870 parser.addFixups(src, ld::Fixup::kindStoreARM64Branch26, target);
6871 }
6872 break;
6873 case ARM64_RELOC_PAGE21:
6874 if ( ! reloc->r_pcrel() )
6875 throw "not pcrel and ARM64_RELOC_PAGE21 not supported";
6876 if ( ! reloc->r_extern() )
6877 throw "r_extern == 0 and ARM64_RELOC_PAGE21 not supported";
6878 if ( reloc->r_length() != 2 )
6879 throw "length != 2 and ARM64_RELOC_PAGE21 not supported";
6880 target.addend = prefixRelocAddend;
6881 instruction = contentValue;
6882 encodedAddend = ((instruction & 0x60000000) >> 29) | ((instruction & 0x01FFFFE0) >> 3);
6883 encodedAddend *= 4096; // internally addend is in bytes, so scale
6884 if ( encodedAddend != 0 ) {
6885 if ( prefixRelocAddend == 0 ) {
6886 warning("adrp instruction at 0x%08X has embedded addend. ARM64_RELOC_ADDEND should be used instead", reloc->r_address());
6887 target.addend = encodedAddend;
6888 }
6889 else {
6890 throwf("adrp instruction at 0x%08X has embedded addend and ARM64_RELOC_ADDEND also used", reloc->r_address());
6891 }
6892 }
6893 parser.addFixups(src, ld::Fixup::kindStoreARM64Page21, target);
6894 break;
6895 case ARM64_RELOC_PAGEOFF12:
6896 if ( reloc->r_pcrel() )
6897 throw "pcrel and ARM64_RELOC_PAGEOFF12 not supported";
6898 if ( ! reloc->r_extern() )
6899 throw "r_extern == 0 and ARM64_RELOC_PAGEOFF12 not supported";
6900 if ( reloc->r_length() != 2 )
6901 throw "length != 2 and ARM64_RELOC_PAGEOFF12 not supported";
6902 target.addend = prefixRelocAddend;
6903 instruction = contentValue;
6904 encodedAddend = ((instruction & 0x003FFC00) >> 10);
6905 // internally addend is in bytes. Some instructions have an implicit scale factor
6906 if ( (instruction & 0x3B000000) == 0x39000000 ) {
6907 switch ( instruction & 0xC0000000 ) {
6908 case 0x00000000:
6909 break;
6910 case 0x40000000:
6911 encodedAddend *= 2;
6912 break;
6913 case 0x80000000:
6914 encodedAddend *= 4;
6915 break;
6916 case 0xC0000000:
6917 encodedAddend *= 8;
6918 break;
6919 }
6920 }
6921 if ( encodedAddend != 0 ) {
6922 if ( prefixRelocAddend == 0 ) {
6923 warning("pageoff12 instruction at 0x%08X has embedded addend. ARM64_RELOC_ADDEND should be used instead", reloc->r_address());
6924 target.addend = encodedAddend;
6925 }
6926 else {
6927 throwf("pageoff12 instruction at 0x%08X has embedded addend and ARM64_RELOC_ADDEND also used", reloc->r_address());
6928 }
6929 }
6930 parser.addFixups(src, ld::Fixup::kindStoreARM64PageOff12, target);
6931 break;
6932 case ARM64_RELOC_GOT_LOAD_PAGE21:
6933 if ( ! reloc->r_pcrel() )
6934 throw "not pcrel and ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
6935 if ( ! reloc->r_extern() )
6936 throw "r_extern == 0 and ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
6937 if ( reloc->r_length() != 2 )
6938 throw "length != 2 and ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
6939 if ( prefixRelocAddend != 0 )
6940 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
6941 instruction = contentValue;
6942 target.addend = ((instruction & 0x60000000) >> 29) | ((instruction & 0x01FFFFE0) >> 3);
6943 if ( target.addend != 0 )
6944 throw "non-zero addend with ARM64_RELOC_GOT_LOAD_PAGE21 is not supported";
6945 parser.addFixups(src, ld::Fixup::kindStoreARM64GOTLoadPage21, target);
6946 break;
6947 case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
6948 if ( reloc->r_pcrel() )
6949 throw "pcrel and ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
6950 if ( ! reloc->r_extern() )
6951 throw "r_extern == 0 and ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
6952 if ( reloc->r_length() != 2 )
6953 throw "length != 2 and ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
6954 if ( prefixRelocAddend != 0 )
6955 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
6956 instruction = contentValue;
6957 target.addend = ((instruction & 0x003FFC00) >> 10);
6958 parser.addFixups(src, ld::Fixup::kindStoreARM64GOTLoadPageOff12, target);
6959 break;
6960 case ARM64_RELOC_TLVP_LOAD_PAGE21:
6961 if ( ! reloc->r_pcrel() )
6962 throw "not pcrel and ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
6963 if ( ! reloc->r_extern() )
6964 throw "r_extern == 0 and ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
6965 if ( reloc->r_length() != 2 )
6966 throw "length != 2 and ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
6967 if ( prefixRelocAddend != 0 )
6968 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
6969 instruction = contentValue;
6970 target.addend = ((instruction & 0x60000000) >> 29) | ((instruction & 0x01FFFFE0) >> 3);
6971 if ( target.addend != 0 )
6972 throw "non-zero addend with ARM64_RELOC_GOT_LOAD_PAGE21 is not supported";
6973 parser.addFixups(src, ld::Fixup::kindStoreARM64TLVPLoadPage21, target);
6974 break;
6975 case ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
6976 if ( reloc->r_pcrel() )
6977 throw "pcrel and ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
6978 if ( ! reloc->r_extern() )
6979 throw "r_extern == 0 and ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
6980 if ( reloc->r_length() != 2 )
6981 throw "length != 2 and ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
6982 if ( prefixRelocAddend != 0 )
6983 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
6984 instruction = contentValue;
6985 target.addend = ((instruction & 0x003FFC00) >> 10);
6986 parser.addFixups(src, ld::Fixup::kindStoreARM64TLVPLoadPageOff12, target);
6987 break;
6988 case ARM64_RELOC_SUBTRACTOR:
6989 if ( reloc->r_pcrel() )
6990 throw "ARM64_RELOC_SUBTRACTOR cannot be pc-relative";
6991 if ( reloc->r_length() < 2 )
6992 throw "ARM64_RELOC_SUBTRACTOR must have r_length of 2 or 3";
6993 if ( !reloc->r_extern() )
6994 throw "ARM64_RELOC_SUBTRACTOR must have r_extern=1";
6995 if ( nextReloc->r_type() != ARM64_RELOC_UNSIGNED )
6996 throw "ARM64_RELOC_SUBTRACTOR must be followed by ARM64_RELOC_UNSIGNED";
6997 if ( prefixRelocAddend != 0 )
6998 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_SUBTRACTOR not supported";
6999 result = true;
7000 if ( nextReloc->r_pcrel() )
7001 throw "ARM64_RELOC_UNSIGNED following a ARM64_RELOC_SUBTRACTOR cannot be pc-relative";
7002 if ( nextReloc->r_length() != reloc->r_length() )
7003 throw "ARM64_RELOC_UNSIGNED following a ARM64_RELOC_SUBTRACTOR must have same r_length";
7004 if ( nextReloc->r_extern() ) {
7005 const macho_nlist<P>& sym = parser.symbolFromIndex(nextReloc->r_symbolnum());
7006 // use direct reference for local symbols
7007 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
7008 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), toTarget);
7009 toTarget.addend = contentValue;
7010 useDirectBinding = true;
7011 }
7012 else {
7013 toTarget.name = parser.nameFromSymbol(sym);
7014 toTarget.weakImport = parser.weakImportFromSymbol(sym);
7015 toTarget.addend = contentValue;
7016 useDirectBinding = false;
7017 }
7018 }
7019 else {
7020 parser.findTargetFromAddressAndSectionNum(contentValue, nextReloc->r_symbolnum(), toTarget);
7021 useDirectBinding = (toTarget.atom->scope() == ld::Atom::scopeTranslationUnit);
7022 }
7023 if ( useDirectBinding )
7024 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.atom);
7025 else
7026 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.weakImport, toTarget.name);
7027 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, toTarget.addend);
7028 if ( target.atom == NULL )
7029 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, false, target.name);
7030 else
7031 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, target.atom);
7032 if ( reloc->r_length() == 2 )
7033 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
7034 else
7035 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian64);
7036 break;
7037 case ARM64_RELOC_POINTER_TO_GOT:
7038 if ( ! reloc->r_extern() )
7039 throw "r_extern == 0 and ARM64_RELOC_POINTER_TO_GOT not supported";
7040 if ( prefixRelocAddend != 0 )
7041 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_POINTER_TO_GOT not supported";
7042 if ( reloc->r_pcrel() ) {
7043 if ( reloc->r_length() != 2 )
7044 throw "r_length != 2 and r_extern = 1 and ARM64_RELOC_POINTER_TO_GOT not supported";
7045 parser.addFixups(src, ld::Fixup::kindStoreARM64PCRelToGOT, target);
7046 }
7047 else {
7048 if ( reloc->r_length() != 3 )
7049 throw "r_length != 3 and r_extern = 0 and ARM64_RELOC_POINTER_TO_GOT not supported";
7050 parser.addFixups(src, ld::Fixup::kindStoreARM64PointerToGOT, target);
7051 }
7052 break;
7053 default:
7054 throwf("unknown relocation type %d", reloc->r_type());
7055 }
7056 return result;
7057 }
7058 #endif
7059
7060 template <typename A>
7061 bool ObjC1ClassSection<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
7062 {
7063 // inherited
7064 FixedSizeSection<A>::addRelocFixup(parser, reloc);
7065
7066 assert(0 && "needs template specialization");
7067 return false;
7068 }
7069
7070 template <>
7071 bool ObjC1ClassSection<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
7072 {
7073 // if this is the reloc for the super class name string, add implicit reference to super class
7074 if ( ((reloc->r_address() & R_SCATTERED) == 0) && (reloc->r_type() == GENERIC_RELOC_VANILLA) ) {
7075 assert( reloc->r_length() == 2 );
7076 assert( ! reloc->r_pcrel() );
7077
7078 const macho_section<P>* sect = this->machoSection();
7079 Parser<x86>::SourceLocation src;
7080 uint32_t srcAddr = sect->addr() + reloc->r_address();
7081 src.atom = this->findAtomByAddress(srcAddr);
7082 src.offsetInAtom = srcAddr - src.atom->objectAddress();
7083 if ( src.offsetInAtom == 4 ) {
7084 Parser<x86>::TargetDesc stringTarget;
7085 const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
7086 uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
7087 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
7088
7089 assert(stringTarget.atom != NULL);
7090 assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
7091 const char* superClassBaseName = (char*)stringTarget.atom->rawContentPointer();
7092 char* superClassName = new char[strlen(superClassBaseName) + 20];
7093 strcpy(superClassName, ".objc_class_name_");
7094 strcat(superClassName, superClassBaseName);
7095
7096 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, superClassName);
7097 }
7098 }
7099 // inherited
7100 return FixedSizeSection<x86>::addRelocFixup(parser, reloc);
7101 }
7102
7103
7104
7105 template <typename A>
7106 bool Objc1ClassReferences<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
7107 {
7108 // inherited
7109 PointerToCStringSection<A>::addRelocFixup(parser, reloc);
7110
7111 assert(0 && "needs template specialization");
7112 return false;
7113 }
7114
7115
7116
7117 template <>
7118 bool Objc1ClassReferences<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
7119 {
7120 // add implict class refs, fixups not usable yet, so look at relocations
7121 assert( (reloc->r_address() & R_SCATTERED) == 0 );
7122 assert( reloc->r_type() == GENERIC_RELOC_VANILLA );
7123 assert( reloc->r_length() == 2 );
7124 assert( ! reloc->r_pcrel() );
7125
7126 const macho_section<P>* sect = this->machoSection();
7127 Parser<x86>::SourceLocation src;
7128 uint32_t srcAddr = sect->addr() + reloc->r_address();
7129 src.atom = this->findAtomByAddress(srcAddr);
7130 src.offsetInAtom = srcAddr - src.atom->objectAddress();
7131 Parser<x86>::TargetDesc stringTarget;
7132 const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
7133 uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
7134 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
7135
7136 assert(stringTarget.atom != NULL);
7137 assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
7138 const char* baseClassName = (char*)stringTarget.atom->rawContentPointer();
7139 char* objcClassName = new char[strlen(baseClassName) + 20];
7140 strcpy(objcClassName, ".objc_class_name_");
7141 strcat(objcClassName, baseClassName);
7142
7143 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, objcClassName);
7144
7145 // inherited
7146 return PointerToCStringSection<x86>::addRelocFixup(parser, reloc);
7147 }
7148
7149 #if SUPPORT_ARCH_arm64
7150 template <>
7151 void Section<arm64>::addLOH(class Parser<arm64>& parser, int kind, int count, const uint64_t addrs[]) {
7152 switch (kind) {
7153 case LOH_ARM64_ADRP_ADRP:
7154 case LOH_ARM64_ADRP_LDR:
7155 case LOH_ARM64_ADRP_ADD:
7156 case LOH_ARM64_ADRP_LDR_GOT:
7157 if ( count != 2 )
7158 warning("arm64 Linker Optimiztion Hint %d has wrong number of arguments", kind);
7159 break;
7160 case LOH_ARM64_ADRP_ADD_LDR:
7161 case LOH_ARM64_ADRP_LDR_GOT_LDR:
7162 case LOH_ARM64_ADRP_ADD_STR:
7163 case LOH_ARM64_ADRP_LDR_GOT_STR:
7164 if ( count != 3 )
7165 warning("arm64 Linker Optimiztion Hint %d has wrong number of arguments", kind);
7166 }
7167
7168 // pick lowest address in tuple for use as offsetInAtom
7169 uint64_t lowestAddress = addrs[0];
7170 for(int i=1; i < count; ++i) {
7171 if ( addrs[i] < lowestAddress )
7172 lowestAddress = addrs[i];
7173 }
7174 // verify all other address are in same atom
7175 Atom<arm64>* inAtom = parser.findAtomByAddress(lowestAddress);
7176 const uint64_t atomStartAddr = inAtom->objectAddress();
7177 const uint64_t atomEndAddr = atomStartAddr + inAtom->size();
7178 for(int i=0; i < count; ++i) {
7179 if ( (addrs[i] < atomStartAddr) || (addrs[i] >= atomEndAddr) ) {
7180 warning("arm64 Linker Optimiztion Hint addresses are not in same atom: 0x%08llX and 0x%08llX",
7181 lowestAddress, addrs[i]);
7182 return; // skip this LOH
7183 }
7184 if ( (addrs[i] & 0x3) != 0 ) {
7185 warning("arm64 Linker Optimiztion Hint address is not 4-byte aligned: 0x%08llX", addrs[i]);
7186 return; // skip this LOH
7187 }
7188 if ( (addrs[i] - lowestAddress) > 0xFFFF ) {
7189 if ( parser.verboseOptimizationHints() ) {
7190 warning("arm64 Linker Optimiztion Hint addresses are too far apart: 0x%08llX and 0x%08llX",
7191 lowestAddress, addrs[i]);
7192 }
7193 return; // skip this LOH
7194 }
7195 }
7196
7197 // encoded kind, count, and address deltas in 64-bit addend
7198 ld::Fixup::LOH_arm64 extra;
7199 extra.addend = 0;
7200 extra.info.kind = kind;
7201 extra.info.count = count-1;
7202 extra.info.delta1 = (addrs[0] - lowestAddress) >> 2;
7203 extra.info.delta2 = (count > 1) ? ((addrs[1] - lowestAddress) >> 2) : 0;
7204 extra.info.delta3 = (count > 2) ? ((addrs[2] - lowestAddress) >> 2) : 0;
7205 extra.info.delta4 = (count > 3) ? ((addrs[3] - lowestAddress) >> 2) : 0;
7206 typename Parser<arm64>::SourceLocation src(inAtom, lowestAddress- inAtom->objectAddress());
7207 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindLinkerOptimizationHint, extra.addend);
7208 }
7209 #endif
7210
7211 template <typename A>
7212 void Section<A>::addLOH(class Parser<A>& parser, int kind, int count, const uint64_t addrs[]) {
7213
7214 }
7215
7216 template <typename A>
7217 void Section<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
7218 {
7219 const macho_section<P>* sect = this->machoSection();
7220 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + sect->reloff());
7221 const uint32_t relocCount = sect->nreloc();
7222 for (uint32_t r = 0; r < relocCount; ++r) {
7223 try {
7224 if ( this->addRelocFixup(parser, &relocs[r]) )
7225 ++r; // skip next
7226 }
7227 catch (const char* msg) {
7228 throwf("in section %s,%s reloc %u: %s", sect->segname(), Section<A>::makeSectionName(sect), r, msg);
7229 }
7230 }
7231
7232 // add follow-on fixups if .o file is missing .subsections_via_symbols
7233 if ( this->addFollowOnFixups() ) {
7234 Atom<A>* end = &_endAtoms[-1];
7235 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
7236 typename Parser<A>::SourceLocation src(p, 0);
7237 Atom<A>* nextAtom = &p[1];
7238 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
7239 }
7240 }
7241 else if ( this->type() == ld::Section::typeCode ) {
7242 // if FDE broke text not at a symbol, use followOn to keep code together
7243 Atom<A>* end = &_endAtoms[-1];
7244 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
7245 typename Parser<A>::SourceLocation src(p, 0);
7246 Atom<A>* nextAtom = &p[1];
7247 if ( (p->symbolTableInclusion() == ld::Atom::symbolTableIn) && (nextAtom->symbolTableInclusion() == ld::Atom::symbolTableNotIn) ) {
7248 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
7249 }
7250 }
7251 }
7252 if ( !this->_altEntries.empty() && !this->addFollowOnFixups() ) {
7253 if ( _altEntries.count(_beginAtoms) != 0 )
7254 warning("N_ALT_ENTRY bit set on first atom in section %s/%s", sect->segname(), Section<A>::makeSectionName(sect));
7255
7256 Atom<A>* end = &_endAtoms[-1];
7257 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
7258 Atom<A>* nextAtom = &p[1];
7259 if ( _altEntries.count(nextAtom) != 0 ) {
7260 typename Parser<A>::SourceLocation src(p, 0);
7261 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
7262 typename Parser<A>::SourceLocation src2(nextAtom, 0);
7263 parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinate, p);
7264 }
7265 }
7266 }
7267
7268 // <rdar://problem/9218847> track data-in-code
7269 if ( parser.hasDataInCodeLabels() && (this->type() == ld::Section::typeCode) ) {
7270 for (uint32_t i=0; i < parser.symbolCount(); ++i) {
7271 const macho_nlist<P>& sym = parser.symbolFromIndex(i);
7272 // ignore stabs
7273 if ( (sym.n_type() & N_STAB) != 0 )
7274 continue;
7275 // ignore non-definitions
7276 if ( (sym.n_type() & N_TYPE) != N_SECT )
7277 continue;
7278
7279 // 'L' labels do not denote atom breaks
7280 const char* symbolName = parser.nameFromSymbol(sym);
7281 if ( symbolName[0] == 'L' ) {
7282 if ( strncmp(symbolName, "L$start$", 8) == 0 ) {
7283 ld::Fixup::Kind kind = ld::Fixup::kindNone;
7284 if ( strncmp(&symbolName[8], "data$", 5) == 0 )
7285 kind = ld::Fixup::kindDataInCodeStartData;
7286 else if ( strncmp(&symbolName[8], "code$", 5) == 0 )
7287 kind = ld::Fixup::kindDataInCodeEnd;
7288 else if ( strncmp(&symbolName[8], "jt8$", 4) == 0 )
7289 kind = ld::Fixup::kindDataInCodeStartJT8;
7290 else if ( strncmp(&symbolName[8], "jt16$", 4) == 0 )
7291 kind = ld::Fixup::kindDataInCodeStartJT16;
7292 else if ( strncmp(&symbolName[8], "jt32$", 4) == 0 )
7293 kind = ld::Fixup::kindDataInCodeStartJT32;
7294 else if ( strncmp(&symbolName[8], "jta32$", 4) == 0 )
7295 kind = ld::Fixup::kindDataInCodeStartJTA32;
7296 else
7297 warning("unknown L$start$ label %s in file %s", symbolName, this->file().path());
7298 if ( kind != ld::Fixup::kindNone ) {
7299 Atom<A>* inAtom = parser.findAtomByAddress(sym.n_value());
7300 typename Parser<A>::SourceLocation src(inAtom, sym.n_value() - inAtom->objectAddress());
7301 parser.addFixup(src, ld::Fixup::k1of1, kind);
7302 }
7303 }
7304 }
7305 }
7306 }
7307
7308 // <rdar://problem/11150575> Handle LC_DATA_IN_CODE in object files
7309 if ( this->type() == ld::Section::typeCode ) {
7310 const pint_t startAddr = this->_machOSection->addr();
7311 const pint_t endAddr = startAddr + this->_machOSection->size();
7312 for ( const macho_data_in_code_entry<P>* p = parser.dataInCodeStart(); p != parser.dataInCodeEnd(); ++p ) {
7313 if ( (p->offset() >= startAddr) && (p->offset() < endAddr) ) {
7314 ld::Fixup::Kind kind = ld::Fixup::kindNone;
7315 switch ( p->kind() ) {
7316 case DICE_KIND_DATA:
7317 kind = ld::Fixup::kindDataInCodeStartData;
7318 break;
7319 case DICE_KIND_JUMP_TABLE8:
7320 kind = ld::Fixup::kindDataInCodeStartJT8;
7321 break;
7322 case DICE_KIND_JUMP_TABLE16:
7323 kind = ld::Fixup::kindDataInCodeStartJT16;
7324 break;
7325 case DICE_KIND_JUMP_TABLE32:
7326 kind = ld::Fixup::kindDataInCodeStartJT32;
7327 break;
7328 case DICE_KIND_ABS_JUMP_TABLE32:
7329 kind = ld::Fixup::kindDataInCodeStartJTA32;
7330 break;
7331 default:
7332 kind = ld::Fixup::kindDataInCodeStartData;
7333 warning("uknown LC_DATA_IN_CODE kind (%d) at offset 0x%08X", p->kind(), p->offset());
7334 break;
7335 }
7336 Atom<A>* inAtom = parser.findAtomByAddress(p->offset());
7337 typename Parser<A>::SourceLocation srcStart(inAtom, p->offset() - inAtom->objectAddress());
7338 parser.addFixup(srcStart, ld::Fixup::k1of1, kind);
7339 typename Parser<A>::SourceLocation srcEnd(inAtom, p->offset() + p->length() - inAtom->objectAddress());
7340 parser.addFixup(srcEnd, ld::Fixup::k1of1, ld::Fixup::kindDataInCodeEnd);
7341 }
7342 }
7343 }
7344
7345 // <rdar://problem/11945700> convert linker optimization hints into internal format
7346 if ( this->type() == ld::Section::typeCode && parser.hasOptimizationHints() ) {
7347 const pint_t startAddr = this->_machOSection->addr();
7348 const pint_t endAddr = startAddr + this->_machOSection->size();
7349 for (const uint8_t* p = parser.optimizationHintsStart(); p < parser.optimizationHintsEnd(); ) {
7350 uint64_t addrs[4];
7351 int32_t kind = read_uleb128(&p, parser.optimizationHintsEnd());
7352 if ( kind == 0 ) // padding at end of loh buffer
7353 break;
7354 if ( kind == -1 ) {
7355 warning("malformed uleb128 kind in LC_LINKER_OPTIMIZATION_HINTS");
7356 break;
7357 }
7358 int32_t count = read_uleb128(&p, parser.optimizationHintsEnd());
7359 if ( count == -1 ) {
7360 warning("malformed uleb128 count in LC_LINKER_OPTIMIZATION_HINTS");
7361 break;
7362 }
7363 if ( count > 3 ) {
7364 warning("address count > 3 in LC_LINKER_OPTIMIZATION_HINTS");
7365 break;
7366 }
7367 for (int32_t i=0; i < count; ++i) {
7368 addrs[i] = read_uleb128(&p, parser.optimizationHintsEnd());
7369 }
7370 if ( (startAddr <= addrs[0]) && (addrs[0] < endAddr) ) {
7371 this->addLOH(parser, kind, count, addrs);
7372 //fprintf(stderr, "kind=%d", kind);
7373 //for (int32_t i=0; i < count; ++i) {
7374 // fprintf(stderr, ", addr=0x%08llX", addrs[i]);
7375 //}
7376 //fprintf(stderr, "\n");
7377 }
7378 }
7379 }
7380
7381
7382 // add follow-on fixups for aliases
7383 if ( _hasAliases ) {
7384 for(Atom<A>* p = _beginAtoms; p < _endAtoms; ++p) {
7385 if ( p->isAlias() && ! this->addFollowOnFixups() ) {
7386 Atom<A>* targetOfAlias = &p[1];
7387 assert(p < &_endAtoms[-1]);
7388 assert(p->_objAddress == targetOfAlias->_objAddress);
7389 typename Parser<A>::SourceLocation src(p, 0);
7390 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, targetOfAlias);
7391 }
7392 }
7393 }
7394 }
7395
7396
7397
7398 //
7399 // main function used by linker to instantiate ld::Files
7400 //
7401 ld::relocatable::File* parse(const uint8_t* fileContent, uint64_t fileLength,
7402 const char* path, time_t modTime, ld::File::Ordinal ordinal, const ParserOptions& opts)
7403 {
7404 switch ( opts.architecture ) {
7405 #if SUPPORT_ARCH_x86_64
7406 case CPU_TYPE_X86_64:
7407 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) )
7408 return mach_o::relocatable::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7409 break;
7410 #endif
7411 #if SUPPORT_ARCH_i386
7412 case CPU_TYPE_I386:
7413 if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) )
7414 return mach_o::relocatable::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7415 break;
7416 #endif
7417 #if SUPPORT_ARCH_arm_any
7418 case CPU_TYPE_ARM:
7419 if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) )
7420 return mach_o::relocatable::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7421 break;
7422 #endif
7423 #if SUPPORT_ARCH_arm64
7424 case CPU_TYPE_ARM64:
7425 if ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) )
7426 return mach_o::relocatable::Parser<arm64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7427 break;
7428 #endif
7429 }
7430 return NULL;
7431 }
7432
7433 //
7434 // used by archive reader to validate member object file
7435 //
7436 bool isObjectFile(const uint8_t* fileContent, uint64_t fileLength, const ParserOptions& opts)
7437 {
7438 switch ( opts.architecture ) {
7439 case CPU_TYPE_X86_64:
7440 return ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) );
7441 case CPU_TYPE_I386:
7442 return ( mach_o::relocatable::Parser<x86>::validFile(fileContent) );
7443 case CPU_TYPE_ARM:
7444 return ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) );
7445 case CPU_TYPE_ARM64:
7446 return ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) );
7447 }
7448 return false;
7449 }
7450
7451 //
7452 // used by linker to infer architecture when no -arch is on command line
7453 //
7454 bool isObjectFile(const uint8_t* fileContent, cpu_type_t* result, cpu_subtype_t* subResult)
7455 {
7456 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
7457 *result = CPU_TYPE_X86_64;
7458 const macho_header<Pointer64<LittleEndian> >* header = (const macho_header<Pointer64<LittleEndian> >*)fileContent;
7459 *subResult = header->cpusubtype();
7460 return true;
7461 }
7462 if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
7463 *result = CPU_TYPE_I386;
7464 *subResult = CPU_SUBTYPE_X86_ALL;
7465 return true;
7466 }
7467 if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
7468 *result = CPU_TYPE_ARM;
7469 const macho_header<Pointer32<LittleEndian> >* header = (const macho_header<Pointer32<LittleEndian> >*)fileContent;
7470 *subResult = header->cpusubtype();
7471 return true;
7472 }
7473 if ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, false, 0) ) {
7474 *result = CPU_TYPE_ARM64;
7475 *subResult = CPU_SUBTYPE_ARM64_ALL;
7476 return true;
7477 }
7478 return false;
7479 }
7480
7481 //
7482 // used by linker is error messages to describe bad .o file
7483 //
7484 const char* archName(const uint8_t* fileContent)
7485 {
7486 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
7487 return mach_o::relocatable::Parser<x86_64>::fileKind(fileContent);
7488 }
7489 if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
7490 return mach_o::relocatable::Parser<x86>::fileKind(fileContent);
7491 }
7492 if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
7493 return mach_o::relocatable::Parser<arm>::fileKind(fileContent);
7494 }
7495 return NULL;
7496 }
7497
7498 //
7499 // Used by archive reader when -ObjC option is specified
7500 //
7501 bool hasObjC2Categories(const uint8_t* fileContent)
7502 {
7503 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
7504 return mach_o::relocatable::Parser<x86_64>::hasObjC2Categories(fileContent);
7505 }
7506 else if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
7507 return mach_o::relocatable::Parser<arm>::hasObjC2Categories(fileContent);
7508 }
7509 else if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
7510 return mach_o::relocatable::Parser<x86>::hasObjC2Categories(fileContent);
7511 }
7512 #if SUPPORT_ARCH_arm64
7513 else if ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, false, 0) ) {
7514 return mach_o::relocatable::Parser<arm64>::hasObjC2Categories(fileContent);
7515 }
7516 #endif
7517 return false;
7518 }
7519
7520 //
7521 // Used by archive reader when -ObjC option is specified
7522 //
7523 bool hasObjC1Categories(const uint8_t* fileContent)
7524 {
7525 if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
7526 return mach_o::relocatable::Parser<x86>::hasObjC1Categories(fileContent);
7527 }
7528 return false;
7529 }
7530
7531
7532
7533 } // namespace relocatable
7534 } // namespace mach_o
7535
7536