]> git.saurik.com Git - apple/ld64.git/blob - src/ld/parsers/macho_relocatable_file.cpp
ad5720ed1212e2b3b060c558df673658b49c5eb6
[apple/ld64.git] / src / ld / parsers / macho_relocatable_file.cpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <math.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <sys/param.h>
32 #include <sys/stat.h>
33 #include <sys/mman.h>
34
35 #include "MachOFileAbstraction.hpp"
36
37 #include "libunwind/DwarfInstructions.hpp"
38 #include "libunwind/AddressSpace.hpp"
39 #include "libunwind/Registers.hpp"
40
41 #include <vector>
42 #include <set>
43 #include <map>
44 #include <algorithm>
45
46 #include "dwarf2.h"
47 #include "debugline.h"
48
49 #include "Architectures.hpp"
50 #include "ld.hpp"
51 #include "macho_relocatable_file.h"
52
53
54
// Diagnostic helpers declared here and defined outside this view.  Both take
// a printf-style format string (argument 1) followed by variadic arguments
// (starting at argument 2), so the compiler can type-check call sites.
// throwf() is additionally declared noreturn.
extern void throwf(const char* format, ...)
	__attribute__((noreturn, format(printf, 1, 2)));
extern void warning(const char* format, ...)
	__attribute__((format(printf, 1, 2)));
57
58 namespace mach_o {
59 namespace relocatable {
60
61
// Forward declarations: these class templates refer to one another, so each
// must be nameable before its full definition appears.
template <typename A> class Atom;
template <typename A> class CFISection;
template <typename A> class CUSection;
template <typename A> class Parser;
template <typename A> class Section;
68
69 template <typename A>
70 class File : public ld::relocatable::File
71 {
72 public:
73 File(const char* p, time_t mTime, const uint8_t* content, ld::File::Ordinal ord) :
74 ld::relocatable::File(p,mTime,ord), _fileContent(content),
75 _sectionsArray(NULL), _atomsArray(NULL),
76 _sectionsArrayCount(0), _atomsArrayCount(0),
77 _debugInfoKind(ld::relocatable::File::kDebugInfoNone),
78 _dwarfTranslationUnitPath(NULL),
79 _dwarfDebugInfoSect(NULL), _dwarfDebugAbbrevSect(NULL),
80 _dwarfDebugLineSect(NULL), _dwarfDebugStringSect(NULL),
81 _objConstraint(ld::File::objcConstraintNone),
82 _cpuSubType(0),
83 _canScatterAtoms(false) {}
84 virtual ~File();
85
86 // overrides of ld::File
87 virtual bool forEachAtom(ld::File::AtomHandler&) const;
88 virtual bool justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const
89 { return false; }
90
91 // overrides of ld::relocatable::File
92 virtual ObjcConstraint objCConstraint() const { return _objConstraint; }
93 virtual uint32_t cpuSubType() const { return _cpuSubType; }
94 virtual DebugInfoKind debugInfo() const { return _debugInfoKind; }
95 virtual const std::vector<ld::relocatable::File::Stab>* stabs() const { return &_stabs; }
96 virtual bool canScatterAtoms() const { return _canScatterAtoms; }
97 virtual const char* translationUnitSource() const;
98 virtual LinkerOptionsList* linkerOptions() const { return &_linkerOptions; }
99
100 const uint8_t* fileContent() { return _fileContent; }
101 private:
102 friend class Atom<A>;
103 friend class Section<A>;
104 friend class Parser<A>;
105 friend class CFISection<A>::OAS;
106
107 typedef typename A::P P;
108
109 const uint8_t* _fileContent;
110 Section<A>** _sectionsArray;
111 uint8_t* _atomsArray;
112 uint32_t _sectionsArrayCount;
113 uint32_t _atomsArrayCount;
114 std::vector<ld::Fixup> _fixups;
115 std::vector<ld::Atom::UnwindInfo> _unwindInfos;
116 std::vector<ld::Atom::LineInfo> _lineInfos;
117 std::vector<ld::relocatable::File::Stab>_stabs;
118 ld::relocatable::File::DebugInfoKind _debugInfoKind;
119 const char* _dwarfTranslationUnitPath;
120 const macho_section<P>* _dwarfDebugInfoSect;
121 const macho_section<P>* _dwarfDebugAbbrevSect;
122 const macho_section<P>* _dwarfDebugLineSect;
123 const macho_section<P>* _dwarfDebugStringSect;
124 ld::File::ObjcConstraint _objConstraint;
125 uint32_t _cpuSubType;
126 bool _canScatterAtoms;
127 std::vector<std::vector<const char*> > _linkerOptions;
128 };
129
130
131 template <typename A>
132 class Section : public ld::Section
133 {
134 public:
135 typedef typename A::P::uint_t pint_t;
136 typedef typename A::P P;
137 typedef typename A::P::E E;
138
139 virtual ~Section() { }
140 class File<A>& file() const { return _file; }
141 const macho_section<P>* machoSection() const { return _machOSection; }
142 uint32_t sectionNum(class Parser<A>&) const;
143 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr);
144 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeUnclassified; }
145 virtual bool dontDeadStrip() { return (this->_machOSection->flags() & S_ATTR_NO_DEAD_STRIP); }
146 virtual Atom<A>* findAtomByAddress(pint_t addr) { return this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms); }
147 virtual bool addFollowOnFixups() const { return ! _file.canScatterAtoms(); }
148 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
149 struct Parser<A>::LabelAndCFIBreakIterator& it,
150 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
151 virtual uint32_t computeAtomCount(class Parser<A>& parser,
152 struct Parser<A>::LabelAndCFIBreakIterator& it,
153 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
154 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
155 virtual bool addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
156 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const { return 0; }
157 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
158 const ld::IndirectBindingTable& ind) const { return false; }
159 virtual bool ignoreLabel(const char* label) const { return false; }
160 static const char* makeSectionName(const macho_section<typename A::P>* s);
161
162 protected:
163 Section(File<A>& f, const macho_section<typename A::P>* s)
164 : ld::Section(makeSegmentName(s), makeSectionName(s), sectionType(s)),
165 _file(f), _machOSection(s), _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
166 Section(File<A>& f, const char* segName, const char* sectName, ld::Section::Type t, bool hidden=false)
167 : ld::Section(segName, sectName, t, hidden), _file(f), _machOSection(NULL),
168 _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
169
170
171 Atom<A>* findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end);
172 uint32_t x86_64PcRelOffset(uint8_t r_type);
173 void addLOH(class Parser<A>& parser, int kind, int count, const uint64_t addrs[]);
174 static const char* makeSegmentName(const macho_section<typename A::P>* s);
175 static bool readable(const macho_section<typename A::P>* s);
176 static bool writable(const macho_section<typename A::P>* s);
177 static bool exectuable(const macho_section<typename A::P>* s);
178 static ld::Section::Type sectionType(const macho_section<typename A::P>* s);
179
180 File<A>& _file;
181 const macho_section<P>* _machOSection;
182 class Atom<A>* _beginAtoms;
183 class Atom<A>* _endAtoms;
184 bool _hasAliases;
185 };
186
187
188 template <typename A>
189 class CFISection : public Section<A>
190 {
191 public:
192 CFISection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
193 : Section<A>(f, s) { }
194 uint32_t cfiCount();
195
196 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeCFI; }
197 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
198 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
199 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
200 virtual bool addFollowOnFixups() const { return false; }
201
202
203 ///
204 /// ObjectFileAddressSpace is used as a template parameter to UnwindCursor for parsing
205 /// dwarf CFI information in an object file.
206 ///
207 class OAS
208 {
209 public:
210 typedef typename A::P::uint_t pint_t;
211 typedef typename A::P P;
212 typedef typename A::P::E E;
213 typedef typename A::P::uint_t sint_t;
214
215 OAS(CFISection<A>& ehFrameSection, const uint8_t* ehFrameBuffer) :
216 _ehFrameSection(ehFrameSection),
217 _ehFrameContent(ehFrameBuffer),
218 _ehFrameStartAddr(ehFrameSection.machoSection()->addr()),
219 _ehFrameEndAddr(ehFrameSection.machoSection()->addr()+ehFrameSection.machoSection()->size()) {}
220
221 uint8_t get8(pint_t addr) { return *((uint8_t*)mappedAddress(addr)); }
222 uint16_t get16(pint_t addr) { return E::get16(*((uint16_t*)mappedAddress(addr))); }
223 uint32_t get32(pint_t addr) { return E::get32(*((uint32_t*)mappedAddress(addr))); }
224 uint64_t get64(pint_t addr) { return E::get64(*((uint64_t*)mappedAddress(addr))); }
225 pint_t getP(pint_t addr) { return P::getP(*((pint_t*)mappedAddress(addr))); }
226 uint64_t getULEB128(pint_t& addr, pint_t end);
227 int64_t getSLEB128(pint_t& addr, pint_t end);
228 pint_t getEncodedP(pint_t& addr, pint_t end, uint8_t encoding);
229 private:
230 const void* mappedAddress(pint_t addr);
231
232 CFISection<A>& _ehFrameSection;
233 const uint8_t* _ehFrameContent;
234 pint_t _ehFrameStartAddr;
235 pint_t _ehFrameEndAddr;
236 };
237
238
239 typedef typename A::P::uint_t pint_t;
240 typedef libunwind::CFI_Atom_Info<OAS> CFI_Atom_Info;
241
242 void cfiParse(class Parser<A>& parser, uint8_t* buffer, CFI_Atom_Info cfiArray[], uint32_t& cfiCount, const pint_t cuStarts[], uint32_t cuCount);
243 bool needsRelocating();
244
245 static bool bigEndian();
246 private:
247 void addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo);
248 static void warnFunc(void* ref, uint64_t funcAddr, const char* msg);
249 };
250
251
252 template <typename A>
253 class CUSection : public Section<A>
254 {
255 public:
256 CUSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
257 : Section<A>(f, s) { }
258
259 typedef typename A::P::uint_t pint_t;
260 typedef typename A::P P;
261 typedef typename A::P::E E;
262
263 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
264 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
265 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
266 virtual bool addFollowOnFixups() const { return false; }
267
268 struct Info {
269 pint_t functionStartAddress;
270 uint32_t functionSymbolIndex;
271 uint32_t rangeLength;
272 uint32_t compactUnwindInfo;
273 const char* personality;
274 pint_t lsdaAddress;
275 Atom<A>* function;
276 Atom<A>* lsda;
277 };
278
279 uint32_t count();
280 void parse(class Parser<A>& parser, uint32_t cnt, Info array[]);
281 static bool encodingMeansUseDwarf(compact_unwind_encoding_t enc);
282
283
284 private:
285
286 const char* personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc);
287
288 static int infoSorter(const void* l, const void* r);
289
290 };
291
292
293 template <typename A>
294 class TentativeDefinitionSection : public Section<A>
295 {
296 public:
297 TentativeDefinitionSection(Parser<A>& parser, File<A>& f)
298 : Section<A>(f, "__DATA", "__comm/tent", ld::Section::typeTentativeDefs) {}
299
300 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeZeroFill; }
301 virtual bool addFollowOnFixups() const { return false; }
302 virtual Atom<A>* findAtomByAddress(typename A::P::uint_t addr) { throw "TentativeDefinitionSection::findAtomByAddress() should never be called"; }
303 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
304 const struct Parser<A>::CFI_CU_InfoArrays&);
305 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
306 struct Parser<A>::LabelAndCFIBreakIterator& it,
307 const struct Parser<A>::CFI_CU_InfoArrays&);
308 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
309 private:
310 typedef typename A::P::uint_t pint_t;
311 typedef typename A::P P;
312 };
313
314
315 template <typename A>
316 class AbsoluteSymbolSection : public Section<A>
317 {
318 public:
319 AbsoluteSymbolSection(Parser<A>& parser, File<A>& f)
320 : Section<A>(f, "__DATA", "__abs", ld::Section::typeAbsoluteSymbols, true) {}
321
322 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeUnclassified; }
323 virtual bool dontDeadStrip() { return false; }
324 virtual ld::Atom::Alignment alignmentForAddress(typename A::P::uint_t addr) { return ld::Atom::Alignment(0); }
325 virtual bool addFollowOnFixups() const { return false; }
326 virtual Atom<A>* findAtomByAddress(typename A::P::uint_t addr) { throw "AbsoluteSymbolSection::findAtomByAddress() should never be called"; }
327 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
328 const struct Parser<A>::CFI_CU_InfoArrays&);
329 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
330 struct Parser<A>::LabelAndCFIBreakIterator& it,
331 const struct Parser<A>::CFI_CU_InfoArrays&);
332 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
333 virtual Atom<A>* findAbsAtomForValue(typename A::P::uint_t);
334
335 private:
336 typedef typename A::P::uint_t pint_t;
337 typedef typename A::P P;
338 };
339
340
341 template <typename A>
342 class SymboledSection : public Section<A>
343 {
344 public:
345 SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s);
346 virtual ld::Atom::ContentType contentType() { return _type; }
347 virtual bool dontDeadStrip();
348 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
349 const struct Parser<A>::CFI_CU_InfoArrays&);
350 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
351 struct Parser<A>::LabelAndCFIBreakIterator& it,
352 const struct Parser<A>::CFI_CU_InfoArrays&);
353 protected:
354 typedef typename A::P::uint_t pint_t;
355 typedef typename A::P P;
356
357 ld::Atom::ContentType _type;
358 };
359
360
361 template <typename A>
362 class TLVDefsSection : public SymboledSection<A>
363 {
364 public:
365 TLVDefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s) :
366 SymboledSection<A>(parser, f, s) { }
367
368 private:
369
370 };
371
372
373 template <typename A>
374 class ImplicitSizeSection : public Section<A>
375 {
376 public:
377 ImplicitSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
378 : Section<A>(f, s) { }
379 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
380 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
381 protected:
382 typedef typename A::P::uint_t pint_t;
383 typedef typename A::P P;
384
385 virtual bool addFollowOnFixups() const { return false; }
386 virtual const char* unlabeledAtomName(Parser<A>& parser, pint_t addr) = 0;
387 virtual ld::Atom::SymbolTableInclusion symbolTableInclusion();
388 virtual pint_t elementSizeAtAddress(pint_t addr) = 0;
389 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeLinkageUnit; }
390 virtual bool useElementAt(Parser<A>& parser,
391 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr) = 0;
392 virtual ld::Atom::Definition definition() { return ld::Atom::definitionRegular; }
393 virtual ld::Atom::Combine combine(Parser<A>& parser, pint_t addr) = 0;
394 virtual bool ignoreLabel(const char* label) const { return (label[0] == 'L'); }
395 };
396
397
398 template <typename A>
399 class FixedSizeSection : public ImplicitSizeSection<A>
400 {
401 public:
402 FixedSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
403 : ImplicitSizeSection<A>(parser, f, s) { }
404 protected:
405 typedef typename A::P::uint_t pint_t;
406 typedef typename A::P P;
407 typedef typename A::P::E E;
408
409 virtual bool useElementAt(Parser<A>& parser,
410 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
411 { return true; }
412 };
413
414
415 template <typename A>
416 class Literal4Section : public FixedSizeSection<A>
417 {
418 public:
419 Literal4Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
420 : FixedSizeSection<A>(parser, f, s) {}
421 protected:
422 typedef typename A::P::uint_t pint_t;
423 typedef typename A::P P;
424
425 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(2); }
426 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "4-byte-literal"; }
427 virtual pint_t elementSizeAtAddress(pint_t addr) { return 4; }
428 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
429 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
430 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
431 const ld::IndirectBindingTable& ind) const;
432 };
433
434 template <typename A>
435 class Literal8Section : public FixedSizeSection<A>
436 {
437 public:
438 Literal8Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
439 : FixedSizeSection<A>(parser, f, s) {}
440 protected:
441 typedef typename A::P::uint_t pint_t;
442 typedef typename A::P P;
443
444 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(3); }
445 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "8-byte-literal"; }
446 virtual pint_t elementSizeAtAddress(pint_t addr) { return 8; }
447 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
448 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
449 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
450 const ld::IndirectBindingTable& ind) const;
451 };
452
453 template <typename A>
454 class Literal16Section : public FixedSizeSection<A>
455 {
456 public:
457 Literal16Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
458 : FixedSizeSection<A>(parser, f, s) {}
459 protected:
460 typedef typename A::P::uint_t pint_t;
461 typedef typename A::P P;
462
463 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(4); }
464 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "16-byte-literal"; }
465 virtual pint_t elementSizeAtAddress(pint_t addr) { return 16; }
466 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
467 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
468 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
469 const ld::IndirectBindingTable& ind) const;
470 };
471
472
473 template <typename A>
474 class NonLazyPointerSection : public FixedSizeSection<A>
475 {
476 public:
477 NonLazyPointerSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
478 : FixedSizeSection<A>(parser, f, s) {}
479 protected:
480 typedef typename A::P::uint_t pint_t;
481 typedef typename A::P P;
482
483 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
484 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeNonLazyPointer; }
485 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
486 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "non_lazy_ptr"; }
487 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
488 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& parser, pint_t addr);
489 virtual ld::Atom::Combine combine(Parser<A>&, pint_t);
490 virtual bool ignoreLabel(const char* label) const { return true; }
491 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
492 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
493 const ld::IndirectBindingTable& ind) const;
494
495 private:
496 static const char* targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind);
497 static ld::Fixup::Kind fixupKind();
498 };
499
500
501 template <typename A>
502 class CFStringSection : public FixedSizeSection<A>
503 {
504 public:
505 CFStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
506 : FixedSizeSection<A>(parser, f, s) {}
507 protected:
508 typedef typename A::P::uint_t pint_t;
509
510 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
511 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "CFString"; }
512 virtual pint_t elementSizeAtAddress(pint_t addr) { return 4*sizeof(pint_t); }
513 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndReferences; }
514 virtual bool ignoreLabel(const char* label) const { return true; }
515 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
516 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
517 const ld::IndirectBindingTable& ind) const;
518 private:
519 enum ContentType { contentUTF8, contentUTF16, contentUnknown };
520 static const uint8_t* targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
521 ContentType* ct, unsigned int* count);
522 };
523
524
525 template <typename A>
526 class ObjC1ClassSection : public FixedSizeSection<A>
527 {
528 public:
529 ObjC1ClassSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
530 : FixedSizeSection<A>(parser, f, s) {}
531 protected:
532 typedef typename A::P::uint_t pint_t;
533 typedef typename A::P P;
534 typedef typename A::P::E E;
535
536 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& , pint_t ) { return ld::Atom::scopeGlobal; }
537 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(2); }
538 virtual const char* unlabeledAtomName(Parser<A>&, pint_t);
539 virtual ld::Atom::SymbolTableInclusion symbolTableInclusion() { return ld::Atom::symbolTableIn; }
540 virtual pint_t elementSizeAtAddress(pint_t addr);
541 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineNever; }
542 virtual bool ignoreLabel(const char* label) const { return true; }
543 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
544 { return 0; }
545 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
546 const ld::IndirectBindingTable& ind) const { return false; }
547 virtual bool addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
548 };
549
550
551 template <typename A>
552 class ObjC2ClassRefsSection : public FixedSizeSection<A>
553 {
554 public:
555 ObjC2ClassRefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
556 : FixedSizeSection<A>(parser, f, s) {}
557 protected:
558 typedef typename A::P::uint_t pint_t;
559
560 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
561 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "objc-class-ref"; }
562 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
563 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndReferences; }
564 virtual bool ignoreLabel(const char* label) const { return true; }
565 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
566 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
567 const ld::IndirectBindingTable& ind) const;
568 private:
569 const char* targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
570 };
571
572
573 template <typename A>
574 class ObjC2CategoryListSection : public FixedSizeSection<A>
575 {
576 public:
577 ObjC2CategoryListSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
578 : FixedSizeSection<A>(parser, f, s) {}
579 protected:
580 typedef typename A::P::uint_t pint_t;
581
582 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
583 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeTranslationUnit; }
584 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "objc-cat-list"; }
585 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
586 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineNever; }
587 virtual bool ignoreLabel(const char* label) const { return true; }
588 private:
589 const char* targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
590 };
591
592
593 template <typename A>
594 class PointerToCStringSection : public FixedSizeSection<A>
595 {
596 public:
597 PointerToCStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
598 : FixedSizeSection<A>(parser, f, s) {}
599 protected:
600 typedef typename A::P::uint_t pint_t;
601
602 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
603 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "pointer-to-literal-cstring"; }
604 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
605 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndReferences; }
606 virtual bool ignoreLabel(const char* label) const { return true; }
607 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
608 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
609 const ld::IndirectBindingTable& ind) const;
610 virtual const char* targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
611 };
612
613
614 template <typename A>
615 class Objc1ClassReferences : public PointerToCStringSection<A>
616 {
617 public:
618 Objc1ClassReferences(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
619 : PointerToCStringSection<A>(parser, f, s) {}
620
621 typedef typename A::P::uint_t pint_t;
622 typedef typename A::P P;
623
624 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "pointer-to-literal-objc-class-name"; }
625 virtual bool addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
626 virtual const char* targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
627 };
628
629
630 template <typename A>
631 class CStringSection : public ImplicitSizeSection<A>
632 {
633 public:
634 CStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
635 : ImplicitSizeSection<A>(parser, f, s) {}
636 protected:
637 typedef typename A::P::uint_t pint_t;
638 typedef typename A::P P;
639
640 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeCString; }
641 virtual Atom<A>* findAtomByAddress(pint_t addr);
642 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "cstring"; }
643 virtual pint_t elementSizeAtAddress(pint_t addr);
644 virtual bool ignoreLabel(const char* label) const;
645 virtual bool useElementAt(Parser<A>& parser,
646 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr);
647 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
648 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
649 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
650 const ld::IndirectBindingTable& ind) const;
651
652 };
653
654
655 template <typename A>
656 class UTF16StringSection : public SymboledSection<A>
657 {
658 public:
659 UTF16StringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
660 : SymboledSection<A>(parser, f, s) {}
661 protected:
662 typedef typename A::P::uint_t pint_t;
663 typedef typename A::P P;
664
665 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
666 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
667 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
668 const ld::IndirectBindingTable& ind) const;
669 };
670
671
672 //
673 // Atoms in mach-o files
674 //
675 template <typename A>
676 class Atom : public ld::Atom
677 {
678 public:
679 // overrides of ld::Atom
680 virtual const ld::File* file() const;
681 virtual const char* translationUnitSource() const
682 { return sect().file().translationUnitSource(); }
683 virtual const char* name() const { return _name; }
684 virtual uint64_t size() const { return _size; }
685 virtual uint64_t objectAddress() const { return _objAddress; }
686 virtual void copyRawContent(uint8_t buffer[]) const;
687 virtual const uint8_t* rawContentPointer() const { return contentPointer(); }
688 virtual unsigned long contentHash(const ld::IndirectBindingTable& ind) const
689 { if ( _hash == 0 ) _hash = sect().contentHash(this, ind); return _hash; }
690 virtual bool canCoalesceWith(const ld::Atom& rhs, const ld::IndirectBindingTable& ind) const
691 { return sect().canCoalesceWith(this, rhs, ind); }
692 virtual ld::Fixup::iterator fixupsBegin() const { return &machofile()._fixups[_fixupsStartIndex]; }
693 virtual ld::Fixup::iterator fixupsEnd() const { return &machofile()._fixups[_fixupsStartIndex+_fixupsCount]; }
694 virtual ld::Atom::UnwindInfo::iterator beginUnwind() const { return &machofile()._unwindInfos[_unwindInfoStartIndex]; }
695 virtual ld::Atom::UnwindInfo::iterator endUnwind() const { return &machofile()._unwindInfos[_unwindInfoStartIndex+_unwindInfoCount]; }
696 virtual ld::Atom::LineInfo::iterator beginLineInfo() const{ return &machofile()._lineInfos[_lineInfoStartIndex]; }
697 virtual ld::Atom::LineInfo::iterator endLineInfo() const { return &machofile()._lineInfos[_lineInfoStartIndex+_lineInfoCount]; }
698 virtual void setFile(const ld::File* f);
699
700 private:
701
702 enum { kFixupStartIndexBits = 32,
703 kLineInfoStartIndexBits = 32,
704 kUnwindInfoStartIndexBits = 24,
705 kFixupCountBits = 24,
706 kLineInfoCountBits = 12,
707 kUnwindInfoCountBits = 4
708 }; // must sum to 128
709
710 public:
711 // methods for all atoms from mach-o object file
712 Section<A>& sect() const { return (Section<A>&)section(); }
713 File<A>& machofile() const { return ((Section<A>*)(this->_section))->file(); }
714 void setFixupsRange(uint32_t s, uint32_t c);
715 void setUnwindInfoRange(uint32_t s, uint32_t c);
716 void extendUnwindInfoRange();
717 void setLineInfoRange(uint32_t s, uint32_t c);
718 bool roomForMoreLineInfoCount() { return (_lineInfoCount < ((1<<kLineInfoCountBits)-1)); }
719 void incrementLineInfoCount() { assert(roomForMoreLineInfoCount()); ++_lineInfoCount; }
720 void incrementFixupCount() { if (_fixupsCount == ((1 << kFixupCountBits)-1))
721 throwf("too may fixups in %s", name()); ++_fixupsCount; }
722 const uint8_t* contentPointer() const;
723 uint32_t fixupCount() const { return _fixupsCount; }
724 void verifyAlignment(const macho_section<typename A::P>&) const;
725
726 typedef typename A::P P;
727 typedef typename A::P::E E;
728 typedef typename A::P::uint_t pint_t;
729 // constuct via all attributes
730 Atom(Section<A>& sct, const char* nm, pint_t addr, uint64_t sz,
731 ld::Atom::Definition d, ld::Atom::Combine c, ld::Atom::Scope s,
732 ld::Atom::ContentType ct, ld::Atom::SymbolTableInclusion i,
733 bool dds, bool thumb, bool al, ld::Atom::Alignment a)
734 : ld::Atom((ld::Section&)sct, d, c, s, ct, i, dds, thumb, al, a),
735 _size(sz), _objAddress(addr), _name(nm), _hash(0),
736 _fixupsStartIndex(0), _lineInfoStartIndex(0),
737 _unwindInfoStartIndex(0), _fixupsCount(0),
738 _lineInfoCount(0), _unwindInfoCount(0) { }
739 // construct via symbol table entry
740 Atom(Section<A>& sct, Parser<A>& parser, const macho_nlist<P>& sym,
741 uint64_t sz, bool alias=false)
742 : ld::Atom((ld::Section&)sct, parser.definitionFromSymbol(sym),
743 parser.combineFromSymbol(sym), parser.scopeFromSymbol(sym),
744 parser.resolverFromSymbol(sym) ? ld::Atom::typeResolver : sct.contentType(),
745 parser.inclusionFromSymbol(sym),
746 parser.dontDeadStripFromSymbol(sym) || sct.dontDeadStrip(),
747 parser.isThumbFromSymbol(sym), alias,
748 sct.alignmentForAddress(sym.n_value())),
749 _size(sz), _objAddress(sym.n_value()),
750 _name(parser.nameFromSymbol(sym)), _hash(0),
751 _fixupsStartIndex(0), _lineInfoStartIndex(0),
752 _unwindInfoStartIndex(0), _fixupsCount(0),
753 _lineInfoCount(0), _unwindInfoCount(0) {
754 // <rdar://problem/6783167> support auto-hidden weak symbols
755 if ( _scope == ld::Atom::scopeGlobal &&
756 (sym.n_desc() & (N_WEAK_DEF|N_WEAK_REF)) == (N_WEAK_DEF|N_WEAK_REF) )
757 this->setAutoHide();
758 this->verifyAlignment(*sct.machoSection());
759 }
760
761 private:
762 friend class Parser<A>;
763 friend class Section<A>;
764 friend class CStringSection<A>;
765 friend class AbsoluteSymbolSection<A>;
766
767 pint_t _size;
768 pint_t _objAddress;
769 const char* _name;
770 mutable unsigned long _hash;
771
772 uint64_t _fixupsStartIndex : kFixupStartIndexBits,
773 _lineInfoStartIndex : kLineInfoStartIndexBits,
774 _unwindInfoStartIndex : kUnwindInfoStartIndexBits,
775 _fixupsCount : kFixupCountBits,
776 _lineInfoCount : kLineInfoCountBits,
777 _unwindInfoCount : kUnwindInfoCountBits;
778
779 static std::map<const ld::Atom*, const ld::File*> _s_fileOverride;
780 };
781
782 template <typename A>
783 std::map<const ld::Atom*, const ld::File*> Atom<A>::_s_fileOverride;
784
785 template <typename A>
786 void Atom<A>::setFile(const ld::File* f) {
787 _s_fileOverride[this] = f;
788 }
789
790 template <typename A>
791 const ld::File* Atom<A>::file() const
792 {
793 std::map<const ld::Atom*, const ld::File*>::iterator pos = _s_fileOverride.find(this);
794 if ( pos != _s_fileOverride.end() )
795 return pos->second;
796
797 return &sect().file();
798 }
799
800 template <typename A>
801 void Atom<A>::setFixupsRange(uint32_t startIndex, uint32_t count)
802 {
803 if ( count >= (1 << kFixupCountBits) )
804 throwf("too many fixups in function %s", this->name());
805 if ( startIndex >= (1 << kFixupStartIndexBits) )
806 throwf("too many fixups in file");
807 assert(((startIndex+count) <= sect().file()._fixups.size()) && "fixup index out of range");
808 _fixupsStartIndex = startIndex;
809 _fixupsCount = count;
810 }
811
812 template <typename A>
813 void Atom<A>::setUnwindInfoRange(uint32_t startIndex, uint32_t count)
814 {
815 if ( count >= (1 << kUnwindInfoCountBits) )
816 throwf("too many compact unwind infos in function %s", this->name());
817 if ( startIndex >= (1 << kUnwindInfoStartIndexBits) )
818 throwf("too many compact unwind infos (%d) in file", startIndex);
819 assert((startIndex+count) <= sect().file()._unwindInfos.size() && "unwindinfo index out of range");
820 _unwindInfoStartIndex = startIndex;
821 _unwindInfoCount = count;
822 }
823
824 template <typename A>
825 void Atom<A>::extendUnwindInfoRange()
826 {
827 if ( _unwindInfoCount+1 >= (1 << kUnwindInfoCountBits) )
828 throwf("too many compact unwind infos in function %s", this->name());
829 _unwindInfoCount += 1;
830 }
831
832 template <typename A>
833 void Atom<A>::setLineInfoRange(uint32_t startIndex, uint32_t count)
834 {
835 assert((count < (1 << kLineInfoCountBits)) && "too many line infos");
836 assert((startIndex+count) < sect().file()._lineInfos.size() && "line info index out of range");
837 _lineInfoStartIndex = startIndex;
838 _lineInfoCount = count;
839 }
840
841 template <typename A>
842 const uint8_t* Atom<A>::contentPointer() const
843 {
844 const macho_section<P>* sct = this->sect().machoSection();
845 if ( this->_objAddress > sct->addr() + sct->size() )
846 throwf("malformed .o file, symbol has address 0x%0llX which is outside range of its section", (uint64_t)this->_objAddress);
847 uint32_t fileOffset = sct->offset() - sct->addr() + this->_objAddress;
848 return this->sect().file().fileContent()+fileOffset;
849 }
850
851
852 template <typename A>
853 void Atom<A>::copyRawContent(uint8_t buffer[]) const
854 {
855 // copy base bytes
856 if ( this->contentType() == ld::Atom::typeZeroFill ) {
857 bzero(buffer, _size);
858 }
859 else if ( _size != 0 ) {
860 memcpy(buffer, this->contentPointer(), _size);
861 }
862 }
863
864 template <>
865 void Atom<arm>::verifyAlignment(const macho_section<P>&) const
866 {
867 if ( (this->section().type() == ld::Section::typeCode) && ! isThumb() ) {
868 if ( ((_objAddress % 4) != 0) || (this->alignment().powerOf2 < 2) )
869 warning("ARM function not 4-byte aligned: %s from %s", this->name(), this->file()->path());
870 }
871 }
872
873 #if SUPPORT_ARCH_arm64
874 template <>
875 void Atom<arm64>::verifyAlignment(const macho_section<P>& sect) const
876 {
877 if ( (this->section().type() == ld::Section::typeCode) && (sect.size() != 0) ) {
878 if ( ((_objAddress % 4) != 0) || (this->alignment().powerOf2 < 2) )
879 warning("arm64 function not 4-byte aligned: %s from %s", this->name(), this->file()->path());
880 }
881 }
882 #endif
883
884 template <typename A>
885 void Atom<A>::verifyAlignment(const macho_section<P>&) const
886 {
887 }
888
889
890 template <typename A>
891 class Parser
892 {
893 public:
894 static bool validFile(const uint8_t* fileContent, bool subtypeMustMatch=false,
895 cpu_subtype_t subtype=0);
896 static const char* fileKind(const uint8_t* fileContent);
897 static bool hasObjC2Categories(const uint8_t* fileContent);
898 static bool hasObjC1Categories(const uint8_t* fileContent);
899 static ld::relocatable::File* parse(const uint8_t* fileContent, uint64_t fileLength,
900 const char* path, time_t modTime, ld::File::Ordinal ordinal,
901 const ParserOptions& opts) {
902 Parser p(fileContent, fileLength, path, modTime,
903 ordinal, opts.warnUnwindConversionProblems,
904 opts.keepDwarfUnwind, opts.forceDwarfConversion,
905 opts.neverConvertDwarf, opts.verboseOptimizationHints);
906 return p.parse(opts);
907 }
908
909 typedef typename A::P P;
910 typedef typename A::P::E E;
911 typedef typename A::P::uint_t pint_t;
912
913 struct SourceLocation {
914 SourceLocation() {}
915 SourceLocation(Atom<A>* a, uint32_t o) : atom(a), offsetInAtom(o) {}
916 Atom<A>* atom;
917 uint32_t offsetInAtom;
918 };
919
920 struct TargetDesc {
921 Atom<A>* atom;
922 const char* name; // only used if targetAtom is NULL
923 int64_t addend;
924 bool weakImport; // only used if targetAtom is NULL
925 };
926
927 struct FixupInAtom {
928 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) :
929 fixup(src.offsetInAtom, c, k, target), atom(src.atom) { src.atom->incrementFixupCount(); }
930
931 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) :
932 fixup(src.offsetInAtom, c, k, b, target), atom(src.atom) { src.atom->incrementFixupCount(); }
933
934 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) :
935 fixup(src.offsetInAtom, c, k, wi, name), atom(src.atom) { src.atom->incrementFixupCount(); }
936
937 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) :
938 fixup(src.offsetInAtom, c, k, b, name), atom(src.atom) { src.atom->incrementFixupCount(); }
939
940 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) :
941 fixup(src.offsetInAtom, c, k, addend), atom(src.atom) { src.atom->incrementFixupCount(); }
942
943 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) :
944 fixup(src.offsetInAtom, c, k, (uint64_t)0), atom(src.atom) { src.atom->incrementFixupCount(); }
945
946 ld::Fixup fixup;
947 Atom<A>* atom;
948 };
949
950 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) {
951 _allFixups.push_back(FixupInAtom(src, c, k, target));
952 }
953
954 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) {
955 _allFixups.push_back(FixupInAtom(src, c, k, b, target));
956 }
957
958 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) {
959 _allFixups.push_back(FixupInAtom(src, c, k, wi, name));
960 }
961
962 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) {
963 _allFixups.push_back(FixupInAtom(src, c, k, b, name));
964 }
965
966 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) {
967 _allFixups.push_back(FixupInAtom(src, c, k, addend));
968 }
969
970 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) {
971 _allFixups.push_back(FixupInAtom(src, c, k));
972 }
973
974 const char* path() { return _path; }
975 uint32_t symbolCount() { return _symbolCount; }
976 uint32_t indirectSymbol(uint32_t indirectIndex);
977 const macho_nlist<P>& symbolFromIndex(uint32_t index);
978 const char* nameFromSymbol(const macho_nlist<P>& sym);
979 ld::Atom::Scope scopeFromSymbol(const macho_nlist<P>& sym);
980 static ld::Atom::Definition definitionFromSymbol(const macho_nlist<P>& sym);
981 static ld::Atom::Combine combineFromSymbol(const macho_nlist<P>& sym);
982 ld::Atom::SymbolTableInclusion inclusionFromSymbol(const macho_nlist<P>& sym);
983 static bool dontDeadStripFromSymbol(const macho_nlist<P>& sym);
984 static bool isThumbFromSymbol(const macho_nlist<P>& sym);
985 static bool weakImportFromSymbol(const macho_nlist<P>& sym);
986 static bool resolverFromSymbol(const macho_nlist<P>& sym);
987 uint32_t symbolIndexFromIndirectSectionAddress(pint_t,const macho_section<P>*);
988 const macho_section<P>* firstMachOSection() { return _sectionsStart; }
989 const macho_section<P>* machOSectionFromSectionIndex(uint32_t index);
990 uint32_t machOSectionCount() { return _machOSectionsCount; }
991 uint32_t undefinedStartIndex() { return _undefinedStartIndex; }
992 uint32_t undefinedEndIndex() { return _undefinedEndIndex; }
993 void addFixup(FixupInAtom f) { _allFixups.push_back(f); }
994 Section<A>* sectionForNum(unsigned int sectNum);
995 Section<A>* sectionForAddress(pint_t addr);
996 Atom<A>* findAtomByAddress(pint_t addr);
997 Atom<A>* findAtomByAddressOrNullIfStub(pint_t addr);
998 Atom<A>* findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom);
999 Atom<A>* findAtomByName(const char* name); // slow!
1000 void findTargetFromAddress(pint_t addr, TargetDesc& target);
1001 void findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target);
1002 void findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum,
1003 TargetDesc& target);
1004 uint32_t tentativeDefinitionCount() { return _tentativeDefinitionCount; }
1005 uint32_t absoluteSymbolCount() { return _absoluteSymbolCount; }
1006
1007 bool hasStubsSection() { return (_stubsSectionNum != 0); }
1008 unsigned int stubsSectionNum() { return _stubsSectionNum; }
1009 void addDtraceExtraInfos(const SourceLocation& src, const char* provider);
1010 const char* scanSymbolTableForAddress(uint64_t addr);
1011 bool warnUnwindConversionProblems() { return _warnUnwindConversionProblems; }
1012 bool hasDataInCodeLabels() { return _hasDataInCodeLabels; }
1013 bool keepDwarfUnwind() { return _keepDwarfUnwind; }
1014 bool forceDwarfConversion() { return _forceDwarfConversion; }
1015 bool verboseOptimizationHints() { return _verboseOptimizationHints; }
1016 bool neverConvertDwarf() { return _neverConvertDwarf; }
1017
1018 macho_data_in_code_entry<P>* dataInCodeStart() { return _dataInCodeStart; }
1019 macho_data_in_code_entry<P>* dataInCodeEnd() { return _dataInCodeEnd; }
1020 const uint8_t* optimizationHintsStart() { return _lohStart; }
1021 const uint8_t* optimizationHintsEnd() { return _lohEnd; }
1022 bool hasOptimizationHints() { return _lohStart != _lohEnd; }
1023
1024
1025 void addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target);
1026 void addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase);
1027
1028
1029
1030 struct LabelAndCFIBreakIterator {
1031 typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
1032 LabelAndCFIBreakIterator(const uint32_t* ssa, uint32_t ssc, const pint_t* cfisa,
1033 uint32_t cfisc, bool ols)
1034 : sortedSymbolIndexes(ssa), sortedSymbolCount(ssc), cfiStartsArray(cfisa),
1035 cfiStartsCount(cfisc), fileHasOverlappingSymbols(ols),
1036 newSection(false), cfiIndex(0), symIndex(0) {}
1037 bool next(Parser<A>& parser, const Section<A>& sect, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
1038 pint_t* addr, pint_t* size, const macho_nlist<P>** sym);
1039 pint_t peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr);
1040 void beginSection() { newSection = true; symIndex = 0; }
1041
1042 const uint32_t* const sortedSymbolIndexes;
1043 const uint32_t sortedSymbolCount;
1044 const pint_t* cfiStartsArray;
1045 const uint32_t cfiStartsCount;
1046 const bool fileHasOverlappingSymbols;
1047 bool newSection;
1048 uint32_t cfiIndex;
1049 uint32_t symIndex;
1050 };
1051
1052 struct CFI_CU_InfoArrays {
1053 typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
1054 typedef typename CUSection<A>::Info CU_Info;
1055 CFI_CU_InfoArrays(const CFI_Atom_Info* cfiAr, uint32_t cfiC, CU_Info* cuAr, uint32_t cuC)
1056 : cfiArray(cfiAr), cuArray(cuAr), cfiCount(cfiC), cuCount(cuC) {}
1057 const CFI_Atom_Info* const cfiArray;
1058 CU_Info* const cuArray;
1059 const uint32_t cfiCount;
1060 const uint32_t cuCount;
1061 };
1062
1063
1064
1065 private:
1066 friend class Section<A>;
1067
1068 enum SectionType { sectionTypeIgnore, sectionTypeLiteral4, sectionTypeLiteral8, sectionTypeLiteral16,
1069 sectionTypeNonLazy, sectionTypeCFI, sectionTypeCString, sectionTypeCStringPointer,
1070 sectionTypeUTF16Strings, sectionTypeCFString, sectionTypeObjC2ClassRefs, typeObjC2CategoryList,
1071 sectionTypeObjC1Classes, sectionTypeSymboled, sectionTypeObjC1ClassRefs,
1072 sectionTypeTentativeDefinitions, sectionTypeAbsoluteSymbols, sectionTypeTLVDefs,
1073 sectionTypeCompactUnwind };
1074
1075 template <typename P>
1076 struct MachOSectionAndSectionClass
1077 {
1078 const macho_section<P>* sect;
1079 SectionType type;
1080
1081 static int sorter(const void* l, const void* r) {
1082 const MachOSectionAndSectionClass<P>* left = (MachOSectionAndSectionClass<P>*)l;
1083 const MachOSectionAndSectionClass<P>* right = (MachOSectionAndSectionClass<P>*)r;
1084 int64_t diff = left->sect->addr() - right->sect->addr();
1085 if ( diff == 0 )
1086 return 0;
1087 if ( diff < 0 )
1088 return -1;
1089 else
1090 return 1;
1091 }
1092 };
1093
1094 struct ParserAndSectionsArray { Parser* parser; const uint32_t* sortedSectionsArray; };
1095
1096
1097 Parser(const uint8_t* fileContent, uint64_t fileLength,
1098 const char* path, time_t modTime, ld::File::Ordinal ordinal,
1099 bool warnUnwindConversionProblems, bool keepDwarfUnwind,
1100 bool forceDwarfConversion, bool neverConvertDwarf, bool verboseOptimizationHints);
1101 ld::relocatable::File* parse(const ParserOptions& opts);
1102 uint8_t loadCommandSizeMask();
1103 bool parseLoadCommands();
1104 void makeSections();
1105 void prescanSymbolTable();
1106 void makeSortedSymbolsArray(uint32_t symArray[], const uint32_t sectionArray[]);
1107 void makeSortedSectionsArray(uint32_t array[]);
1108 static int pointerSorter(const void* l, const void* r);
1109 static int symbolIndexSorter(void* extra, const void* l, const void* r);
1110 static int sectionIndexSorter(void* extra, const void* l, const void* r);
1111
1112 void parseDebugInfo();
1113 void parseStabs();
1114 static bool isConstFunStabs(const char *stabStr);
1115 bool read_comp_unit(const char ** name, const char ** comp_dir,
1116 uint64_t *stmt_list);
1117 const char* getDwarfString(uint64_t form, const uint8_t* p);
1118 bool skip_form(const uint8_t ** offset, const uint8_t * end,
1119 uint64_t form, uint8_t addr_size, bool dwarf64);
1120
1121
1122 // filled in by constructor
1123 const uint8_t* _fileContent;
1124 uint32_t _fileLength;
1125 const char* _path;
1126 time_t _modTime;
1127 ld::File::Ordinal _ordinal;
1128
1129 // filled in by parseLoadCommands()
1130 File<A>* _file;
1131 const macho_nlist<P>* _symbols;
1132 uint32_t _symbolCount;
1133 const char* _strings;
1134 uint32_t _stringsSize;
1135 const uint32_t* _indirectTable;
1136 uint32_t _indirectTableCount;
1137 uint32_t _undefinedStartIndex;
1138 uint32_t _undefinedEndIndex;
1139 const macho_section<P>* _sectionsStart;
1140 uint32_t _machOSectionsCount;
1141 bool _hasUUID;
1142 macho_data_in_code_entry<P>* _dataInCodeStart;
1143 macho_data_in_code_entry<P>* _dataInCodeEnd;
1144 const uint8_t* _lohStart;
1145 const uint8_t* _lohEnd;
1146
1147 // filled in by parse()
1148 CFISection<A>* _EHFrameSection;
1149 CUSection<A>* _compactUnwindSection;
1150 AbsoluteSymbolSection<A>* _absoluteSection;
1151 uint32_t _tentativeDefinitionCount;
1152 uint32_t _absoluteSymbolCount;
1153 uint32_t _symbolsInSections;
1154 bool _hasLongBranchStubs;
1155 bool _AppleObjc; // FSF has objc that uses different data layout
1156 bool _overlappingSymbols;
1157 bool _warnUnwindConversionProblems;
1158 bool _hasDataInCodeLabels;
1159 bool _keepDwarfUnwind;
1160 bool _forceDwarfConversion;
1161 bool _neverConvertDwarf;
1162 bool _verboseOptimizationHints;
1163 unsigned int _stubsSectionNum;
1164 const macho_section<P>* _stubsMachOSection;
1165 std::vector<const char*> _dtraceProviderInfo;
1166 std::vector<FixupInAtom> _allFixups;
1167 };
1168
1169
1170
1171 template <typename A>
1172 Parser<A>::Parser(const uint8_t* fileContent, uint64_t fileLength, const char* path, time_t modTime,
1173 ld::File::Ordinal ordinal, bool convertDUI, bool keepDwarfUnwind, bool forceDwarfConversion,
1174 bool neverConvertDwarf, bool verboseOptimizationHints)
1175 : _fileContent(fileContent), _fileLength(fileLength), _path(path), _modTime(modTime),
1176 _ordinal(ordinal), _file(NULL),
1177 _symbols(NULL), _symbolCount(0), _strings(NULL), _stringsSize(0),
1178 _indirectTable(NULL), _indirectTableCount(0),
1179 _undefinedStartIndex(0), _undefinedEndIndex(0),
1180 _sectionsStart(NULL), _machOSectionsCount(0), _hasUUID(false),
1181 _dataInCodeStart(NULL), _dataInCodeEnd(NULL),
1182 _lohStart(NULL), _lohEnd(NULL),
1183 _EHFrameSection(NULL), _compactUnwindSection(NULL), _absoluteSection(NULL),
1184 _tentativeDefinitionCount(0), _absoluteSymbolCount(0),
1185 _symbolsInSections(0), _hasLongBranchStubs(false), _AppleObjc(false),
1186 _overlappingSymbols(false), _warnUnwindConversionProblems(convertDUI), _hasDataInCodeLabels(false),
1187 _keepDwarfUnwind(keepDwarfUnwind), _forceDwarfConversion(forceDwarfConversion),
1188 _neverConvertDwarf(neverConvertDwarf),
1189 _verboseOptimizationHints(verboseOptimizationHints),
1190 _stubsSectionNum(0), _stubsMachOSection(NULL)
1191 {
1192 }
1193
1194
1195 template <>
1196 bool Parser<x86>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1197 {
1198 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1199 if ( header->magic() != MH_MAGIC )
1200 return false;
1201 if ( header->cputype() != CPU_TYPE_I386 )
1202 return false;
1203 if ( header->filetype() != MH_OBJECT )
1204 return false;
1205 return true;
1206 }
1207
1208 template <>
1209 bool Parser<x86_64>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1210 {
1211 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1212 if ( header->magic() != MH_MAGIC_64 )
1213 return false;
1214 if ( header->cputype() != CPU_TYPE_X86_64 )
1215 return false;
1216 if ( header->filetype() != MH_OBJECT )
1217 return false;
1218 return true;
1219 }
1220
1221 template <>
1222 bool Parser<arm>::validFile(const uint8_t* fileContent, bool subtypeMustMatch, cpu_subtype_t subtype)
1223 {
1224 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1225 if ( header->magic() != MH_MAGIC )
1226 return false;
1227 if ( header->cputype() != CPU_TYPE_ARM )
1228 return false;
1229 if ( header->filetype() != MH_OBJECT )
1230 return false;
1231 if ( subtypeMustMatch ) {
1232 if ( (cpu_subtype_t)header->cpusubtype() == subtype )
1233 return true;
1234 // hack until libcc_kext.a is made fat
1235 if ( header->cpusubtype() == CPU_SUBTYPE_ARM_ALL )
1236 return true;
1237 return false;
1238 }
1239 return true;
1240 }
1241
1242
1243 template <>
1244 bool Parser<arm64>::validFile(const uint8_t* fileContent, bool subtypeMustMatch, cpu_subtype_t subtype)
1245 {
1246 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1247 if ( header->magic() != MH_MAGIC_64 )
1248 return false;
1249 if ( header->cputype() != CPU_TYPE_ARM64 )
1250 return false;
1251 if ( header->filetype() != MH_OBJECT )
1252 return false;
1253 return true;
1254 }
1255
1256
1257 template <>
1258 const char* Parser<x86>::fileKind(const uint8_t* fileContent)
1259 {
1260 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1261 if ( header->magic() != MH_MAGIC )
1262 return NULL;
1263 if ( header->cputype() != CPU_TYPE_I386 )
1264 return NULL;
1265 return "i386";
1266 }
1267
1268 template <>
1269 const char* Parser<x86_64>::fileKind(const uint8_t* fileContent)
1270 {
1271 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1272 if ( header->magic() != MH_MAGIC )
1273 return NULL;
1274 if ( header->cputype() != CPU_TYPE_X86_64 )
1275 return NULL;
1276 return "x86_64";
1277 }
1278
1279 template <>
1280 const char* Parser<arm>::fileKind(const uint8_t* fileContent)
1281 {
1282 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1283 if ( header->magic() != MH_MAGIC )
1284 return NULL;
1285 if ( header->cputype() != CPU_TYPE_ARM )
1286 return NULL;
1287 for (const ArchInfo* t=archInfoArray; t->archName != NULL; ++t) {
1288 if ( (t->cpuType == CPU_TYPE_ARM) && ((cpu_subtype_t)header->cpusubtype() == t->cpuSubType) ) {
1289 return t->archName;
1290 }
1291 }
1292 return "arm???";
1293 }
1294
1295 #if SUPPORT_ARCH_arm64
1296 template <>
1297 const char* Parser<arm64>::fileKind(const uint8_t* fileContent)
1298 {
1299 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1300 if ( header->magic() != MH_MAGIC )
1301 return NULL;
1302 if ( header->cputype() != CPU_TYPE_ARM64 )
1303 return NULL;
1304 return "arm64";
1305 }
1306 #endif
1307
1308 template <typename A>
1309 bool Parser<A>::hasObjC2Categories(const uint8_t* fileContent)
1310 {
1311 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1312 const uint32_t cmd_count = header->ncmds();
1313 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1314 const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1315 const macho_load_command<P>* cmd = cmds;
1316 for (uint32_t i = 0; i < cmd_count; ++i) {
1317 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1318 const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1319 const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1320 for (uint32_t si=0; si < segment->nsects(); ++si) {
1321 const macho_section<P>* sect = &sectionsStart[si];
1322 if ( (sect->size() > 0)
1323 && (strcmp(sect->sectname(), "__objc_catlist") == 0)
1324 && (strcmp(sect->segname(), "__DATA") == 0) ) {
1325 return true;
1326 }
1327 }
1328 }
1329 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1330 if ( cmd > cmdsEnd )
1331 throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1332 }
1333 return false;
1334 }
1335
1336
1337 template <typename A>
1338 bool Parser<A>::hasObjC1Categories(const uint8_t* fileContent)
1339 {
1340 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1341 const uint32_t cmd_count = header->ncmds();
1342 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1343 const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1344 const macho_load_command<P>* cmd = cmds;
1345 for (uint32_t i = 0; i < cmd_count; ++i) {
1346 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1347 const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1348 const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1349 for (uint32_t si=0; si < segment->nsects(); ++si) {
1350 const macho_section<P>* sect = &sectionsStart[si];
1351 if ( (sect->size() > 0)
1352 && (strcmp(sect->sectname(), "__category") == 0)
1353 && (strcmp(sect->segname(), "__OBJC") == 0) ) {
1354 return true;
1355 }
1356 }
1357 }
1358 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1359 if ( cmd > cmdsEnd )
1360 throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1361 }
1362 return false;
1363 }
1364
1365 template <typename A>
1366 int Parser<A>::pointerSorter(const void* l, const void* r)
1367 {
1368 // sort references by address
1369 const pint_t* left = (pint_t*)l;
1370 const pint_t* right = (pint_t*)r;
1371 return (*left - *right);
1372 }
1373
1374 template <typename A>
1375 typename A::P::uint_t Parser<A>::LabelAndCFIBreakIterator::peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr)
1376 {
1377 pint_t symbolAddr;
1378 if ( symIndex < sortedSymbolCount )
1379 symbolAddr = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]).n_value();
1380 else
1381 symbolAddr = endAddr;
1382 pint_t cfiAddr;
1383 if ( cfiIndex < cfiStartsCount )
1384 cfiAddr = cfiStartsArray[cfiIndex];
1385 else
1386 cfiAddr = endAddr;
1387 if ( (cfiAddr < symbolAddr) && (cfiAddr >= startAddr) ) {
1388 if ( cfiAddr < endAddr )
1389 return cfiAddr;
1390 else
1391 return endAddr;
1392 }
1393 else {
1394 if ( symbolAddr < endAddr )
1395 return symbolAddr;
1396 else
1397 return endAddr;
1398 }
1399 }
1400
1401 //
1402 // Parses up a section into chunks based on labels and CFI information.
1403 // Each call returns the next chunk address and size, and (if the break
1404 // was becuase of a label, the symbol). Returns false when no more chunks.
1405 //
1406 template <typename A>
1407 bool Parser<A>::LabelAndCFIBreakIterator::next(Parser<A>& parser, const Section<A>& sect, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
1408 pint_t* addr, pint_t* size, const macho_nlist<P>** symbol)
1409 {
1410 // may not be a label on start of section, but need atom demarcation there
1411 if ( newSection ) {
1412 newSection = false;
1413 // advance symIndex until we get to the first label at or past the start of this section
1414 while ( symIndex < sortedSymbolCount ) {
1415 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1416 if ( ! sect.ignoreLabel(parser.nameFromSymbol(sym)) ) {
1417 pint_t nextSymbolAddr = sym.n_value();
1418 //fprintf(stderr, "sectNum=%d, nextSymbolAddr=0x%08llX, name=%s\n", sectNum, (uint64_t)nextSymbolAddr, parser.nameFromSymbol(sym));
1419 if ( (nextSymbolAddr > startAddr) || ((nextSymbolAddr == startAddr) && (sym.n_sect() == sectNum)) )
1420 break;
1421 }
1422 ++symIndex;
1423 }
1424 if ( symIndex < sortedSymbolCount ) {
1425 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1426 pint_t nextSymbolAddr = sym.n_value();
1427 // if next symbol found is not in this section
1428 if ( sym.n_sect() != sectNum ) {
1429 // check for CFI break instead of symbol break
1430 if ( cfiIndex < cfiStartsCount ) {
1431 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1432 if ( nextCfiAddr < endAddr ) {
1433 // use cfi
1434 ++cfiIndex;
1435 *addr = nextCfiAddr;
1436 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1437 *symbol = NULL;
1438 return true;
1439 }
1440 }
1441 *addr = startAddr;
1442 *size = endAddr - startAddr;
1443 *symbol = NULL;
1444 if ( startAddr == endAddr )
1445 return false; // zero size section
1446 else
1447 return true; // whole section is one atom with no label
1448 }
1449 // if also CFI break here, eat it
1450 if ( cfiIndex < cfiStartsCount ) {
1451 if ( cfiStartsArray[cfiIndex] == nextSymbolAddr )
1452 ++cfiIndex;
1453 }
1454 if ( nextSymbolAddr == startAddr ) {
1455 // label at start of section, return it as chunk
1456 ++symIndex;
1457 *addr = startAddr;
1458 *size = peek(parser, startAddr, endAddr) - startAddr;
1459 *symbol = &sym;
1460 return true;
1461 }
1462 // return chunk before first symbol
1463 *addr = startAddr;
1464 *size = nextSymbolAddr - startAddr;
1465 *symbol = NULL;
1466 return true;
1467 }
1468 // no symbols in section, check CFI
1469 if ( cfiIndex < cfiStartsCount ) {
1470 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1471 if ( nextCfiAddr < endAddr ) {
1472 // use cfi
1473 ++cfiIndex;
1474 *addr = nextCfiAddr;
1475 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1476 *symbol = NULL;
1477 return true;
1478 }
1479 }
1480 // no cfi, so whole section is one chunk
1481 *addr = startAddr;
1482 *size = endAddr - startAddr;
1483 *symbol = NULL;
1484 if ( startAddr == endAddr )
1485 return false; // zero size section
1486 else
1487 return true; // whole section is one atom with no label
1488 }
1489
1490 while ( (symIndex < sortedSymbolCount) && (cfiIndex < cfiStartsCount) ) {
1491 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1492 pint_t nextSymbolAddr = sym.n_value();
1493 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1494 if ( nextSymbolAddr < nextCfiAddr ) {
1495 if ( nextSymbolAddr >= endAddr )
1496 return false;
1497 ++symIndex;
1498 if ( nextSymbolAddr < startAddr )
1499 continue;
1500 *addr = nextSymbolAddr;
1501 *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1502 *symbol = &sym;
1503 return true;
1504 }
1505 else if ( nextCfiAddr < nextSymbolAddr ) {
1506 if ( nextCfiAddr >= endAddr )
1507 return false;
1508 ++cfiIndex;
1509 if ( nextCfiAddr < startAddr )
1510 continue;
1511 *addr = nextCfiAddr;
1512 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1513 *symbol = NULL;
1514 return true;
1515 }
1516 else {
1517 if ( nextCfiAddr >= endAddr )
1518 return false;
1519 ++symIndex;
1520 ++cfiIndex;
1521 if ( nextCfiAddr < startAddr )
1522 continue;
1523 *addr = nextCfiAddr;
1524 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1525 *symbol = &sym;
1526 return true;
1527 }
1528 }
1529 while ( symIndex < sortedSymbolCount ) {
1530 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1531 pint_t nextSymbolAddr = sym.n_value();
1532 // if next symbol found is not in this section, then done with iteration
1533 if ( sym.n_sect() != sectNum )
1534 return false;
1535 ++symIndex;
1536 if ( nextSymbolAddr < startAddr )
1537 continue;
1538 *addr = nextSymbolAddr;
1539 *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1540 *symbol = &sym;
1541 return true;
1542 }
1543 while ( cfiIndex < cfiStartsCount ) {
1544 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1545 if ( nextCfiAddr >= endAddr )
1546 return false;
1547 ++cfiIndex;
1548 if ( nextCfiAddr < startAddr )
1549 continue;
1550 *addr = nextCfiAddr;
1551 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1552 *symbol = NULL;
1553 return true;
1554 }
1555 return false;
1556 }
1557
// Declares `_type* _name` pointing at storage for `_actual_count` elements.
// If `_actual_count` exceeds `_maxCount` the storage comes from malloc();
// otherwise it is the variable-length local array `_name##_buffer`.
// Also declares `uint32_t _name##_count`, the element count of that local
// array (1 when the heap path was taken).
// The count and limit arguments are parenthesized so that expressions such as
// `a + b` or `c ? x : y` are evaluated as a unit; note they are still evaluated
// more than once, so they must be free of side effects.
// NOTE: the malloc() result is not checked and the macro never frees it.
#define STACK_ALLOC_IF_SMALL(_type, _name, _actual_count, _maxCount) \
    _type* _name = NULL; \
    uint32_t _name##_count = 1; \
    if ( (_actual_count) > (_maxCount) ) \
        _name = (_type*)malloc(sizeof(_type) * (_actual_count)); \
    else \
        _name##_count = (_actual_count); \
    _type _name##_buffer[_name##_count]; \
    if ( _name == NULL ) \
        _name = _name##_buffer;
1568
1569
1570 template <typename A>
1571 ld::relocatable::File* Parser<A>::parse(const ParserOptions& opts)
1572 {
1573 // create file object
1574 _file = new File<A>(_path, _modTime, _fileContent, _ordinal);
1575
1576 // respond to -t option
1577 if ( opts.logAllFiles )
1578 printf("%s\n", _path);
1579
1580 // parse start of mach-o file
1581 if ( ! parseLoadCommands() )
1582 return _file;
1583
1584 // make array of
1585 uint32_t sortedSectionIndexes[_machOSectionsCount];
1586 this->makeSortedSectionsArray(sortedSectionIndexes);
1587
1588 // make symbol table sorted by address
1589 this->prescanSymbolTable();
1590 uint32_t sortedSymbolIndexes[_symbolsInSections];
1591 this->makeSortedSymbolsArray(sortedSymbolIndexes, sortedSectionIndexes);
1592
1593 // allocate Section<A> object for each mach-o section
1594 makeSections();
1595
1596 // if it exists, do special early parsing of __compact_unwind section
1597 uint32_t countOfCUs = 0;
1598 if ( _compactUnwindSection != NULL )
1599 countOfCUs = _compactUnwindSection->count();
1600 // stack allocate (if not too large) cuInfoBuffer
1601 STACK_ALLOC_IF_SMALL(typename CUSection<A>::Info, cuInfoArray, countOfCUs, 1024);
1602 if ( countOfCUs != 0 )
1603 _compactUnwindSection->parse(*this, countOfCUs, cuInfoArray);
1604
1605 // create lists of address that already have compact unwind and thus don't need the dwarf parsed
1606 unsigned cuLsdaCount = 0;
1607 pint_t cuStarts[countOfCUs];
1608 for (uint32_t i=0; i < countOfCUs; ++i) {
1609 if ( CUSection<A>::encodingMeansUseDwarf(cuInfoArray[i].compactUnwindInfo) )
1610 cuStarts[i] = -1;
1611 else
1612 cuStarts[i] = cuInfoArray[i].functionStartAddress;
1613 if ( cuInfoArray[i].lsdaAddress != 0 )
1614 ++cuLsdaCount;
1615 }
1616
1617
1618 // if it exists, do special early parsing of __eh_frame section
1619 // stack allocate (if not too large) array of CFI_Atom_Info
1620 uint32_t countOfCFIs = 0;
1621 if ( _EHFrameSection != NULL )
1622 countOfCFIs = _EHFrameSection->cfiCount();
1623 STACK_ALLOC_IF_SMALL(typename CFISection<A>::CFI_Atom_Info, cfiArray, countOfCFIs, 1024);
1624
1625 // stack allocate (if not too large) a copy of __eh_frame to apply relocations to
1626 uint32_t sectSize = 4;
1627 if ( (countOfCFIs != 0) && _EHFrameSection->needsRelocating() )
1628 sectSize = _EHFrameSection->machoSection()->size()+4;
1629 STACK_ALLOC_IF_SMALL(uint8_t, ehBuffer, sectSize, 50*1024);
1630 uint32_t cfiStartsCount = 0;
1631 if ( countOfCFIs != 0 ) {
1632 _EHFrameSection->cfiParse(*this, ehBuffer, cfiArray, countOfCFIs, cuStarts, countOfCUs);
1633 // count functions and lsdas
1634 for(uint32_t i=0; i < countOfCFIs; ++i) {
1635 if ( cfiArray[i].isCIE )
1636 continue;
1637 //fprintf(stderr, "cfiArray[i].func = 0x%08llX, cfiArray[i].lsda = 0x%08llX, encoding=0x%08X\n",
1638 // (uint64_t)cfiArray[i].u.fdeInfo.function.targetAddress,
1639 // (uint64_t)cfiArray[i].u.fdeInfo.lsda.targetAddress,
1640 // cfiArray[i].u.fdeInfo.compactUnwindInfo);
1641 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1642 ++cfiStartsCount;
1643 if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1644 ++cfiStartsCount;
1645 }
1646 }
1647 CFI_CU_InfoArrays cfis(cfiArray, countOfCFIs, cuInfoArray, countOfCUs);
1648
1649 // create sorted array of function starts and lsda starts
1650 pint_t cfiStartsArray[cfiStartsCount+cuLsdaCount];
1651 uint32_t countOfFDEs = 0;
1652 uint32_t cfiStartsArrayCount = 0;
1653 if ( countOfCFIs != 0 ) {
1654 for(uint32_t i=0; i < countOfCFIs; ++i) {
1655 if ( cfiArray[i].isCIE )
1656 continue;
1657 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1658 cfiStartsArray[cfiStartsArrayCount++] = cfiArray[i].u.fdeInfo.function.targetAddress;
1659 if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1660 cfiStartsArray[cfiStartsArrayCount++] = cfiArray[i].u.fdeInfo.lsda.targetAddress;
1661 ++countOfFDEs;
1662 }
1663 }
1664 if ( cuLsdaCount != 0 ) {
1665 // merge in an lsda info from compact unwind
1666 for (uint32_t i=0; i < countOfCUs; ++i) {
1667 if ( cuInfoArray[i].lsdaAddress == 0 )
1668 continue;
1669 // append to cfiStartsArray if not already in that list
1670 bool found = false;
1671 for(uint32_t j=0; j < cfiStartsArrayCount; ++j) {
1672 if ( cfiStartsArray[j] == cuInfoArray[i].lsdaAddress )
1673 found = true;
1674 }
1675 if ( ! found ) {
1676 cfiStartsArray[cfiStartsArrayCount++] = cuInfoArray[i].lsdaAddress;
1677 }
1678 }
1679 }
1680 if ( cfiStartsArrayCount != 0 ) {
1681 ::qsort(cfiStartsArray, cfiStartsArrayCount, sizeof(pint_t), pointerSorter);
1682 #ifndef NDEBUG
1683 // scan for FDEs claming the same function
1684 for(uint32_t i=1; i < cfiStartsArrayCount; ++i) {
1685 assert( cfiStartsArray[i] != cfiStartsArray[i-1] );
1686 }
1687 #endif
1688 }
1689
1690 Section<A>** sections = _file->_sectionsArray;
1691 uint32_t sectionsCount = _file->_sectionsArrayCount;
1692
1693 // figure out how many atoms will be allocated and allocate
1694 LabelAndCFIBreakIterator breakIterator(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1695 cfiStartsArrayCount, _overlappingSymbols);
1696 uint32_t computedAtomCount = 0;
1697 for (uint32_t i=0; i < sectionsCount; ++i ) {
1698 breakIterator.beginSection();
1699 uint32_t count = sections[i]->computeAtomCount(*this, breakIterator, cfis);
1700 //const macho_section<P>* sect = sections[i]->machoSection();
1701 //fprintf(stderr, "computed count=%u for section %s size=%llu\n", count, sect->sectname(), (sect != NULL) ? sect->size() : 0);
1702 computedAtomCount += count;
1703 }
1704 //fprintf(stderr, "allocating %d atoms * sizeof(Atom<A>)=%ld, sizeof(ld::Atom)=%ld\n", computedAtomCount, sizeof(Atom<A>), sizeof(ld::Atom));
1705 _file->_atomsArray = new uint8_t[computedAtomCount*sizeof(Atom<A>)];
1706 _file->_atomsArrayCount = 0;
1707
1708 // have each section append atoms to _atomsArray
1709 LabelAndCFIBreakIterator breakIterator2(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1710 cfiStartsArrayCount, _overlappingSymbols);
1711 for (uint32_t i=0; i < sectionsCount; ++i ) {
1712 uint8_t* atoms = _file->_atomsArray + _file->_atomsArrayCount*sizeof(Atom<A>);
1713 breakIterator2.beginSection();
1714 uint32_t count = sections[i]->appendAtoms(*this, atoms, breakIterator2, cfis);
1715 //fprintf(stderr, "append count=%u for section %s/%s\n", count, sections[i]->machoSection()->segname(), sections[i]->machoSection()->sectname());
1716 _file->_atomsArrayCount += count;
1717 }
1718 assert( _file->_atomsArrayCount == computedAtomCount && "more atoms allocated than expected");
1719
1720
1721 // have each section add all fix-ups for its atoms
1722 _allFixups.reserve(computedAtomCount*5);
1723 for (uint32_t i=0; i < sectionsCount; ++i )
1724 sections[i]->makeFixups(*this, cfis);
1725
1726 // assign fixups start offset for each atom
1727 uint8_t* p = _file->_atomsArray;
1728 uint32_t fixupOffset = 0;
1729 for(int i=_file->_atomsArrayCount; i > 0; --i) {
1730 Atom<A>* atom = (Atom<A>*)p;
1731 atom->_fixupsStartIndex = fixupOffset;
1732 fixupOffset += atom->_fixupsCount;
1733 atom->_fixupsCount = 0;
1734 p += sizeof(Atom<A>);
1735 }
1736 assert(fixupOffset == _allFixups.size());
1737 _file->_fixups.reserve(fixupOffset);
1738
1739 // copy each fixup for each atom
1740 for(typename std::vector<FixupInAtom>::iterator it=_allFixups.begin(); it != _allFixups.end(); ++it) {
1741 uint32_t slot = it->atom->_fixupsStartIndex + it->atom->_fixupsCount;
1742 _file->_fixups[slot] = it->fixup;
1743 it->atom->_fixupsCount++;
1744 }
1745
1746 // done with temp vector
1747 _allFixups.clear();
1748
1749 // add unwind info
1750 _file->_unwindInfos.reserve(countOfFDEs+countOfCUs);
1751 for(uint32_t i=0; i < countOfCFIs; ++i) {
1752 if ( cfiArray[i].isCIE )
1753 continue;
1754 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS ) {
1755 ld::Atom::UnwindInfo info;
1756 info.startOffset = 0;
1757 info.unwindInfo = cfiArray[i].u.fdeInfo.compactUnwindInfo;
1758 _file->_unwindInfos.push_back(info);
1759 Atom<A>* func = findAtomByAddress(cfiArray[i].u.fdeInfo.function.targetAddress);
1760 func->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1761 //fprintf(stderr, "cu from dwarf =0x%08X, atom=%s\n", info.unwindInfo, func->name());
1762 }
1763 }
1764 // apply compact infos in __LD,__compact_unwind section to each function
1765 // if function also has dwarf unwind, CU will override it
1766 Atom<A>* lastFunc = NULL;
1767 uint32_t lastEnd = 0;
1768 for(uint32_t i=0; i < countOfCUs; ++i) {
1769 typename CUSection<A>::Info* info = &cuInfoArray[i];
1770 assert(info->function != NULL);
1771 ld::Atom::UnwindInfo ui;
1772 ui.startOffset = info->functionStartAddress - info->function->objectAddress();
1773 ui.unwindInfo = info->compactUnwindInfo;
1774 _file->_unwindInfos.push_back(ui);
1775 // don't override with converted cu with "use dwarf" cu, if forcing dwarf conversion
1776 if ( !_forceDwarfConversion || !CUSection<A>::encodingMeansUseDwarf(info->compactUnwindInfo) ) {
1777 //fprintf(stderr, "cu=0x%08X, atom=%s\n", ui.unwindInfo, info->function->name());
1778 // if previous is for same function, extend range
1779 if ( info->function == lastFunc ) {
1780 if ( lastEnd != ui.startOffset ) {
1781 if ( lastEnd < ui.startOffset )
1782 warning("__LD,__compact_unwind entries for %s have a gap at offset 0x%0X", info->function->name(), lastEnd);
1783 else
1784 warning("__LD,__compact_unwind entries for %s overlap at offset 0x%0X", info->function->name(), lastEnd);
1785 }
1786 lastFunc->extendUnwindInfoRange();
1787 }
1788 else
1789 info->function->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1790 lastFunc = info->function;
1791 lastEnd = ui.startOffset + info->rangeLength;
1792 }
1793 }
1794
1795 // parse dwarf debug info to get line info
1796 this->parseDebugInfo();
1797
1798 return _file;
1799 }
1800
1801
1802
1803 template <> uint8_t Parser<x86>::loadCommandSizeMask() { return 0x03; }
1804 template <> uint8_t Parser<x86_64>::loadCommandSizeMask() { return 0x07; }
1805 template <> uint8_t Parser<arm>::loadCommandSizeMask() { return 0x03; }
1806 template <> uint8_t Parser<arm64>::loadCommandSizeMask() { return 0x07; }
1807
1808 template <typename A>
1809 bool Parser<A>::parseLoadCommands()
1810 {
1811 const macho_header<P>* header = (const macho_header<P>*)_fileContent;
1812
1813 // set File attributes
1814 _file->_canScatterAtoms = (header->flags() & MH_SUBSECTIONS_VIA_SYMBOLS);
1815 _file->_cpuSubType = header->cpusubtype();
1816
1817 const macho_segment_command<P>* segment = NULL;
1818 const uint8_t* const endOfFile = _fileContent + _fileLength;
1819 const uint32_t cmd_count = header->ncmds();
1820 // <rdar://problem/5394172> an empty .o file with zero load commands will crash linker
1821 if ( cmd_count == 0 )
1822 return false;
1823 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1824 const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1825 const macho_load_command<P>* cmd = cmds;
1826 for (uint32_t i = 0; i < cmd_count; ++i) {
1827 uint32_t size = cmd->cmdsize();
1828 if ( (size & this->loadCommandSizeMask()) != 0 )
1829 throwf("load command #%d has a unaligned size", i);
1830 const uint8_t* endOfCmd = ((uint8_t*)cmd)+cmd->cmdsize();
1831 if ( endOfCmd > (uint8_t*)cmdsEnd )
1832 throwf("load command #%d extends beyond the end of the load commands", i);
1833 if ( endOfCmd > endOfFile )
1834 throwf("load command #%d extends beyond the end of the file", i);
1835 switch (cmd->cmd()) {
1836 case LC_SYMTAB:
1837 {
1838 const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
1839 _symbolCount = symtab->nsyms();
1840 _symbols = (const macho_nlist<P>*)(_fileContent + symtab->symoff());
1841 _strings = (char*)_fileContent + symtab->stroff();
1842 _stringsSize = symtab->strsize();
1843 if ( (symtab->symoff() + _symbolCount*sizeof(macho_nlist<P>)) > _fileLength )
1844 throw "mach-o symbol table extends beyond end of file";
1845 if ( (_strings + _stringsSize) > (char*)endOfFile )
1846 throw "mach-o string pool extends beyond end of file";
1847 if ( _indirectTable == NULL ) {
1848 if ( _undefinedEndIndex == 0 ) {
1849 _undefinedStartIndex = 0;
1850 _undefinedEndIndex = symtab->nsyms();
1851 }
1852 }
1853 }
1854 break;
1855 case LC_DYSYMTAB:
1856 {
1857 const macho_dysymtab_command<P>* dsymtab = (macho_dysymtab_command<P>*)cmd;
1858 _indirectTable = (uint32_t*)(_fileContent + dsymtab->indirectsymoff());
1859 _indirectTableCount = dsymtab->nindirectsyms();
1860 if ( &_indirectTable[_indirectTableCount] > (uint32_t*)endOfFile )
1861 throw "indirect symbol table extends beyond end of file";
1862 _undefinedStartIndex = dsymtab->iundefsym();
1863 _undefinedEndIndex = _undefinedStartIndex + dsymtab->nundefsym();
1864 }
1865 break;
1866 case LC_UUID:
1867 _hasUUID = true;
1868 break;
1869 case LC_DATA_IN_CODE:
1870 {
1871 const macho_linkedit_data_command<P>* dc = (macho_linkedit_data_command<P>*)cmd;
1872 _dataInCodeStart = (macho_data_in_code_entry<P>*)(_fileContent + dc->dataoff());
1873 _dataInCodeEnd = (macho_data_in_code_entry<P>*)(_fileContent + dc->dataoff() + dc->datasize());
1874 if ( _dataInCodeEnd > (macho_data_in_code_entry<P>*)endOfFile )
1875 throw "LC_DATA_IN_CODE table extends beyond end of file";
1876 }
1877 break;
1878 case LC_LINKER_OPTION:
1879 {
1880 const macho_linker_option_command<P>* loc = (macho_linker_option_command<P>*)cmd;
1881 const char* buffer = loc->buffer();
1882 _file->_linkerOptions.resize(_file->_linkerOptions.size() + 1);
1883 std::vector<const char*>& vec = _file->_linkerOptions.back();
1884 for (uint32_t j=0; j < loc->count(); ++j) {
1885 vec.push_back(buffer);
1886 buffer += strlen(buffer) + 1;
1887 }
1888 if ( buffer > ((char*)cmd + loc->cmdsize()) )
1889 throw "malformed LC_LINKER_OPTION";
1890 }
1891 break;
1892 case LC_LINKER_OPTIMIZATION_HINTS:
1893 {
1894 const macho_linkedit_data_command<P>* loh = (macho_linkedit_data_command<P>*)cmd;
1895 _lohStart = _fileContent + loh->dataoff();
1896 _lohEnd = _fileContent + loh->dataoff() + loh->datasize();
1897 if ( _lohEnd > endOfFile )
1898 throw "LC_LINKER_OPTIMIZATION_HINTS table extends beyond end of file";
1899 }
1900 break;
1901 default:
1902 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1903 if ( segment != NULL )
1904 throw "more than one LC_SEGMENT found in object file";
1905 segment = (macho_segment_command<P>*)cmd;
1906 }
1907 break;
1908 }
1909 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1910 if ( cmd > cmdsEnd )
1911 throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1912 }
1913
1914 // record range of sections
1915 if ( segment == NULL )
1916 throw "missing LC_SEGMENT";
1917 _sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1918 _machOSectionsCount = segment->nsects();
1919
1920 return true;
1921 }
1922
1923
1924 template <typename A>
1925 void Parser<A>::prescanSymbolTable()
1926 {
1927 _tentativeDefinitionCount = 0;
1928 _absoluteSymbolCount = 0;
1929 _symbolsInSections = 0;
1930 _hasDataInCodeLabels = false;
1931 for (uint32_t i=0; i < this->_symbolCount; ++i) {
1932 const macho_nlist<P>& sym = symbolFromIndex(i);
1933 // ignore stabs
1934 if ( (sym.n_type() & N_STAB) != 0 )
1935 continue;
1936
1937 // look at undefines
1938 const char* symbolName = this->nameFromSymbol(sym);
1939 if ( (sym.n_type() & N_TYPE) == N_UNDF ) {
1940 if ( sym.n_value() != 0 ) {
1941 // count tentative definitions
1942 ++_tentativeDefinitionCount;
1943 }
1944 else if ( strncmp(symbolName, "___dtrace_", 10) == 0 ) {
1945 // any undefined starting with __dtrace_*$ that is not ___dtrace_probe$* or ___dtrace_isenabled$*
1946 // is extra provider info
1947 if ( (strncmp(&symbolName[10], "probe$", 6) != 0) && (strncmp(&symbolName[10], "isenabled$", 10) != 0) ) {
1948 _dtraceProviderInfo.push_back(symbolName);
1949 }
1950 }
1951 continue;
1952 }
1953
1954 // count absolute symbols
1955 if ( (sym.n_type() & N_TYPE) == N_ABS ) {
1956 const char* absName = this->nameFromSymbol(sym);
1957 // ignore .objc_class_name_* symbols
1958 if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
1959 _AppleObjc = true;
1960 continue;
1961 }
1962 // ignore .objc_class_name_* symbols
1963 if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
1964 continue;
1965 // ignore empty *.eh symbols
1966 if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
1967 continue;
1968 ++_absoluteSymbolCount;
1969 }
1970
1971 // only look at definitions
1972 if ( (sym.n_type() & N_TYPE) != N_SECT )
1973 continue;
1974
1975 // 'L' labels do not denote atom breaks
1976 if ( symbolName[0] == 'L' ) {
1977 // <rdar://problem/9218847> Formalize data in code with L$start$ labels
1978 if ( strncmp(symbolName, "L$start$", 8) == 0 )
1979 _hasDataInCodeLabels = true;
1980 continue;
1981 }
1982 // how many def syms in each section
1983 if ( sym.n_sect() > _machOSectionsCount )
1984 throw "bad n_sect in symbol table";
1985
1986 _symbolsInSections++;
1987 }
1988 }
1989
1990 template <typename A>
1991 int Parser<A>::sectionIndexSorter(void* extra, const void* l, const void* r)
1992 {
1993 Parser<A>* parser = (Parser<A>*)extra;
1994 const uint32_t* left = (uint32_t*)l;
1995 const uint32_t* right = (uint32_t*)r;
1996 const macho_section<P>* leftSect = parser->machOSectionFromSectionIndex(*left);
1997 const macho_section<P>* rightSect = parser->machOSectionFromSectionIndex(*right);
1998
1999 // can't just return difference because 64-bit diff does not fit in 32-bit return type
2000 int64_t result = leftSect->addr() - rightSect->addr();
2001 if ( result == 0 ) {
2002 // two sections with same start address
2003 // one with zero size goes first
2004 bool leftEmpty = ( leftSect->size() == 0 );
2005 bool rightEmpty = ( rightSect->size() == 0 );
2006 if ( leftEmpty != rightEmpty ) {
2007 return ( rightEmpty ? 1 : -1 );
2008 }
2009 if ( !leftEmpty && !rightEmpty )
2010 throwf("overlapping sections");
2011 // both empty, so chose file order
2012 return ( rightSect - leftSect );
2013 }
2014 else if ( result < 0 )
2015 return -1;
2016 else
2017 return 1;
2018 }
2019
2020 template <typename A>
2021 void Parser<A>::makeSortedSectionsArray(uint32_t array[])
2022 {
2023 const bool log = false;
2024
2025 if ( log ) {
2026 fprintf(stderr, "unsorted sections:\n");
2027 for(unsigned int i=0; i < _machOSectionsCount; ++i )
2028 fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[i].addr(), _sectionsStart[i].segname(), _sectionsStart[i].sectname());
2029 }
2030
2031 // sort by symbol table address
2032 for (uint32_t i=0; i < _machOSectionsCount; ++i)
2033 array[i] = i;
2034 ::qsort_r(array, _machOSectionsCount, sizeof(uint32_t), this, &sectionIndexSorter);
2035
2036 if ( log ) {
2037 fprintf(stderr, "sorted sections:\n");
2038 for(unsigned int i=0; i < _machOSectionsCount; ++i )
2039 fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[array[i]].addr(), _sectionsStart[array[i]].segname(), _sectionsStart[array[i]].sectname());
2040 }
2041 }
2042
2043
2044
2045 template <typename A>
2046 int Parser<A>::symbolIndexSorter(void* extra, const void* l, const void* r)
2047 {
2048 ParserAndSectionsArray* extraInfo = (ParserAndSectionsArray*)extra;
2049 Parser<A>* parser = extraInfo->parser;
2050 const uint32_t* sortedSectionsArray = extraInfo->sortedSectionsArray;
2051 const uint32_t* left = (uint32_t*)l;
2052 const uint32_t* right = (uint32_t*)r;
2053 const macho_nlist<P>& leftSym = parser->symbolFromIndex(*left);
2054 const macho_nlist<P>& rightSym = parser->symbolFromIndex(*right);
2055 // can't just return difference because 64-bit diff does not fit in 32-bit return type
2056 int64_t result = leftSym.n_value() - rightSym.n_value();
2057 if ( result == 0 ) {
2058 // two symbols with same address
2059 // if in different sections, sort earlier section first
2060 if ( leftSym.n_sect() != rightSym.n_sect() ) {
2061 for (uint32_t i=0; i < parser->machOSectionCount(); ++i) {
2062 if ( sortedSectionsArray[i]+1 == leftSym.n_sect() )
2063 return -1;
2064 if ( sortedSectionsArray[i]+1 == rightSym.n_sect() )
2065 return 1;
2066 }
2067 }
2068 // two symbols in same section, means one is an alias
2069 // if one is ltmp*, make it an alias (sort first)
2070 const char* leftName = parser->nameFromSymbol(leftSym);
2071 const char* rightName = parser->nameFromSymbol(rightSym);
2072 bool leftIsTmp = strncmp(leftName, "ltmp", 4);
2073 bool rightIsTmp = strncmp(rightName, "ltmp", 4);
2074 if ( leftIsTmp != rightIsTmp ) {
2075 return (rightIsTmp ? -1 : 1);
2076 }
2077
2078 // if only one is global, make the other an alias (sort first)
2079 if ( (leftSym.n_type() & N_EXT) != (rightSym.n_type() & N_EXT) ) {
2080 if ( (rightSym.n_type() & N_EXT) != 0 )
2081 return -1;
2082 else
2083 return 1;
2084 }
2085 // if both are global, sort alphabetically. earlier one will be the alias
2086 return ( strcmp(rightName, leftName) );
2087 }
2088 else if ( result < 0 )
2089 return -1;
2090 else
2091 return 1;
2092 }
2093
2094
2095 template <typename A>
2096 void Parser<A>::makeSortedSymbolsArray(uint32_t array[], const uint32_t sectionArray[])
2097 {
2098 const bool log = false;
2099
2100 uint32_t* p = array;
2101 for (uint32_t i=0; i < this->_symbolCount; ++i) {
2102 const macho_nlist<P>& sym = symbolFromIndex(i);
2103 // ignore stabs
2104 if ( (sym.n_type() & N_STAB) != 0 )
2105 continue;
2106
2107 // only look at definitions
2108 if ( (sym.n_type() & N_TYPE) != N_SECT )
2109 continue;
2110
2111 // 'L' labels do not denote atom breaks
2112 const char* symbolName = this->nameFromSymbol(sym);
2113 if ( symbolName[0] == 'L' )
2114 continue;
2115
2116 // how many def syms in each section
2117 if ( sym.n_sect() > _machOSectionsCount )
2118 throw "bad n_sect in symbol table";
2119
2120 // append to array
2121 *p++ = i;
2122 }
2123 assert(p == &array[_symbolsInSections] && "second pass over symbol table yield a different number of symbols");
2124
2125 // sort by symbol table address
2126 ParserAndSectionsArray extra = { this, sectionArray };
2127 ::qsort_r(array, _symbolsInSections, sizeof(uint32_t), &extra, &symbolIndexSorter);
2128
2129
2130 // look for two symbols at same address
2131 _overlappingSymbols = false;
2132 for (unsigned int i=1; i < _symbolsInSections; ++i) {
2133 if ( symbolFromIndex(array[i-1]).n_value() == symbolFromIndex(array[i]).n_value() ) {
2134 //fprintf(stderr, "overlapping symbols at 0x%08llX\n", symbolFromIndex(array[i-1]).n_value());
2135 _overlappingSymbols = true;
2136 break;
2137 }
2138 }
2139
2140 if ( log ) {
2141 fprintf(stderr, "sorted symbols:\n");
2142 for(unsigned int i=0; i < _symbolsInSections; ++i )
2143 fprintf(stderr, "0x%09llX symIndex=%d sectNum=%2d, %s\n", symbolFromIndex(array[i]).n_value(), array[i], symbolFromIndex(array[i]).n_sect(), nameFromSymbol(symbolFromIndex(array[i])) );
2144 }
2145 }
2146
2147 template <typename A>
2148 void Parser<A>::makeSections()
2149 {
2150 // classify each section by type
2151 // compute how many Section objects will be needed and total size for all
2152 unsigned int totalSectionsSize = 0;
2153 uint8_t machOSectsStorage[sizeof(MachOSectionAndSectionClass<P>)*(_machOSectionsCount+2)]; // also room for tentative-defs and absolute symbols
2154 // allocate raw storage for all section objects on stack
2155 MachOSectionAndSectionClass<P>* machOSects = (MachOSectionAndSectionClass<P>*)machOSectsStorage;
2156 unsigned int count = 0;
2157 for (uint32_t i=0; i < _machOSectionsCount; ++i) {
2158 const macho_section<P>* sect = &_sectionsStart[i];
2159 if ( (sect->flags() & S_ATTR_DEBUG) != 0 ) {
2160 if ( strcmp(sect->segname(), "__DWARF") == 0 ) {
2161 // note that .o file has dwarf
2162 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoDwarf;
2163 // save off iteresting dwarf sections
2164 if ( strcmp(sect->sectname(), "__debug_info") == 0 )
2165 _file->_dwarfDebugInfoSect = sect;
2166 else if ( strcmp(sect->sectname(), "__debug_abbrev") == 0 )
2167 _file->_dwarfDebugAbbrevSect = sect;
2168 else if ( strcmp(sect->sectname(), "__debug_line") == 0 )
2169 _file->_dwarfDebugLineSect = sect;
2170 else if ( strcmp(sect->sectname(), "__debug_str") == 0 )
2171 _file->_dwarfDebugStringSect = sect;
2172 // linker does not propagate dwarf sections to output file
2173 continue;
2174 }
2175 else if ( strcmp(sect->segname(), "__LD") == 0 ) {
2176 if ( strncmp(sect->sectname(), "__compact_unwind", 16) == 0 ) {
2177 machOSects[count].sect = sect;
2178 totalSectionsSize += sizeof(CUSection<A>);
2179 machOSects[count++].type = sectionTypeCompactUnwind;
2180 continue;
2181 }
2182 }
2183 }
2184 // ignore empty __OBJC sections
2185 if ( (sect->size() == 0) && (strcmp(sect->segname(), "__OBJC") == 0) )
2186 continue;
2187 // objc image info section is really attributes and not content
2188 if ( ((strcmp(sect->sectname(), "__image_info") == 0) && (strcmp(sect->segname(), "__OBJC") == 0))
2189 || ((strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0) && (strcmp(sect->segname(), "__DATA") == 0)) ) {
2190 // struct objc_image_info {
2191 // uint32_t version; // initially 0
2192 // uint32_t flags;
2193 // };
2194 // #define OBJC_IMAGE_SUPPORTS_GC 2
2195 // #define OBJC_IMAGE_GC_ONLY 4
2196 // #define OBJC_IMAGE_IS_SIMULATED 32
2197 //
2198 const uint32_t* contents = (uint32_t*)(_file->fileContent()+sect->offset());
2199 if ( (sect->size() >= 8) && (contents[0] == 0) ) {
2200 uint32_t flags = E::get32(contents[1]);
2201 if ( (flags & 4) == 4 )
2202 _file->_objConstraint = ld::File::objcConstraintGC;
2203 else if ( (flags & 2) == 2 )
2204 _file->_objConstraint = ld::File::objcConstraintRetainReleaseOrGC;
2205 else if ( (flags & 32) == 32 )
2206 _file->_objConstraint = ld::File::objcConstraintRetainReleaseForSimulator;
2207 else
2208 _file->_objConstraint = ld::File::objcConstraintRetainRelease;
2209 if ( sect->size() > 8 ) {
2210 warning("section %s/%s has unexpectedly large size %llu in %s",
2211 sect->segname(), Section<A>::makeSectionName(sect), sect->size(), _file->path());
2212 }
2213 }
2214 else {
2215 warning("can't parse %s/%s section in %s", sect->segname(), Section<A>::makeSectionName(sect), _file->path());
2216 }
2217 continue;
2218 }
2219 machOSects[count].sect = sect;
2220 switch ( sect->flags() & SECTION_TYPE ) {
2221 case S_SYMBOL_STUBS:
2222 if ( _stubsSectionNum == 0 ) {
2223 _stubsSectionNum = i+1;
2224 _stubsMachOSection = sect;
2225 }
2226 else
2227 assert(1 && "multiple S_SYMBOL_STUBS sections");
2228 case S_LAZY_SYMBOL_POINTERS:
2229 break;
2230 case S_4BYTE_LITERALS:
2231 totalSectionsSize += sizeof(Literal4Section<A>);
2232 machOSects[count++].type = sectionTypeLiteral4;
2233 break;
2234 case S_8BYTE_LITERALS:
2235 totalSectionsSize += sizeof(Literal8Section<A>);
2236 machOSects[count++].type = sectionTypeLiteral8;
2237 break;
2238 case S_16BYTE_LITERALS:
2239 totalSectionsSize += sizeof(Literal16Section<A>);
2240 machOSects[count++].type = sectionTypeLiteral16;
2241 break;
2242 case S_NON_LAZY_SYMBOL_POINTERS:
2243 totalSectionsSize += sizeof(NonLazyPointerSection<A>);
2244 machOSects[count++].type = sectionTypeNonLazy;
2245 break;
2246 case S_LITERAL_POINTERS:
2247 if ( (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__cls_refs") == 0) ) {
2248 totalSectionsSize += sizeof(Objc1ClassReferences<A>);
2249 machOSects[count++].type = sectionTypeObjC1ClassRefs;
2250 }
2251 else {
2252 totalSectionsSize += sizeof(PointerToCStringSection<A>);
2253 machOSects[count++].type = sectionTypeCStringPointer;
2254 }
2255 break;
2256 case S_CSTRING_LITERALS:
2257 totalSectionsSize += sizeof(CStringSection<A>);
2258 machOSects[count++].type = sectionTypeCString;
2259 break;
2260 case S_MOD_INIT_FUNC_POINTERS:
2261 case S_MOD_TERM_FUNC_POINTERS:
2262 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
2263 case S_INTERPOSING:
2264 case S_ZEROFILL:
2265 case S_REGULAR:
2266 case S_COALESCED:
2267 case S_THREAD_LOCAL_REGULAR:
2268 case S_THREAD_LOCAL_ZEROFILL:
2269 if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__eh_frame") == 0) ) {
2270 totalSectionsSize += sizeof(CFISection<A>);
2271 machOSects[count++].type = sectionTypeCFI;
2272 }
2273 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__cfstring") == 0) ) {
2274 totalSectionsSize += sizeof(CFStringSection<A>);
2275 machOSects[count++].type = sectionTypeCFString;
2276 }
2277 else if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__ustring") == 0) ) {
2278 totalSectionsSize += sizeof(UTF16StringSection<A>);
2279 machOSects[count++].type = sectionTypeUTF16Strings;
2280 }
2281 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strncmp(sect->sectname(), "__objc_classrefs", 16) == 0) ) {
2282 totalSectionsSize += sizeof(ObjC2ClassRefsSection<A>);
2283 machOSects[count++].type = sectionTypeObjC2ClassRefs;
2284 }
2285 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__objc_catlist") == 0) ) {
2286 totalSectionsSize += sizeof(ObjC2CategoryListSection<A>);
2287 machOSects[count++].type = typeObjC2CategoryList;
2288 }
2289 else if ( _AppleObjc && (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__class") == 0) ) {
2290 totalSectionsSize += sizeof(ObjC1ClassSection<A>);
2291 machOSects[count++].type = sectionTypeObjC1Classes;
2292 }
2293 else {
2294 totalSectionsSize += sizeof(SymboledSection<A>);
2295 machOSects[count++].type = sectionTypeSymboled;
2296 }
2297 break;
2298 case S_THREAD_LOCAL_VARIABLES:
2299 totalSectionsSize += sizeof(TLVDefsSection<A>);
2300 machOSects[count++].type = sectionTypeTLVDefs;
2301 break;
2302 case S_THREAD_LOCAL_VARIABLE_POINTERS:
2303 default:
2304 throwf("unknown section type %d", sect->flags() & SECTION_TYPE);
2305 }
2306 }
2307
2308 // sort by address (mach-o object files don't aways have sections sorted)
2309 ::qsort(machOSects, count, sizeof(MachOSectionAndSectionClass<P>), MachOSectionAndSectionClass<P>::sorter);
2310
2311 // we will synthesize a dummy Section<A> object for tentative definitions
2312 if ( _tentativeDefinitionCount > 0 ) {
2313 totalSectionsSize += sizeof(TentativeDefinitionSection<A>);
2314 machOSects[count++].type = sectionTypeTentativeDefinitions;
2315 }
2316
2317 // we will synthesize a dummy Section<A> object for Absolute symbols
2318 if ( _absoluteSymbolCount > 0 ) {
2319 totalSectionsSize += sizeof(AbsoluteSymbolSection<A>);
2320 machOSects[count++].type = sectionTypeAbsoluteSymbols;
2321 }
2322
2323 // allocate one block for all Section objects as well as pointers to each
2324 uint8_t* space = new uint8_t[totalSectionsSize+count*sizeof(Section<A>*)];
2325 _file->_sectionsArray = (Section<A>**)space;
2326 _file->_sectionsArrayCount = count;
2327 Section<A>** objects = _file->_sectionsArray;
2328 space += count*sizeof(Section<A>*);
2329 for (uint32_t i=0; i < count; ++i) {
2330 switch ( machOSects[i].type ) {
2331 case sectionTypeIgnore:
2332 break;
2333 case sectionTypeLiteral4:
2334 *objects++ = new (space) Literal4Section<A>(*this, *_file, machOSects[i].sect);
2335 space += sizeof(Literal4Section<A>);
2336 break;
2337 case sectionTypeLiteral8:
2338 *objects++ = new (space) Literal8Section<A>(*this, *_file, machOSects[i].sect);
2339 space += sizeof(Literal8Section<A>);
2340 break;
2341 case sectionTypeLiteral16:
2342 *objects++ = new (space) Literal16Section<A>(*this, *_file, machOSects[i].sect);
2343 space += sizeof(Literal16Section<A>);
2344 break;
2345 case sectionTypeNonLazy:
2346 *objects++ = new (space) NonLazyPointerSection<A>(*this, *_file, machOSects[i].sect);
2347 space += sizeof(NonLazyPointerSection<A>);
2348 break;
2349 case sectionTypeCFI:
2350 _EHFrameSection = new (space) CFISection<A>(*this, *_file, machOSects[i].sect);
2351 *objects++ = _EHFrameSection;
2352 space += sizeof(CFISection<A>);
2353 break;
2354 case sectionTypeCString:
2355 *objects++ = new (space) CStringSection<A>(*this, *_file, machOSects[i].sect);
2356 space += sizeof(CStringSection<A>);
2357 break;
2358 case sectionTypeCStringPointer:
2359 *objects++ = new (space) PointerToCStringSection<A>(*this, *_file, machOSects[i].sect);
2360 space += sizeof(PointerToCStringSection<A>);
2361 break;
2362 case sectionTypeObjC1ClassRefs:
2363 *objects++ = new (space) Objc1ClassReferences<A>(*this, *_file, machOSects[i].sect);
2364 space += sizeof(Objc1ClassReferences<A>);
2365 break;
2366 case sectionTypeUTF16Strings:
2367 *objects++ = new (space) UTF16StringSection<A>(*this, *_file, machOSects[i].sect);
2368 space += sizeof(UTF16StringSection<A>);
2369 break;
2370 case sectionTypeCFString:
2371 *objects++ = new (space) CFStringSection<A>(*this, *_file, machOSects[i].sect);
2372 space += sizeof(CFStringSection<A>);
2373 break;
2374 case sectionTypeObjC2ClassRefs:
2375 *objects++ = new (space) ObjC2ClassRefsSection<A>(*this, *_file, machOSects[i].sect);
2376 space += sizeof(ObjC2ClassRefsSection<A>);
2377 break;
2378 case typeObjC2CategoryList:
2379 *objects++ = new (space) ObjC2CategoryListSection<A>(*this, *_file, machOSects[i].sect);
2380 space += sizeof(ObjC2CategoryListSection<A>);
2381 break;
2382 case sectionTypeObjC1Classes:
2383 *objects++ = new (space) ObjC1ClassSection<A>(*this, *_file, machOSects[i].sect);
2384 space += sizeof(ObjC1ClassSection<A>);
2385 break;
2386 case sectionTypeSymboled:
2387 *objects++ = new (space) SymboledSection<A>(*this, *_file, machOSects[i].sect);
2388 space += sizeof(SymboledSection<A>);
2389 break;
2390 case sectionTypeTLVDefs:
2391 *objects++ = new (space) TLVDefsSection<A>(*this, *_file, machOSects[i].sect);
2392 space += sizeof(TLVDefsSection<A>);
2393 break;
2394 case sectionTypeCompactUnwind:
2395 _compactUnwindSection = new (space) CUSection<A>(*this, *_file, machOSects[i].sect);
2396 *objects++ = _compactUnwindSection;
2397 space += sizeof(CUSection<A>);
2398 break;
2399 case sectionTypeTentativeDefinitions:
2400 *objects++ = new (space) TentativeDefinitionSection<A>(*this, *_file);
2401 space += sizeof(TentativeDefinitionSection<A>);
2402 break;
2403 case sectionTypeAbsoluteSymbols:
2404 _absoluteSection = new (space) AbsoluteSymbolSection<A>(*this, *_file);
2405 *objects++ = _absoluteSection;
2406 space += sizeof(AbsoluteSymbolSection<A>);
2407 break;
2408 default:
2409 throw "internal error uknown SectionType";
2410 }
2411 }
2412 }
2413
2414
2415 template <typename A>
2416 Section<A>* Parser<A>::sectionForAddress(typename A::P::uint_t addr)
2417 {
2418 for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2419 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2420 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2421 if ( sect != NULL ) {
2422 if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
2423 return _file->_sectionsArray[i];
2424 }
2425 }
2426 }
2427 // not strictly in any section
2428 // may be in a zero length section
2429 for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2430 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2431 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2432 if ( sect != NULL ) {
2433 if ( (sect->addr() == addr) && (sect->size() == 0) ) {
2434 return _file->_sectionsArray[i];
2435 }
2436 }
2437 }
2438
2439 throwf("sectionForAddress(0x%llX) address not in any section", (uint64_t)addr);
2440 }
2441
2442 template <typename A>
2443 Section<A>* Parser<A>::sectionForNum(unsigned int num)
2444 {
2445 for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2446 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2447 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2448 if ( sect != NULL ) {
2449 if ( num == (unsigned int)((sect - _sectionsStart)+1) )
2450 return _file->_sectionsArray[i];
2451 }
2452 }
2453 throwf("sectionForNum(%u) section number not for any section", num);
2454 }
2455
2456 template <typename A>
2457 Atom<A>* Parser<A>::findAtomByAddress(pint_t addr)
2458 {
2459 Section<A>* section = this->sectionForAddress(addr);
2460 return section->findAtomByAddress(addr);
2461 }
2462
2463 template <typename A>
2464 Atom<A>* Parser<A>::findAtomByAddressOrNullIfStub(pint_t addr)
2465 {
2466 if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) )
2467 return NULL;
2468 return findAtomByAddress(addr);
2469 }
2470
2471 template <typename A>
2472 Atom<A>* Parser<A>::findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom)
2473 {
2474 if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2475 // target is a stub, remove indirection
2476 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2477 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2478 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2479 // can't be to external weak symbol
2480 assert( (this->combineFromSymbol(sym) != ld::Atom::combineByName) || (this->scopeFromSymbol(sym) != ld::Atom::scopeGlobal) );
2481 *offsetInAtom = 0;
2482 return this->findAtomByName(this->nameFromSymbol(sym));
2483 }
2484 Atom<A>* target = this->findAtomByAddress(addr);
2485 *offsetInAtom = addr - target->_objAddress;
2486 return target;
2487 }
2488
2489 template <typename A>
2490 Atom<A>* Parser<A>::findAtomByName(const char* name)
2491 {
2492 uint8_t* p = _file->_atomsArray;
2493 for(int i=_file->_atomsArrayCount; i > 0; --i) {
2494 Atom<A>* atom = (Atom<A>*)p;
2495 if ( strcmp(name, atom->name()) == 0 )
2496 return atom;
2497 p += sizeof(Atom<A>);
2498 }
2499 return NULL;
2500 }
2501
2502 template <typename A>
2503 void Parser<A>::findTargetFromAddress(pint_t addr, TargetDesc& target)
2504 {
2505 if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2506 // target is a stub, remove indirection
2507 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2508 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2509 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2510 target.atom = NULL;
2511 target.name = this->nameFromSymbol(sym);
2512 target.weakImport = this->weakImportFromSymbol(sym);
2513 target.addend = 0;
2514 return;
2515 }
2516 Section<A>* section = this->sectionForAddress(addr);
2517 target.atom = section->findAtomByAddress(addr);
2518 target.addend = addr - target.atom->_objAddress;
2519 target.weakImport = false;
2520 target.name = NULL;
2521 }
2522
2523 template <typename A>
2524 void Parser<A>::findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target)
2525 {
2526 findTargetFromAddress(baseAddr, target);
2527 target.addend = addr - target.atom->_objAddress;
2528 }
2529
2530 template <typename A>
2531 void Parser<A>::findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum, TargetDesc& target)
2532 {
2533 if ( sectNum == R_ABS ) {
2534 // target is absolute symbol that corresponds to addr
2535 if ( _absoluteSection != NULL ) {
2536 target.atom = _absoluteSection->findAbsAtomForValue(addr);
2537 if ( target.atom != NULL ) {
2538 target.name = NULL;
2539 target.weakImport = false;
2540 target.addend = 0;
2541 return;
2542 }
2543 }
2544 throwf("R_ABS reloc but no absolute symbol at target address");
2545 }
2546
2547 if ( hasStubsSection() && (stubsSectionNum() == sectNum) ) {
2548 // target is a stub, remove indirection
2549 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2550 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2551 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2552 // use direct reference when stub is to a static function
2553 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (this->nameFromSymbol(sym)[0] == 'L')) ) {
2554 this->findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
2555 }
2556 else {
2557 target.atom = NULL;
2558 target.name = this->nameFromSymbol(sym);
2559 target.weakImport = this->weakImportFromSymbol(sym);
2560 target.addend = 0;
2561 }
2562 return;
2563 }
2564 Section<A>* section = this->sectionForNum(sectNum);
2565 target.atom = section->findAtomByAddress(addr);
2566 if ( target.atom == NULL ) {
2567 typedef typename A::P::sint_t sint_t;
2568 sint_t a = (sint_t)addr;
2569 sint_t sectStart = (sint_t)(section->machoSection()->addr());
2570 sint_t sectEnd = sectStart + section->machoSection()->size();
2571 if ( a < sectStart ) {
2572 // target address is before start of section, so must be negative addend
2573 target.atom = section->findAtomByAddress(sectStart);
2574 target.addend = a - sectStart;
2575 target.weakImport = false;
2576 target.name = NULL;
2577 return;
2578 }
2579 else if ( a >= sectEnd ) {
2580 target.atom = section->findAtomByAddress(sectEnd-1);
2581 target.addend = a - sectEnd;
2582 target.weakImport = false;
2583 target.name = NULL;
2584 return;
2585 }
2586 }
2587 assert(target.atom != NULL);
2588 target.addend = addr - target.atom->_objAddress;
2589 target.weakImport = false;
2590 target.name = NULL;
2591 }
2592
2593 template <typename A>
2594 void Parser<A>::addDtraceExtraInfos(const SourceLocation& src, const char* providerName)
2595 {
2596 // for every ___dtrace_stability$* and ___dtrace_typedefs$* undefine with
2597 // a matching provider name, add a by-name kDtraceTypeReference at probe site
2598 const char* dollar = strchr(providerName, '$');
2599 if ( dollar != NULL ) {
2600 int providerNameLen = dollar-providerName+1;
2601 for ( std::vector<const char*>::iterator it = _dtraceProviderInfo.begin(); it != _dtraceProviderInfo.end(); ++it) {
2602 const char* typeDollar = strchr(*it, '$');
2603 if ( typeDollar != NULL ) {
2604 if ( strncmp(typeDollar+1, providerName, providerNameLen) == 0 ) {
2605 addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindDtraceExtra,false, *it);
2606 }
2607 }
2608 }
2609 }
2610 }
2611
2612 template <typename A>
2613 const char* Parser<A>::scanSymbolTableForAddress(uint64_t addr)
2614 {
2615 uint64_t closestSymAddr = 0;
2616 const char* closestSymName = NULL;
2617 for (uint32_t i=0; i < this->_symbolCount; ++i) {
2618 const macho_nlist<P>& sym = symbolFromIndex(i);
2619 // ignore stabs
2620 if ( (sym.n_type() & N_STAB) != 0 )
2621 continue;
2622
2623 // only look at definitions
2624 if ( (sym.n_type() & N_TYPE) != N_SECT )
2625 continue;
2626
2627 // return with exact match
2628 if ( sym.n_value() == addr ) {
2629 const char* name = nameFromSymbol(sym);
2630 if ( strncmp(name, "ltmp", 4) != 0 )
2631 return name;
2632 // treat 'ltmp*' labels as close match
2633 closestSymAddr = sym.n_value();
2634 closestSymName = name;
2635 }
2636
2637 // record closest seen so far
2638 if ( (sym.n_value() < addr) && ((sym.n_value() > closestSymAddr) || (closestSymName == NULL)) )
2639 closestSymName = nameFromSymbol(sym);
2640 }
2641
2642 return (closestSymName != NULL) ? closestSymName : "unknown";
2643 }
2644
2645
2646 template <typename A>
2647 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind setKind, const TargetDesc& target)
2648 {
2649 // some fixup pairs can be combined
2650 ld::Fixup::Cluster cl = ld::Fixup::k1of3;
2651 ld::Fixup::Kind firstKind = ld::Fixup::kindSetTargetAddress;
2652 bool combined = false;
2653 if ( target.addend == 0 ) {
2654 cl = ld::Fixup::k1of1;
2655 combined = true;
2656 switch ( setKind ) {
2657 case ld::Fixup::kindStoreLittleEndian32:
2658 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian32;
2659 break;
2660 case ld::Fixup::kindStoreLittleEndian64:
2661 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian64;
2662 break;
2663 case ld::Fixup::kindStoreBigEndian32:
2664 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian32;
2665 break;
2666 case ld::Fixup::kindStoreBigEndian64:
2667 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian64;
2668 break;
2669 case ld::Fixup::kindStoreX86BranchPCRel32:
2670 firstKind = ld::Fixup::kindStoreTargetAddressX86BranchPCRel32;
2671 break;
2672 case ld::Fixup::kindStoreX86PCRel32:
2673 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32;
2674 break;
2675 case ld::Fixup::kindStoreX86PCRel32GOTLoad:
2676 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32GOTLoad;
2677 break;
2678 case ld::Fixup::kindStoreX86PCRel32TLVLoad:
2679 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32TLVLoad;
2680 break;
2681 case ld::Fixup::kindStoreX86Abs32TLVLoad:
2682 firstKind = ld::Fixup::kindStoreTargetAddressX86Abs32TLVLoad;
2683 break;
2684 case ld::Fixup::kindStoreARMBranch24:
2685 firstKind = ld::Fixup::kindStoreTargetAddressARMBranch24;
2686 break;
2687 case ld::Fixup::kindStoreThumbBranch22:
2688 firstKind = ld::Fixup::kindStoreTargetAddressThumbBranch22;
2689 break;
2690 #if SUPPORT_ARCH_arm64
2691 case ld::Fixup::kindStoreARM64Branch26:
2692 firstKind = ld::Fixup::kindStoreTargetAddressARM64Branch26;
2693 break;
2694 case ld::Fixup::kindStoreARM64Page21:
2695 firstKind = ld::Fixup::kindStoreTargetAddressARM64Page21;
2696 break;
2697 case ld::Fixup::kindStoreARM64PageOff12:
2698 firstKind = ld::Fixup::kindStoreTargetAddressARM64PageOff12;
2699 break;
2700 case ld::Fixup::kindStoreARM64GOTLoadPage21:
2701 firstKind = ld::Fixup::kindStoreTargetAddressARM64GOTLoadPage21;
2702 break;
2703 case ld::Fixup::kindStoreARM64GOTLoadPageOff12:
2704 firstKind = ld::Fixup::kindStoreTargetAddressARM64GOTLoadPageOff12;
2705 break;
2706 case ld::Fixup::kindStoreARM64TLVPLoadPage21:
2707 firstKind = ld::Fixup::kindStoreTargetAddressARM64TLVPLoadPage21;
2708 break;
2709 case ld::Fixup::kindStoreARM64TLVPLoadPageOff12:
2710 firstKind = ld::Fixup::kindStoreTargetAddressARM64TLVPLoadPageOff12;
2711 break;
2712 #endif
2713 default:
2714 combined = false;
2715 cl = ld::Fixup::k1of2;
2716 break;
2717 }
2718 }
2719
2720 if ( target.atom != NULL ) {
2721 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2722 addFixup(src, cl, firstKind, target.atom);
2723 }
2724 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2725 addFixup(src, cl, firstKind, ld::Fixup::bindingByContentBound, target.atom);
2726 }
2727 else if ( (src.atom->section().type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
2728 // backing string in CFStrings should always be direct
2729 addFixup(src, cl, firstKind, target.atom);
2730 }
2731 else if ( (src.atom == target.atom) && (target.atom->combine() == ld::Atom::combineByName) ) {
2732 // reference to self should always be direct
2733 addFixup(src, cl, firstKind, target.atom);
2734 }
2735 else {
2736 // change direct fixup to by-name fixup
2737 addFixup(src, cl, firstKind, false, target.atom->name());
2738 }
2739 }
2740 else {
2741 addFixup(src, cl, firstKind, target.weakImport, target.name);
2742 }
2743 if ( target.addend == 0 ) {
2744 if ( ! combined )
2745 addFixup(src, ld::Fixup::k2of2, setKind);
2746 }
2747 else {
2748 addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, target.addend);
2749 addFixup(src, ld::Fixup::k3of3, setKind);
2750 }
2751 }
2752
2753 template <typename A>
2754 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase)
2755 {
2756 ld::Fixup::Cluster cl = (target.addend == 0) ? ld::Fixup::k1of4 : ld::Fixup::k1of5;
2757 if ( target.atom != NULL ) {
2758 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2759 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.atom);
2760 }
2761 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2762 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
2763 }
2764 else {
2765 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
2766 }
2767 }
2768 else {
2769 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.weakImport, target.name);
2770 }
2771 if ( target.addend == 0 ) {
2772 assert(picBase.atom != NULL);
2773 addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2774 addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, picBase.addend);
2775 addFixup(src, ld::Fixup::k4of4, kind);
2776 }
2777 else {
2778 addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
2779 addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2780 addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, picBase.addend);
2781 addFixup(src, ld::Fixup::k5of5, kind);
2782 }
2783 }
2784
2785
2786
2787 template <typename A>
2788 uint32_t TentativeDefinitionSection<A>::computeAtomCount(class Parser<A>& parser,
2789 struct Parser<A>::LabelAndCFIBreakIterator& it,
2790 const struct Parser<A>::CFI_CU_InfoArrays&)
2791 {
2792 return parser.tentativeDefinitionCount();
2793 }
2794
2795 template <typename A>
2796 uint32_t TentativeDefinitionSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2797 struct Parser<A>::LabelAndCFIBreakIterator& it,
2798 const struct Parser<A>::CFI_CU_InfoArrays&)
2799 {
2800 this->_beginAtoms = (Atom<A>*)p;
2801 uint32_t count = 0;
2802 for (uint32_t i=parser.undefinedStartIndex(); i < parser.undefinedEndIndex(); ++i) {
2803 const macho_nlist<P>& sym = parser.symbolFromIndex(i);
2804 if ( ((sym.n_type() & N_TYPE) == N_UNDF) && (sym.n_value() != 0) ) {
2805 uint64_t size = sym.n_value();
2806 uint8_t alignP2 = GET_COMM_ALIGN(sym.n_desc());
2807 if ( alignP2 == 0 ) {
2808 // common symbols align to their size
2809 // that is, a 4-byte common aligns to 4-bytes
2810 // if this size is not a power of two,
2811 // then round up to the next power of two
2812 alignP2 = 63 - (uint8_t)__builtin_clzll(size);
2813 if ( size != (1ULL << alignP2) )
2814 ++alignP2;
2815 }
2816 // limit alignment of extremely large commons to 2^15 bytes (8-page)
2817 if ( alignP2 > 15 )
2818 alignP2 = 15;
2819 Atom<A>* allocatedSpace = (Atom<A>*)p;
2820 new (allocatedSpace) Atom<A>(*this, parser.nameFromSymbol(sym), (pint_t)ULLONG_MAX, size,
2821 ld::Atom::definitionTentative, ld::Atom::combineByName,
2822 parser.scopeFromSymbol(sym), ld::Atom::typeZeroFill, ld::Atom::symbolTableIn,
2823 parser.dontDeadStripFromSymbol(sym), false, false, ld::Atom::Alignment(alignP2) );
2824 p += sizeof(Atom<A>);
2825 ++count;
2826 }
2827 }
2828 this->_endAtoms = (Atom<A>*)p;
2829 return count;
2830 }
2831
2832
2833 template <typename A>
2834 uint32_t AbsoluteSymbolSection<A>::computeAtomCount(class Parser<A>& parser,
2835 struct Parser<A>::LabelAndCFIBreakIterator& it,
2836 const struct Parser<A>::CFI_CU_InfoArrays&)
2837 {
2838 return parser.absoluteSymbolCount();
2839 }
2840
2841 template <typename A>
2842 uint32_t AbsoluteSymbolSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2843 struct Parser<A>::LabelAndCFIBreakIterator& it,
2844 const struct Parser<A>::CFI_CU_InfoArrays&)
2845 {
2846 this->_beginAtoms = (Atom<A>*)p;
2847 uint32_t count = 0;
2848 for (uint32_t i=0; i < parser.symbolCount(); ++i) {
2849 const macho_nlist<P>& sym = parser.symbolFromIndex(i);
2850 if ( (sym.n_type() & N_TYPE) != N_ABS )
2851 continue;
2852 const char* absName = parser.nameFromSymbol(sym);
2853 // ignore .objc_class_name_* symbols
2854 if ( strncmp(absName, ".objc_class_name_", 17) == 0 )
2855 continue;
2856 // ignore .objc_class_name_* symbols
2857 if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
2858 continue;
2859 // ignore empty *.eh symbols
2860 if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
2861 continue;
2862
2863 Atom<A>* allocatedSpace = (Atom<A>*)p;
2864 new (allocatedSpace) Atom<A>(*this, parser, sym, 0);
2865 p += sizeof(Atom<A>);
2866 ++count;
2867 }
2868 this->_endAtoms = (Atom<A>*)p;
2869 return count;
2870 }
2871
2872 template <typename A>
2873 Atom<A>* AbsoluteSymbolSection<A>::findAbsAtomForValue(typename A::P::uint_t value)
2874 {
2875 Atom<A>* end = this->_endAtoms;
2876 for(Atom<A>* p = this->_beginAtoms; p < end; ++p) {
2877 if ( p->_objAddress == value )
2878 return p;
2879 }
2880 return NULL;
2881 }
2882
2883
2884 template <typename A>
2885 uint32_t Parser<A>::indirectSymbol(uint32_t indirectIndex)
2886 {
2887 if ( indirectIndex >= _indirectTableCount )
2888 throw "indirect symbol index out of range";
2889 return E::get32(_indirectTable[indirectIndex]);
2890 }
2891
2892 template <typename A>
2893 const macho_nlist<typename A::P>& Parser<A>::symbolFromIndex(uint32_t index)
2894 {
2895 if ( index > _symbolCount )
2896 throw "symbol index out of range";
2897 return _symbols[index];
2898 }
2899
2900 template <typename A>
2901 const macho_section<typename A::P>* Parser<A>::machOSectionFromSectionIndex(uint32_t index)
2902 {
2903 if ( index >= _machOSectionsCount )
2904 throw "section index out of range";
2905 return &_sectionsStart[index];
2906 }
2907
2908 template <typename A>
2909 uint32_t Parser<A>::symbolIndexFromIndirectSectionAddress(pint_t addr, const macho_section<P>* sect)
2910 {
2911 uint32_t elementSize = 0;
2912 switch ( sect->flags() & SECTION_TYPE ) {
2913 case S_SYMBOL_STUBS:
2914 elementSize = sect->reserved2();
2915 break;
2916 case S_LAZY_SYMBOL_POINTERS:
2917 case S_NON_LAZY_SYMBOL_POINTERS:
2918 elementSize = sizeof(pint_t);
2919 break;
2920 default:
2921 throw "section does not use inirect symbol table";
2922 }
2923 uint32_t indexInSection = (addr - sect->addr()) / elementSize;
2924 uint32_t indexIntoIndirectTable = sect->reserved1() + indexInSection;
2925 return this->indirectSymbol(indexIntoIndirectTable);
2926 }
2927
2928
2929
2930 template <typename A>
2931 const char* Parser<A>::nameFromSymbol(const macho_nlist<P>& sym)
2932 {
2933 return &_strings[sym.n_strx()];
2934 }
2935
2936 template <typename A>
2937 ld::Atom::Scope Parser<A>::scopeFromSymbol(const macho_nlist<P>& sym)
2938 {
2939 if ( (sym.n_type() & N_EXT) == 0 )
2940 return ld::Atom::scopeTranslationUnit;
2941 else if ( (sym.n_type() & N_PEXT) != 0 )
2942 return ld::Atom::scopeLinkageUnit;
2943 else if ( this->nameFromSymbol(sym)[0] == 'l' ) // since all 'l' symbols will be remove, don't make them global
2944 return ld::Atom::scopeLinkageUnit;
2945 else
2946 return ld::Atom::scopeGlobal;
2947 }
2948
2949 template <typename A>
2950 ld::Atom::Definition Parser<A>::definitionFromSymbol(const macho_nlist<P>& sym)
2951 {
2952 switch ( sym.n_type() & N_TYPE ) {
2953 case N_ABS:
2954 return ld::Atom::definitionAbsolute;
2955 case N_SECT:
2956 return ld::Atom::definitionRegular;
2957 case N_UNDF:
2958 if ( sym.n_value() != 0 )
2959 return ld::Atom::definitionTentative;
2960 }
2961 throw "definitionFromSymbol() bad symbol";
2962 }
2963
2964 template <typename A>
2965 ld::Atom::Combine Parser<A>::combineFromSymbol(const macho_nlist<P>& sym)
2966 {
2967 if ( sym.n_desc() & N_WEAK_DEF )
2968 return ld::Atom::combineByName;
2969 else
2970 return ld::Atom::combineNever;
2971 }
2972
2973
2974 template <typename A>
2975 ld::Atom::SymbolTableInclusion Parser<A>::inclusionFromSymbol(const macho_nlist<P>& sym)
2976 {
2977 const char* symbolName = nameFromSymbol(sym);
2978 // labels beginning with 'l' (lowercase ell) are automatically removed in final linked images <rdar://problem/4571042>
2979 // labels beginning with 'L' should have been stripped by the assembler, so are stripped now
2980 if ( sym.n_desc() & REFERENCED_DYNAMICALLY )
2981 return ld::Atom::symbolTableInAndNeverStrip;
2982 else if ( symbolName[0] == 'l' )
2983 return ld::Atom::symbolTableNotInFinalLinkedImages;
2984 else if ( symbolName[0] == 'L' )
2985 return ld::Atom::symbolTableNotIn;
2986 else
2987 return ld::Atom::symbolTableIn;
2988 }
2989
2990 template <typename A>
2991 bool Parser<A>::dontDeadStripFromSymbol(const macho_nlist<P>& sym)
2992 {
2993 return ( (sym.n_desc() & (N_NO_DEAD_STRIP|REFERENCED_DYNAMICALLY)) != 0 );
2994 }
2995
2996 template <typename A>
2997 bool Parser<A>::isThumbFromSymbol(const macho_nlist<P>& sym)
2998 {
2999 return ( sym.n_desc() & N_ARM_THUMB_DEF );
3000 }
3001
3002 template <typename A>
3003 bool Parser<A>::weakImportFromSymbol(const macho_nlist<P>& sym)
3004 {
3005 return ( ((sym.n_type() & N_TYPE) == N_UNDF) && ((sym.n_desc() & N_WEAK_REF) != 0) );
3006 }
3007
3008 template <typename A>
3009 bool Parser<A>::resolverFromSymbol(const macho_nlist<P>& sym)
3010 {
3011 return ( sym.n_desc() & N_SYMBOL_RESOLVER );
3012 }
3013
3014
/* Advance *offset past one LEB128 value (signed or unsigned): every byte
   with the continuation bit (0x80) set, plus the terminating byte.
   Never moves beyond 'end'; a value cut off by 'end' leaves *offset == end.  */
static void
skip_leb128 (const uint8_t ** offset, const uint8_t * end)
{
	const uint8_t* cursor = *offset;
	while (cursor != end) {
		const uint8_t byte = *cursor++;
		// a byte without the continuation bit ends the value
		if (byte < 0x80)
			break;
	}
	*offset = cursor;
}
3024
/* Decode one ULEB128 value starting at *offset into a 64-bit word and
   advance *offset past it.  Returns (uint64_t)-1 if 'end' is reached before
   the value terminates, or if the value does not fit in 64 bits; on
   overflow the remaining bytes of the value are still consumed.  */
static uint64_t
read_uleb128 (const uint8_t ** offset, const uint8_t * end)
{
	uint64_t value = 0;
	int shift = 0;

	for (;;) {
		// ran out of data in the middle of the value
		if (*offset == end)
			return (uint64_t) -1;

		const uint8_t byte = *(*offset)++;
		const uint64_t payload = byte & 0x7f;

		// the payload must survive being shifted into place without losing bits
		const bool fits = (shift < 64) && (((payload << shift) >> shift) == payload);
		if (fits) {
			value |= payload << shift;
			shift += 7;
		}
		else {
			value = (uint64_t) -1;
		}

		// a byte without the continuation bit ends the value
		if (byte < 0x80)
			return value;
	}
}
3048
3049
3050 /* Skip over a DWARF attribute of form FORM. */
3051 template <typename A>
3052 bool Parser<A>::skip_form(const uint8_t ** offset, const uint8_t * end, uint64_t form,
3053 uint8_t addr_size, bool dwarf64)
3054 {
3055 int64_t sz=0;
3056
3057 switch (form)
3058 {
3059 case DW_FORM_addr:
3060 sz = addr_size;
3061 break;
3062
3063 case DW_FORM_block2:
3064 if (end - *offset < 2)
3065 return false;
3066 sz = 2 + A::P::E::get16(*(uint16_t*)offset);
3067 break;
3068
3069 case DW_FORM_block4:
3070 if (end - *offset < 4)
3071 return false;
3072 sz = 2 + A::P::E::get32(*(uint32_t*)offset);
3073 break;
3074
3075 case DW_FORM_data2:
3076 case DW_FORM_ref2:
3077 sz = 2;
3078 break;
3079
3080 case DW_FORM_data4:
3081 case DW_FORM_ref4:
3082 sz = 4;
3083 break;
3084
3085 case DW_FORM_data8:
3086 case DW_FORM_ref8:
3087 sz = 8;
3088 break;
3089
3090 case DW_FORM_string:
3091 while (*offset != end && **offset)
3092 ++*offset;
3093 case DW_FORM_data1:
3094 case DW_FORM_flag:
3095 case DW_FORM_ref1:
3096 sz = 1;
3097 break;
3098
3099 case DW_FORM_block:
3100 sz = read_uleb128 (offset, end);
3101 break;
3102
3103 case DW_FORM_block1:
3104 if (*offset == end)
3105 return false;
3106 sz = 1 + **offset;
3107 break;
3108
3109 case DW_FORM_sdata:
3110 case DW_FORM_udata:
3111 case DW_FORM_ref_udata:
3112 skip_leb128 (offset, end);
3113 return true;
3114
3115 case DW_FORM_strp:
3116 case DW_FORM_ref_addr:
3117 sz = 4;
3118 break;
3119
3120 case DW_FORM_sec_offset:
3121 sz = sizeof(typename A::P::uint_t);
3122 break;
3123
3124 case DW_FORM_exprloc:
3125 sz = read_uleb128 (offset, end);
3126 break;
3127
3128 case DW_FORM_flag_present:
3129 sz = 0;
3130 break;
3131
3132 case DW_FORM_ref_sig8:
3133 sz = 8;
3134 break;
3135
3136 default:
3137 return false;
3138 }
3139 if (end - *offset < sz)
3140 return false;
3141 *offset += sz;
3142 return true;
3143 }
3144
3145
3146 template <typename A>
3147 const char* Parser<A>::getDwarfString(uint64_t form, const uint8_t* p)
3148 {
3149 if ( form == DW_FORM_string )
3150 return (const char*)p;
3151 else if ( form == DW_FORM_strp ) {
3152 uint32_t offset = E::get32(*((uint32_t*)p));
3153 const char* dwarfStrings = (char*)_file->fileContent() + _file->_dwarfDebugStringSect->offset();
3154 if ( offset > _file->_dwarfDebugStringSect->size() ) {
3155 warning("unknown dwarf DW_FORM_strp (offset=0x%08X) is too big in %s\n", offset, this->_path);
3156 return NULL;
3157 }
3158 return &dwarfStrings[offset];
3159 }
3160 warning("unknown dwarf string encoding (form=%lld) in %s\n", form, this->_path);
3161 return NULL;
3162 }
3163
3164
// Pairs an atom with one line-info record.
// parseDebugInfo() keeps a std::vector of these while adding line number
// info from dwarf to atoms.
3165 template <typename A>
3166 struct AtomAndLineInfo {
// atom the record is associated with — presumably the atom whose address
// range contains the line entry's pc; confirm against parseDebugInfo()
3167 Atom<A>* atom;
// the line-info record itself
3168 ld::Atom::LineInfo info;
3169 };
3170
3171
3172 // <rdar://problem/5591394> Add support to ld64 for N_FUN stabs when used for symbolic constants
3173 // Returns whether a stabStr belonging to an N_FUN stab represents a
3174 // symbolic constant rather than a function
3175 template <typename A>
3176 bool Parser<A>::isConstFunStabs(const char *stabStr)
3177 {
3178 const char* colon;
3179 // N_FUN can be used for both constants and for functions. In case it's a constant,
3180 // the format of the stabs string is "symname:c=<value>;"
3181 // ':' cannot appear in the symbol name, except if it's an Objective-C method
3182 // (in which case the symbol name starts with + or -, and then it's definitely
3183 // not a constant)
3184 return (stabStr != NULL) && (stabStr[0] != '+') && (stabStr[0] != '-')
3185 && ((colon = strchr(stabStr, ':')) != NULL)
3186 && (colon[1] == 'c') && (colon[2] == '=');
3187 }
3188
3189
3190 template <typename A>
3191 void Parser<A>::parseDebugInfo()
3192 {
3193 // check for dwarf __debug_info section
3194 if ( _file->_dwarfDebugInfoSect == NULL ) {
3195 // if no DWARF debug info, look for stabs
3196 this->parseStabs();
3197 return;
3198 }
3199 if ( _file->_dwarfDebugInfoSect->size() == 0 )
3200 return;
3201
3202 uint64_t stmtList;
3203 const char* tuDir;
3204 const char* tuName;
3205 if ( !read_comp_unit(&tuName, &tuDir, &stmtList) ) {
3206 // if can't parse dwarf, warn and give up
3207 _file->_dwarfTranslationUnitPath = NULL;
3208 warning("can't parse dwarf compilation unit info in %s", _path);
3209 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoNone;
3210 return;
3211 }
3212 if ( (tuName != NULL) && (tuName[0] == '/') ) {
3213 _file->_dwarfTranslationUnitPath = tuName;
3214 }
3215 else if ( (tuDir != NULL) && (tuName != NULL) ) {
3216 asprintf((char**)&(_file->_dwarfTranslationUnitPath), "%s/%s", tuDir, tuName);
3217 }
3218 else if ( tuDir == NULL ) {
3219 _file->_dwarfTranslationUnitPath = tuName;
3220 }
3221 else {
3222 _file->_dwarfTranslationUnitPath = NULL;
3223 }
3224
3225 // add line number info to atoms from dwarf
3226 std::vector<AtomAndLineInfo<A> > entries;
3227 entries.reserve(64);
3228 if ( _file->_debugInfoKind == ld::relocatable::File::kDebugInfoDwarf ) {
3229 // file with just data will have no __debug_line info
3230 if ( (_file->_dwarfDebugLineSect != NULL) && (_file->_dwarfDebugLineSect->size() != 0) ) {
3231 // validate stmt_list
3232 if ( (stmtList != (uint64_t)-1) && (stmtList < _file->_dwarfDebugLineSect->size()) ) {
3233 const uint8_t* debug_line = (uint8_t*)_file->fileContent() + _file->_dwarfDebugLineSect->offset();
3234 struct line_reader_data* lines = line_open(&debug_line[stmtList],
3235 _file->_dwarfDebugLineSect->size() - stmtList, E::little_endian);
3236 struct line_info result;
3237 Atom<A>* curAtom = NULL;
3238 uint32_t curAtomOffset = 0;
3239 uint32_t curAtomAddress = 0;
3240 uint32_t curAtomSize = 0;
3241 std::map<uint32_t,const char*> dwarfIndexToFile;
3242 if ( lines != NULL ) {
3243 while ( line_next(lines, &result, line_stop_pc) ) {
3244 //fprintf(stderr, "curAtom=%p, result.pc=0x%llX, result.line=%llu, result.end_of_sequence=%d,"
3245 // " curAtomAddress=0x%X, curAtomSize=0x%X\n",
3246 // curAtom, result.pc, result.line, result.end_of_sequence, curAtomAddress, curAtomSize);
3247 // work around weird debug line table compiler generates if no functions in __text section
3248 if ( (curAtom == NULL) && (result.pc == 0) && result.end_of_sequence && (result.file == 1))
3249 continue;
3250 // for performance, see if in next pc is in current atom
3251 if ( (curAtom != NULL) && (curAtomAddress <= result.pc) && (result.pc < (curAtomAddress+curAtomSize)) ) {
3252 curAtomOffset = result.pc - curAtomAddress;
3253 }
3254 // or pc at end of current atom
3255 else if ( result.end_of_sequence && (curAtom != NULL) && (result.pc == (curAtomAddress+curAtomSize)) ) {
3256 curAtomOffset = result.pc - curAtomAddress;
3257 }
3258 // or only one function that is a one line function
3259 else if ( result.end_of_sequence && (curAtom == NULL) && (this->findAtomByAddress(0) != NULL) && (result.pc == this->findAtomByAddress(0)->size()) ) {
3260 curAtom = this->findAtomByAddress(0);
3261 curAtomOffset = result.pc - curAtom->objectAddress();
3262 curAtomAddress = curAtom->objectAddress();
3263 curAtomSize = curAtom->size();
3264 }
3265 else {
3266 // do slow look up of atom by address
3267 try {
3268 curAtom = this->findAtomByAddress(result.pc);
3269 }
3270 catch (...) {
3271 // in case of bug in debug info, don't abort link, just limp on
3272 curAtom = NULL;
3273 }
3274 if ( curAtom == NULL )
3275 break; // file has line info but no functions
3276 if ( result.end_of_sequence && (curAtomAddress+curAtomSize < result.pc) ) {
3277 // a one line function can be returned by line_next() as one entry with pc at end of blob
3278 // look for alt atom starting at end of previous atom
3279 uint32_t previousEnd = curAtomAddress+curAtomSize;
3280 Atom<A>* alt = this->findAtomByAddressOrNullIfStub(previousEnd);
3281 if ( alt == NULL )
3282 continue; // ignore spurious debug info for stubs
3283 if ( result.pc <= alt->objectAddress() + alt->size() ) {
3284 curAtom = alt;
3285 curAtomOffset = result.pc - alt->objectAddress();
3286 curAtomAddress = alt->objectAddress();
3287 curAtomSize = alt->size();
3288 }
3289 else {
3290 curAtomOffset = result.pc - curAtom->objectAddress();
3291 curAtomAddress = curAtom->objectAddress();
3292 curAtomSize = curAtom->size();
3293 }
3294 }
3295 else {
3296 curAtomOffset = result.pc - curAtom->objectAddress();
3297 curAtomAddress = curAtom->objectAddress();
3298 curAtomSize = curAtom->size();
3299 }
3300 }
3301 const char* filename;
3302 std::map<uint32_t,const char*>::iterator pos = dwarfIndexToFile.find(result.file);
3303 if ( pos == dwarfIndexToFile.end() ) {
3304 filename = line_file(lines, result.file);
3305 dwarfIndexToFile[result.file] = filename;
3306 }
3307 else {
3308 filename = pos->second;
3309 }
3310 // only record for ~8000 line info records per function
3311 if ( curAtom->roomForMoreLineInfoCount() ) {
3312 AtomAndLineInfo<A> entry;
3313 entry.atom = curAtom;
3314 entry.info.atomOffset = curAtomOffset;
3315 entry.info.fileName = filename;
3316 entry.info.lineNumber = result.line;
3317 //fprintf(stderr, "addr=0x%08llX, line=%lld, file=%s, atom=%s, atom.size=0x%X, end=%d\n",
3318 // result.pc, result.line, filename, curAtom->name(), curAtomSize, result.end_of_sequence);
3319 entries.push_back(entry);
3320 curAtom->incrementLineInfoCount();
3321 }
3322 if ( result.end_of_sequence ) {
3323 curAtom = NULL;
3324 }
3325 }
3326 line_free(lines);
3327 }
3328 }
3329 }
3330 }
3331
3332 // assign line info start offset for each atom
3333 uint8_t* p = _file->_atomsArray;
3334 uint32_t liOffset = 0;
3335 for(int i=_file->_atomsArrayCount; i > 0; --i) {
3336 Atom<A>* atom = (Atom<A>*)p;
3337 atom->_lineInfoStartIndex = liOffset;
3338 liOffset += atom->_lineInfoCount;
3339 atom->_lineInfoCount = 0;
3340 p += sizeof(Atom<A>);
3341 }
3342 assert(liOffset == entries.size());
3343 _file->_lineInfos.reserve(liOffset);
3344
3345 // copy each line info for each atom
3346 for (typename std::vector<AtomAndLineInfo<A> >::iterator it = entries.begin(); it != entries.end(); ++it) {
3347 uint32_t slot = it->atom->_lineInfoStartIndex + it->atom->_lineInfoCount;
3348 _file->_lineInfos[slot] = it->info;
3349 it->atom->_lineInfoCount++;
3350 }
3351
3352 // done with temp vector
3353 entries.clear();
3354 }
3355
3356 template <typename A>
3357 void Parser<A>::parseStabs()
3358 {
3359 // scan symbol table for stabs entries
3360 Atom<A>* currentAtom = NULL;
3361 pint_t currentAtomAddress = 0;
3362 enum { start, inBeginEnd, inFun } state = start;
3363 for (uint32_t symbolIndex = 0; symbolIndex < _symbolCount; ++symbolIndex ) {
3364 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
3365 bool useStab = true;
3366 uint8_t type = sym.n_type();
3367 const char* symString = (sym.n_strx() != 0) ? this->nameFromSymbol(sym) : NULL;
3368 if ( (type & N_STAB) != 0 ) {
3369 _file->_debugInfoKind = (_hasUUID ? ld::relocatable::File::kDebugInfoStabsUUID : ld::relocatable::File::kDebugInfoStabs);
3370 ld::relocatable::File::Stab stab;
3371 stab.atom = NULL;
3372 stab.type = type;
3373 stab.other = sym.n_sect();
3374 stab.desc = sym.n_desc();
3375 stab.value = sym.n_value();
3376 stab.string = NULL;
3377 switch (state) {
3378 case start:
3379 switch (type) {
3380 case N_BNSYM:
3381 // beginning of function block
3382 state = inBeginEnd;
3383 // fall into case to lookup atom by addresss
3384 case N_LCSYM:
3385 case N_STSYM:
3386 currentAtomAddress = sym.n_value();
3387 currentAtom = this->findAtomByAddress(currentAtomAddress);
3388 if ( currentAtom != NULL ) {
3389 stab.atom = currentAtom;
3390 stab.string = symString;
3391 }
3392 else {
3393 fprintf(stderr, "can't find atom for stabs BNSYM at %08llX in %s",
3394 (uint64_t)sym.n_value(), _path);
3395 }
3396 break;
3397 case N_SO:
3398 case N_OSO:
3399 case N_OPT:
3400 case N_LSYM:
3401 case N_RSYM:
3402 case N_PSYM:
3403 // not associated with an atom, just copy
3404 stab.string = symString;
3405 break;
3406 case N_GSYM:
3407 {
3408 // n_value field is NOT atom address ;-(
3409 // need to find atom by name match
3410 const char* colon = strchr(symString, ':');
3411 if ( colon != NULL ) {
3412 // build underscore leading name
3413 int nameLen = colon - symString;
3414 char symName[nameLen+2];
3415 strlcpy(&symName[1], symString, nameLen+1);
3416 symName[0] = '_';
3417 symName[nameLen+1] = '\0';
3418 currentAtom = this->findAtomByName(symName);
3419 if ( currentAtom != NULL ) {
3420 stab.atom = currentAtom;
3421 stab.string = symString;
3422 }
3423 }
3424 else {
3425 // might be a debug-note without trailing :G()
3426 currentAtom = this->findAtomByName(symString);
3427 if ( currentAtom != NULL ) {
3428 stab.atom = currentAtom;
3429 stab.string = symString;
3430 }
3431 }
3432 if ( stab.atom == NULL ) {
3433 // ld_classic added bogus GSYM stabs for old style dtrace probes
3434 if ( (strncmp(symString, "__dtrace_probe$", 15) != 0) )
3435 warning("can't find atom for N_GSYM stabs %s in %s", symString, _path);
3436 useStab = false;
3437 }
3438 break;
3439 }
3440 case N_FUN:
3441 if ( isConstFunStabs(symString) ) {
3442 // constant not associated with a function
3443 stab.string = symString;
3444 }
3445 else {
3446 // old style stabs without BNSYM
3447 state = inFun;
3448 currentAtomAddress = sym.n_value();
3449 currentAtom = this->findAtomByAddress(currentAtomAddress);
3450 if ( currentAtom != NULL ) {
3451 stab.atom = currentAtom;
3452 stab.string = symString;
3453 }
3454 else {
3455 warning("can't find atom for stabs FUN at %08llX in %s",
3456 (uint64_t)currentAtomAddress, _path);
3457 }
3458 }
3459 break;
3460 case N_SOL:
3461 case N_SLINE:
3462 stab.string = symString;
3463 // old stabs
3464 break;
3465 case N_BINCL:
3466 case N_EINCL:
3467 case N_EXCL:
3468 stab.string = symString;
3469 // -gfull built .o file
3470 break;
3471 default:
3472 warning("unknown stabs type 0x%X in %s", type, _path);
3473 }
3474 break;
3475 case inBeginEnd:
3476 stab.atom = currentAtom;
3477 switch (type) {
3478 case N_ENSYM:
3479 state = start;
3480 currentAtom = NULL;
3481 break;
3482 case N_LCSYM:
3483 case N_STSYM:
3484 {
3485 Atom<A>* nestedAtom = this->findAtomByAddress(sym.n_value());
3486 if ( nestedAtom != NULL ) {
3487 stab.atom = nestedAtom;
3488 stab.string = symString;
3489 }
3490 else {
3491 warning("can't find atom for stabs 0x%X at %08llX in %s",
3492 type, (uint64_t)sym.n_value(), _path);
3493 }
3494 break;
3495 }
3496 case N_LBRAC:
3497 case N_RBRAC:
3498 case N_SLINE:
3499 // adjust value to be offset in atom
3500 stab.value -= currentAtomAddress;
3501 default:
3502 stab.string = symString;
3503 break;
3504 }
3505 break;
3506 case inFun:
3507 switch (type) {
3508 case N_FUN:
3509 if ( isConstFunStabs(symString) ) {
3510 stab.atom = currentAtom;
3511 stab.string = symString;
3512 }
3513 else {
3514 if ( sym.n_sect() != 0 ) {
3515 // found another start stab, must be really old stabs...
3516 currentAtomAddress = sym.n_value();
3517 currentAtom = this->findAtomByAddress(currentAtomAddress);
3518 if ( currentAtom != NULL ) {
3519 stab.atom = currentAtom;
3520 stab.string = symString;
3521 }
3522 else {
3523 warning("can't find atom for stabs FUN at %08llX in %s",
3524 (uint64_t)currentAtomAddress, _path);
3525 }
3526 }
3527 else {
3528 // found ending stab, switch back to start state
3529 stab.string = symString;
3530 stab.atom = currentAtom;
3531 state = start;
3532 currentAtom = NULL;
3533 }
3534 }
3535 break;
3536 case N_LBRAC:
3537 case N_RBRAC:
3538 case N_SLINE:
3539 // adjust value to be offset in atom
3540 stab.value -= currentAtomAddress;
3541 stab.atom = currentAtom;
3542 break;
3543 case N_SO:
3544 stab.string = symString;
3545 state = start;
3546 break;
3547 default:
3548 stab.atom = currentAtom;
3549 stab.string = symString;
3550 break;
3551 }
3552 break;
3553 }
3554 // add to list of stabs for this .o file
3555 if ( useStab )
3556 _file->_stabs.push_back(stab);
3557 }
3558 }
3559 }
3560
3561
3562
3563 // Look at the compilation unit DIE and determine
3564 // its NAME, compilation directory (in COMP_DIR) and its
3565 // line number information offset (in STMT_LIST). NAME and COMP_DIR
3566 // may be NULL (especially COMP_DIR) if they are not in the .o file;
3567 // STMT_LIST will be (uint64_t) -1.
3568 //
3569 // At present this assumes that there's only one compilation unit DIE.
3570 //
3571 template <typename A>
3572 bool Parser<A>::read_comp_unit(const char ** name, const char ** comp_dir,
3573 uint64_t *stmt_list)
3574 {
3575 const uint8_t * debug_info;
3576 const uint8_t * debug_abbrev;
3577 const uint8_t * di;
3578 const uint8_t * da;
3579 const uint8_t * end;
3580 const uint8_t * enda;
3581 uint64_t sz;
3582 uint16_t vers;
3583 uint64_t abbrev_base;
3584 uint64_t abbrev;
3585 uint8_t address_size;
3586 bool dwarf64;
3587
3588 *name = NULL;
3589 *comp_dir = NULL;
3590 *stmt_list = (uint64_t) -1;
3591
3592 if ( (_file->_dwarfDebugInfoSect == NULL) || (_file->_dwarfDebugAbbrevSect == NULL) )
3593 return false;
3594
3595 debug_info = (uint8_t*)_file->fileContent() + _file->_dwarfDebugInfoSect->offset();
3596 debug_abbrev = (uint8_t*)_file->fileContent() + _file->_dwarfDebugAbbrevSect->offset();
3597 di = debug_info;
3598
3599 if (_file->_dwarfDebugInfoSect->size() < 12)
3600 /* Too small to be a real debug_info section. */
3601 return false;
3602 sz = A::P::E::get32(*(uint32_t*)di);
3603 di += 4;
3604 dwarf64 = sz == 0xffffffff;
3605 if (dwarf64)
3606 sz = A::P::E::get64(*(uint64_t*)di), di += 8;
3607 else if (sz > 0xffffff00)
3608 /* Unknown dwarf format. */
3609 return false;
3610
3611 /* Verify claimed size. */
3612 if (sz + (di - debug_info) > _file->_dwarfDebugInfoSect->size() || sz <= (dwarf64 ? 23 : 11))
3613 return false;
3614
3615 vers = A::P::E::get16(*(uint16_t*)di);
3616 if (vers < 2 || vers > 4)
3617 /* DWARF version wrong for this code.
3618 Chances are we could continue anyway, but we don't know for sure. */
3619 return false;
3620 di += 2;
3621
3622 /* Find the debug_abbrev section. */
3623 abbrev_base = dwarf64 ? A::P::E::get64(*(uint64_t*)di) : A::P::E::get32(*(uint32_t*)di);
3624 di += dwarf64 ? 8 : 4;
3625
3626 if (abbrev_base > _file->_dwarfDebugAbbrevSect->size())
3627 return false;
3628 da = debug_abbrev + abbrev_base;
3629 enda = debug_abbrev + _file->_dwarfDebugAbbrevSect->size();
3630
3631 address_size = *di++;
3632
3633 /* Find the abbrev number we're looking for. */
3634 end = di + sz;
3635 abbrev = read_uleb128 (&di, end);
3636 if (abbrev == (uint64_t) -1)
3637 return false;
3638
3639 /* Skip through the debug_abbrev section looking for that abbrev. */
3640 for (;;)
3641 {
3642 uint64_t this_abbrev = read_uleb128 (&da, enda);
3643 uint64_t attr;
3644
3645 if (this_abbrev == abbrev)
3646 /* This is almost always taken. */
3647 break;
3648 skip_leb128 (&da, enda); /* Skip the tag. */
3649 if (da == enda)
3650 return false;
3651 da++; /* Skip the DW_CHILDREN_* value. */
3652
3653 do {
3654 attr = read_uleb128 (&da, enda);
3655 skip_leb128 (&da, enda);
3656 } while (attr != 0 && attr != (uint64_t) -1);
3657 if (attr != 0)
3658 return false;
3659 }
3660
3661 /* Check that the abbrev is one for a DW_TAG_compile_unit. */
3662 if (read_uleb128 (&da, enda) != DW_TAG_compile_unit)
3663 return false;
3664 if (da == enda)
3665 return false;
3666 da++; /* Skip the DW_CHILDREN_* value. */
3667
3668 /* Now, go through the DIE looking for DW_AT_name,
3669 DW_AT_comp_dir, and DW_AT_stmt_list. */
3670 for (;;)
3671 {
3672 uint64_t attr = read_uleb128 (&da, enda);
3673 uint64_t form = read_uleb128 (&da, enda);
3674
3675 if (attr == (uint64_t) -1)
3676 return false;
3677 else if (attr == 0)
3678 return true;
3679
3680 if (form == DW_FORM_indirect)
3681 form = read_uleb128 (&di, end);
3682
3683 if (attr == DW_AT_name)
3684 *name = getDwarfString(form, di);
3685 else if (attr == DW_AT_comp_dir)
3686 *comp_dir = getDwarfString(form, di);
3687 else if (attr == DW_AT_stmt_list && form == DW_FORM_data4)
3688 *stmt_list = A::P::E::get32(*(uint32_t*)di);
3689 else if (attr == DW_AT_stmt_list && form == DW_FORM_data8)
3690 *stmt_list = A::P::E::get64(*(uint64_t*)di);
3691 if (! skip_form (&di, end, form, address_size, dwarf64))
3692 return false;
3693 }
3694 }
3695
3696
3697
3698 template <typename A>
3699 File<A>::~File()
3700 {
3701 free(_sectionsArray);
3702 free(_atomsArray);
3703 }
3704
3705 template <typename A>
3706 const char* File<A>::translationUnitSource() const
3707 {
3708 return _dwarfTranslationUnitPath;
3709 }
3710
3711
3712
3713 template <typename A>
3714 bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
3715 {
3716 handler.doFile(*this);
3717 uint8_t* p = _atomsArray;
3718 for(int i=_atomsArrayCount; i > 0; --i) {
3719 handler.doAtom(*((Atom<A>*)p));
3720 p += sizeof(Atom<A>);
3721 }
3722 return (_atomsArrayCount != 0);
3723 }
3724
3725 template <typename A>
3726 const char* Section<A>::makeSegmentName(const macho_section<typename A::P>* sect)
3727 {
3728 // mach-o section record only has room for 16-byte seg/sect names
3729 // so a 16-byte name has no trailing zero
3730 const char* name = sect->segname();
3731 if ( strlen(name) < 16 )
3732 return name;
3733 char* tmp = new char[17];
3734 strlcpy(tmp, name, 17);
3735 return tmp;
3736 }
3737
3738 template <typename A>
3739 const char* Section<A>::makeSectionName(const macho_section<typename A::P>* sect)
3740 {
3741 const char* name = sect->sectname();
3742 if ( strlen(name) < 16 )
3743 return name;
3744
3745 // special case common long section names so we don't have to malloc
3746 if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3747 return "__objc_classrefs";
3748 if ( strncmp(sect->sectname(), "__objc_classlist", 16) == 0 )
3749 return "__objc_classlist";
3750 if ( strncmp(sect->sectname(), "__objc_nlclslist", 16) == 0 )
3751 return "__objc_nlclslist";
3752 if ( strncmp(sect->sectname(), "__objc_nlcatlist", 16) == 0 )
3753 return "__objc_nlcatlist";
3754 if ( strncmp(sect->sectname(), "__objc_protolist", 16) == 0 )
3755 return "__objc_protolist";
3756 if ( strncmp(sect->sectname(), "__objc_protorefs", 16) == 0 )
3757 return "__objc_protorefs";
3758 if ( strncmp(sect->sectname(), "__objc_superrefs", 16) == 0 )
3759 return "__objc_superrefs";
3760 if ( strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0 )
3761 return "__objc_imageinfo";
3762 if ( strncmp(sect->sectname(), "__objc_stringobj", 16) == 0 )
3763 return "__objc_stringobj";
3764 if ( strncmp(sect->sectname(), "__gcc_except_tab", 16) == 0 )
3765 return "__gcc_except_tab";
3766
3767 char* tmp = new char[17];
3768 strlcpy(tmp, name, 17);
3769 return tmp;
3770 }
3771
3772 template <typename A>
3773 bool Section<A>::readable(const macho_section<typename A::P>* sect)
3774 {
3775 return true;
3776 }
3777
3778 template <typename A>
3779 bool Section<A>::writable(const macho_section<typename A::P>* sect)
3780 {
3781 // mach-o .o files do not contain segment permissions
3782 // we just know TEXT is special
3783 return ( strcmp(sect->segname(), "__TEXT") != 0 );
3784 }
3785
3786 template <typename A>
3787 bool Section<A>::exectuable(const macho_section<typename A::P>* sect)
3788 {
3789 // mach-o .o files do not contain segment permissions
3790 // we just know TEXT is special
3791 return ( strcmp(sect->segname(), "__TEXT") == 0 );
3792 }
3793
3794
3795 template <typename A>
3796 ld::Section::Type Section<A>::sectionType(const macho_section<typename A::P>* sect)
3797 {
3798 switch ( sect->flags() & SECTION_TYPE ) {
3799 case S_ZEROFILL:
3800 return ld::Section::typeZeroFill;
3801 case S_CSTRING_LITERALS:
3802 if ( (strcmp(sect->sectname(), "__cstring") == 0) && (strcmp(sect->segname(), "__TEXT") == 0) )
3803 return ld::Section::typeCString;
3804 else
3805 return ld::Section::typeNonStdCString;
3806 case S_4BYTE_LITERALS:
3807 return ld::Section::typeLiteral4;
3808 case S_8BYTE_LITERALS:
3809 return ld::Section::typeLiteral8;
3810 case S_LITERAL_POINTERS:
3811 return ld::Section::typeCStringPointer;
3812 case S_NON_LAZY_SYMBOL_POINTERS:
3813 return ld::Section::typeNonLazyPointer;
3814 case S_LAZY_SYMBOL_POINTERS:
3815 return ld::Section::typeLazyPointer;
3816 case S_SYMBOL_STUBS:
3817 return ld::Section::typeStub;
3818 case S_MOD_INIT_FUNC_POINTERS:
3819 return ld::Section::typeInitializerPointers;
3820 case S_MOD_TERM_FUNC_POINTERS:
3821 return ld::Section::typeTerminatorPointers;
3822 case S_INTERPOSING:
3823 return ld::Section::typeUnclassified;
3824 case S_16BYTE_LITERALS:
3825 return ld::Section::typeLiteral16;
3826 case S_REGULAR:
3827 case S_COALESCED:
3828 if ( sect->flags() & S_ATTR_PURE_INSTRUCTIONS ) {
3829 return ld::Section::typeCode;
3830 }
3831 else if ( strcmp(sect->segname(), "__TEXT") == 0 ) {
3832 if ( strcmp(sect->sectname(), "__eh_frame") == 0 )
3833 return ld::Section::typeCFI;
3834 else if ( strcmp(sect->sectname(), "__ustring") == 0 )
3835 return ld::Section::typeUTF16Strings;
3836 else if ( strcmp(sect->sectname(), "__textcoal_nt") == 0 )
3837 return ld::Section::typeCode;
3838 else if ( strcmp(sect->sectname(), "__StaticInit") == 0 )
3839 return ld::Section::typeCode;
3840 else if ( strcmp(sect->sectname(), "__constructor") == 0 )
3841 return ld::Section::typeInitializerPointers;
3842 }
3843 else if ( strcmp(sect->segname(), "__DATA") == 0 ) {
3844 if ( strcmp(sect->sectname(), "__cfstring") == 0 )
3845 return ld::Section::typeCFString;
3846 else if ( strcmp(sect->sectname(), "__dyld") == 0 )
3847 return ld::Section::typeDyldInfo;
3848 else if ( strcmp(sect->sectname(), "__program_vars") == 0 )
3849 return ld::Section::typeDyldInfo;
3850 else if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3851 return ld::Section::typeObjCClassRefs;
3852 else if ( strcmp(sect->sectname(), "__objc_catlist") == 0 )
3853 return ld::Section::typeObjC2CategoryList;
3854 }
3855 else if ( strcmp(sect->segname(), "__OBJC") == 0 ) {
3856 if ( strcmp(sect->sectname(), "__class") == 0 )
3857 return ld::Section::typeObjC1Classes;
3858 }
3859 break;
3860 case S_THREAD_LOCAL_REGULAR:
3861 return ld::Section::typeTLVInitialValues;
3862 case S_THREAD_LOCAL_ZEROFILL:
3863 return ld::Section::typeTLVZeroFill;
3864 case S_THREAD_LOCAL_VARIABLES:
3865 return ld::Section::typeTLVDefs;
3866 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
3867 return ld::Section::typeTLVInitializerPointers;
3868 }
3869 return ld::Section::typeUnclassified;
3870 }
3871
3872
3873 template <typename A>
3874 Atom<A>* Section<A>::findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end)
3875 {
3876 // do a binary search of atom array
3877 uint32_t atomCount = end - start;
3878 Atom<A>* base = start;
3879 for (uint32_t n = atomCount; n > 0; n /= 2) {
3880 Atom<A>* pivot = &base[n/2];
3881 pint_t atomStartAddr = pivot->_objAddress;
3882 pint_t atomEndAddr = atomStartAddr + pivot->_size;
3883 if ( atomStartAddr <= addr ) {
3884 // address in normal atom
3885 if (addr < atomEndAddr)
3886 return pivot;
3887 // address in "end" label (but not in alias)
3888 if ( (pivot->_size == 0) && (addr == atomEndAddr) && !pivot->isAlias() )
3889 return pivot;
3890 }
3891 if ( addr >= atomEndAddr ) {
3892 // key > pivot
3893 // move base to atom after pivot
3894 base = &pivot[1];
3895 --n;
3896 }
3897 else {
3898 // key < pivot
3899 // keep same base
3900 }
3901 }
3902 return NULL;
3903 }
3904
3905 template <typename A>
3906 ld::Atom::Alignment Section<A>::alignmentForAddress(pint_t addr)
3907 {
3908 const uint32_t sectionAlignment = this->_machOSection->align();
3909 uint32_t modulus = (addr % (1 << sectionAlignment));
3910 if ( modulus > 0xFFFF )
3911 warning("alignment for symbol at address 0x%08llX in %s exceeds 2^16", (uint64_t)addr, this->file().path());
3912 return ld::Atom::Alignment(sectionAlignment, modulus);
3913 }
3914
3915 template <typename A>
3916 uint32_t Section<A>::sectionNum(class Parser<A>& parser) const
3917 {
3918 if ( _machOSection == NULL )
3919 return 0;
3920 else
3921 return 1 + (this->_machOSection - parser.firstMachOSection());
3922 }
3923
3924 // arm does not have zero cost exceptions
3925 template <> uint32_t CFISection<arm>::cfiCount() { return 0; }
3926
3927 template <typename A>
3928 uint32_t CFISection<A>::cfiCount()
3929 {
3930 // create ObjectAddressSpace object for use by libunwind
3931 OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
3932 return libunwind::CFI_Parser<OAS>::getCFICount(oas,
3933 this->_machOSection->addr(), this->_machOSection->size());
3934 }
3935
3936 template <typename A>
3937 void CFISection<A>::warnFunc(void* ref, uint64_t funcAddr, const char* msg)
3938 {
3939 Parser<A>* parser = (Parser<A>*)ref;
3940 if ( ! parser->warnUnwindConversionProblems() )
3941 return;
3942 if ( funcAddr != CFI_INVALID_ADDRESS ) {
3943 // atoms are not constructed yet, so scan symbol table for labels
3944 const char* name = parser->scanSymbolTableForAddress(funcAddr);
3945 warning("could not create compact unwind for %s: %s", name, msg);
3946 }
3947 else {
3948 warning("could not create compact unwind: %s", msg);
3949 }
3950 }
3951
3952 template <>
3953 bool CFISection<x86_64>::needsRelocating()
3954 {
3955 return true;
3956 }
3957
3958 template <>
3959 bool CFISection<arm64>::needsRelocating()
3960 {
3961 return true;
3962 }
3963
3964 template <typename A>
3965 bool CFISection<A>::needsRelocating()
3966 {
3967 return false;
3968 }
3969
3970 template <>
3971 void CFISection<x86_64>::cfiParse(class Parser<x86_64>& parser, uint8_t* buffer,
3972 libunwind::CFI_Atom_Info<CFISection<x86_64>::OAS>::CFI_Atom_Info cfiArray[],
3973 uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
3974 {
3975 // copy __eh_frame data to buffer
3976 memcpy(buffer, file().fileContent() + this->_machOSection->offset(), this->_machOSection->size());
3977
3978 // and apply relocations
3979 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + this->_machOSection->reloff());
3980 const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
3981 for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
3982 uint64_t value = 0;
3983 switch ( reloc->r_type() ) {
3984 case X86_64_RELOC_SUBTRACTOR:
3985 value = 0 - parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3986 ++reloc;
3987 if ( reloc->r_extern() )
3988 value += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3989 break;
3990 case X86_64_RELOC_UNSIGNED:
3991 value = parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3992 break;
3993 case X86_64_RELOC_GOT:
3994 // this is used for the reference to the personality function in CIEs
3995 // store the symbol number of the personality function for later use as a Fixup
3996 value = reloc->r_symbolnum();
3997 break;
3998 default:
3999 fprintf(stderr, "CFISection::cfiParse() unexpected relocation type at r_address=0x%08X\n", reloc->r_address());
4000 break;
4001 }
4002 uint64_t* p64;
4003 uint32_t* p32;
4004 switch ( reloc->r_length() ) {
4005 case 3:
4006 p64 = (uint64_t*)&buffer[reloc->r_address()];
4007 E::set64(*p64, value + E::get64(*p64));
4008 break;
4009 case 2:
4010 p32 = (uint32_t*)&buffer[reloc->r_address()];
4011 E::set32(*p32, value + E::get32(*p32));
4012 break;
4013 default:
4014 fprintf(stderr, "CFISection::cfiParse() unexpected relocation size at r_address=0x%08X\n", reloc->r_address());
4015 break;
4016 }
4017 }
4018
4019 // create ObjectAddressSpace object for use by libunwind
4020 OAS oas(*this, buffer);
4021
4022 // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
4023 const char* msg;
4024 msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86_64>::parseCFIs(
4025 oas, this->_machOSection->addr(), this->_machOSection->size(),
4026 cuStarts, cuCount, parser.keepDwarfUnwind(), parser.forceDwarfConversion(), parser.neverConvertDwarf(),
4027 cfiArray, count, (void*)&parser, warnFunc);
4028 if ( msg != NULL )
4029 throwf("malformed __eh_frame section: %s", msg);
4030 }
4031
4032 template <>
4033 void CFISection<x86>::cfiParse(class Parser<x86>& parser, uint8_t* buffer,
4034 libunwind::CFI_Atom_Info<CFISection<x86>::OAS>::CFI_Atom_Info cfiArray[],
4035 uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
4036 {
4037 // create ObjectAddressSpace object for use by libunwind
4038 OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
4039
4040 // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
4041 const char* msg;
4042 msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86>::parseCFIs(
4043 oas, this->_machOSection->addr(), this->_machOSection->size(),
4044 cuStarts, cuCount, parser.keepDwarfUnwind(), parser.forceDwarfConversion(), parser.neverConvertDwarf(),
4045 cfiArray, count, (void*)&parser, warnFunc);
4046 if ( msg != NULL )
4047 throwf("malformed __eh_frame section: %s", msg);
4048 }
4049
4050
4051
4052
4053 template <>
4054 void CFISection<arm>::cfiParse(class Parser<arm>& parser, uint8_t* buffer,
4055 libunwind::CFI_Atom_Info<CFISection<arm>::OAS>::CFI_Atom_Info cfiArray[],
4056 uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
4057 {
4058 // arm does not use zero cost exceptions
4059 assert(count == 0);
4060 }
4061
4062 template <>
4063 void CFISection<arm64>::cfiParse(class Parser<arm64>& parser, uint8_t* buffer,
4064 libunwind::CFI_Atom_Info<CFISection<arm64>::OAS>::CFI_Atom_Info cfiArray[],
4065 uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
4066 {
4067 // copy __eh_frame data to buffer
4068 memcpy(buffer, file().fileContent() + this->_machOSection->offset(), this->_machOSection->size());
4069
4070 // and apply relocations
4071 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + this->_machOSection->reloff());
4072 const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4073 for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4074 uint64_t* p64 = (uint64_t*)&buffer[reloc->r_address()];
4075 uint32_t* p32 = (uint32_t*)&buffer[reloc->r_address()];
4076 uint32_t addend32 = E::get32(*p32);
4077 uint64_t addend64 = E::get64(*p64);
4078 uint64_t value = 0;
4079 switch ( reloc->r_type() ) {
4080 case ARM64_RELOC_SUBTRACTOR:
4081 value = 0 - parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4082 ++reloc;
4083 if ( reloc->r_extern() )
4084 value += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4085 break;
4086 case ARM64_RELOC_UNSIGNED:
4087 value = parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4088 break;
4089 case ARM64_RELOC_POINTER_TO_GOT:
4090 // this is used for the reference to the personality function in CIEs
4091 // store the symbol number of the personality function for later use as a Fixup
4092 value = reloc->r_symbolnum();
4093 addend32 = 0;
4094 addend64 = 0;
4095 break;
4096 default:
4097 fprintf(stderr, "CFISection::cfiParse() unexpected relocation type at r_address=0x%08X\n", reloc->r_address());
4098 break;
4099 }
4100 switch ( reloc->r_length() ) {
4101 case 3:
4102 E::set64(*p64, value + addend64);
4103 break;
4104 case 2:
4105 E::set32(*p32, value + addend32);
4106 break;
4107 default:
4108 fprintf(stderr, "CFISection::cfiParse() unexpected relocation size at r_address=0x%08X\n", reloc->r_address());
4109 break;
4110 }
4111 }
4112
4113
4114 // create ObjectAddressSpace object for use by libunwind
4115 OAS oas(*this, buffer);
4116
4117 // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
4118 const char* msg;
4119 msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_arm64>::parseCFIs(
4120 oas, this->_machOSection->addr(), this->_machOSection->size(),
4121 cuStarts, cuCount, parser.keepDwarfUnwind(), parser.forceDwarfConversion(), parser.neverConvertDwarf(),
4122 cfiArray, count, (void*)&parser, warnFunc);
4123 if ( msg != NULL )
4124 throwf("malformed __eh_frame section: %s", msg);
4125 }
4126
4127
4128 template <typename A>
4129 uint32_t CFISection<A>::computeAtomCount(class Parser<A>& parser,
4130 struct Parser<A>::LabelAndCFIBreakIterator& it,
4131 const struct Parser<A>::CFI_CU_InfoArrays& cfis)
4132 {
4133 return cfis.cfiCount;
4134 }
4135
4136
4137
4138 template <typename A>
4139 uint32_t CFISection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4140 struct Parser<A>::LabelAndCFIBreakIterator& it,
4141 const struct Parser<A>::CFI_CU_InfoArrays& cfis)
4142 {
4143 this->_beginAtoms = (Atom<A>*)p;
4144 // walk CFI_Atom_Info array and create atom for each entry
4145 const CFI_Atom_Info* start = &cfis.cfiArray[0];
4146 const CFI_Atom_Info* end = &cfis.cfiArray[cfis.cfiCount];
4147 for(const CFI_Atom_Info* a=start; a < end; ++a) {
4148 Atom<A>* space = (Atom<A>*)p;
4149 new (space) Atom<A>(*this, (a->isCIE ? "CIE" : "FDE"), a->address, a->size,
4150 ld::Atom::definitionRegular, ld::Atom::combineNever, ld::Atom::scopeTranslationUnit,
4151 ld::Atom::typeCFI, ld::Atom::symbolTableNotInFinalLinkedImages,
4152 false, false, false, ld::Atom::Alignment(0));
4153 p += sizeof(Atom<A>);
4154 }
4155 this->_endAtoms = (Atom<A>*)p;
4156 return cfis.cfiCount;
4157 }
4158
4159
4160 template <> bool CFISection<x86_64>::bigEndian() { return false; }
4161 template <> bool CFISection<x86>::bigEndian() { return false; }
4162 template <> bool CFISection<arm>::bigEndian() { return false; }
4163 template <> bool CFISection<arm64>::bigEndian() { return false; }
4164
4165
4166 template <>
4167 void CFISection<x86_64>::addCiePersonalityFixups(class Parser<x86_64>& parser, const CFI_Atom_Info* cieInfo)
4168 {
4169 uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
4170 if ( personalityEncoding == 0x9B ) {
4171 // compiler always produces X86_64_RELOC_GOT with addend of 4 to personality function
4172 // CFISection<x86_64>::cfiParse() set targetAddress to be symbolIndex + 4 + addressInCIE
4173 uint32_t symbolIndex = cieInfo->u.cieInfo.personality.targetAddress - 4
4174 - cieInfo->address - cieInfo->u.cieInfo.personality.offsetInCFI;
4175 const macho_nlist<P>& sym = parser.symbolFromIndex(symbolIndex);
4176 const char* personalityName = parser.nameFromSymbol(sym);
4177
4178 Atom<x86_64>* cieAtom = this->findAtomByAddress(cieInfo->address);
4179 Parser<x86_64>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
4180 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, personalityName);
4181 parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, 4);
4182 parser.addFixup(src, ld::Fixup::k3of3, ld::Fixup::kindStoreX86PCRel32GOT);
4183 }
4184 else if ( personalityEncoding != 0 ) {
4185 throwf("unsupported address encoding (%02X) of personality function in CIE",
4186 personalityEncoding);
4187 }
4188 }
4189
4190 template <>
4191 void CFISection<x86>::addCiePersonalityFixups(class Parser<x86>& parser, const CFI_Atom_Info* cieInfo)
4192 {
4193 uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
4194 if ( (personalityEncoding == 0x9B) || (personalityEncoding == 0x90) ) {
4195 uint32_t offsetInCFI = cieInfo->u.cieInfo.personality.offsetInCFI;
4196 uint32_t nlpAddr = cieInfo->u.cieInfo.personality.targetAddress;
4197 Atom<x86>* cieAtom = this->findAtomByAddress(cieInfo->address);
4198 Atom<x86>* nlpAtom = parser.findAtomByAddress(nlpAddr);
4199 assert(nlpAtom->contentType() == ld::Atom::typeNonLazyPointer);
4200 Parser<x86>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
4201
4202 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, nlpAtom);
4203 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
4204 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, offsetInCFI);
4205 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
4206 }
4207 else if ( personalityEncoding != 0 ) {
4208 throwf("unsupported address encoding (%02X) of personality function in CIE", personalityEncoding);
4209 }
4210 }
4211
4212
4213
#if SUPPORT_ARCH_arm64
// Adds the fixups that bind a CIE's personality pointer to the personality
// function by name (arm64).
// Does nothing when the CIE records a personality encoding of 0; throws for
// any encoding other than 0x9B.
template <>
void CFISection<arm64>::addCiePersonalityFixups(class Parser<arm64>& parser, const CFI_Atom_Info* cieInfo)
{
	const uint8_t encoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
	if ( encoding == 0 )
		return;
	if ( encoding != 0x9B ) {
		throwf("unsupported address encoding (%02X) of personality function in CIE", 
			encoding);
	}

	// compiler always produces ARM64_RELOC_GOT r_pcrel=1 to personality function
	// CFISection<arm64>::cfiParse() set targetAddress to be symbolIndex + addressInCIE,
	// so undo that arithmetic to recover the symbol index
	const uint32_t personalitySymbolIndex = cieInfo->u.cieInfo.personality.targetAddress 
											- cieInfo->address - cieInfo->u.cieInfo.personality.offsetInCFI;
	const char* targetName = parser.nameFromSymbol(parser.symbolFromIndex(personalitySymbolIndex));

	Atom<arm64>* owningCie = this->findAtomByAddress(cieInfo->address);
	Parser<arm64>::SourceLocation where(owningCie, cieInfo->u.cieInfo.personality.offsetInCFI);
	parser.addFixup(where, ld::Fixup::k1of2, ld::Fixup::kindSetTargetAddress, false, targetName);
	parser.addFixup(where, ld::Fixup::k2of2, ld::Fixup::kindStoreARM64PCRelToGOT);
}
#endif
4238
4239 template <typename A>
4240 void CFISection<A>::addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo)
4241 {
4242 assert(0 && "addCiePersonalityFixups() not implemented for arch");
4243 }
4244
4245 template <typename A>
4246 void CFISection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cfis)
4247 {
4248 ld::Fixup::Kind store32 = bigEndian() ? ld::Fixup::kindStoreBigEndian32 : ld::Fixup::kindStoreLittleEndian32;
4249 ld::Fixup::Kind store64 = bigEndian() ? ld::Fixup::kindStoreBigEndian64 : ld::Fixup::kindStoreLittleEndian64;
4250
4251 // add all references for FDEs, including implicit group references
4252 const CFI_Atom_Info* end = &cfis.cfiArray[cfis.cfiCount];
4253 for(const CFI_Atom_Info* p = &cfis.cfiArray[0]; p < end; ++p) {
4254 if ( p->isCIE ) {
4255 // add reference to personality function if used
4256 if ( p->u.cieInfo.personality.targetAddress != CFI_INVALID_ADDRESS ) {
4257 this->addCiePersonalityFixups(parser, p);
4258 }
4259 }
4260 else {
4261 // find FDE Atom
4262 Atom<A>* fdeAtom = this->findAtomByAddress(p->address);
4263 // find function Atom
4264 Atom<A>* functionAtom = parser.findAtomByAddress(p->u.fdeInfo.function.targetAddress);
4265 // find CIE Atom
4266 Atom<A>* cieAtom = this->findAtomByAddress(p->u.fdeInfo.cie.targetAddress);
4267 // find LSDA Atom
4268 Atom<A>* lsdaAtom = NULL;
4269 if ( p->u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS ) {
4270 lsdaAtom = parser.findAtomByAddress(p->u.fdeInfo.lsda.targetAddress);
4271 }
4272 // add reference from FDE to CIE (always 32-bit pc-rel)
4273 typename Parser<A>::SourceLocation fdeToCieSrc(fdeAtom, p->u.fdeInfo.cie.offsetInCFI);
4274 parser.addFixup(fdeToCieSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, fdeAtom);
4275 parser.addFixup(fdeToCieSrc, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, p->u.fdeInfo.cie.offsetInCFI);
4276 parser.addFixup(fdeToCieSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
4277 parser.addFixup(fdeToCieSrc, ld::Fixup::k4of4, store32, cieAtom);
4278
4279 // add reference from FDE to function
4280 typename Parser<A>::SourceLocation fdeToFuncSrc(fdeAtom, p->u.fdeInfo.function.offsetInCFI);
4281 switch (p->u.fdeInfo.function.encodingOfTargetAddress) {
4282 case DW_EH_PE_pcrel|DW_EH_PE_ptr:
4283 if ( sizeof(typename A::P::uint_t) == 8 ) {
4284 parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
4285 parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4286 parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
4287 parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store64);
4288 break;
4289 }
4290 // else fall into 32-bit case
4291 case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
4292 parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
4293 parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4294 parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
4295 parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store32);
4296 break;
4297 default:
4298 throw "unsupported encoding in FDE of pointer to function";
4299 }
4300
4301 // add reference from FDE to LSDA
4302 typename Parser<A>::SourceLocation fdeToLsdaSrc(fdeAtom, p->u.fdeInfo.lsda.offsetInCFI);
4303 if ( lsdaAtom != NULL ) {
4304 switch (p->u.fdeInfo.lsda.encodingOfTargetAddress) {
4305 case DW_EH_PE_pcrel|DW_EH_PE_ptr:
4306 if ( sizeof(typename A::P::uint_t) == 8 ) {
4307 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
4308 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4309 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
4310 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store64);
4311 break;
4312 }
4313 // else fall into 32-bit case
4314 case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
4315 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
4316 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4317 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
4318 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store32);
4319 break;
4320 default:
4321 throw "unsupported encoding in FDE of pointer to LSDA";
4322 }
4323 }
4324
4325 // FDE is in group lead by function atom
4326 typename Parser<A>::SourceLocation fdeSrc(functionAtom,0);
4327 parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateFDE, fdeAtom);
4328
4329 // LSDA is in group lead by function atom
4330 if ( lsdaAtom != NULL ) {
4331 parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, lsdaAtom);
4332 }
4333 }
4334 }
4335 }
4336
4337
4338
4339
4340 template <typename A>
4341 const void* CFISection<A>::OAS::mappedAddress(pint_t addr)
4342 {
4343 if ( (_ehFrameStartAddr <= addr) && (addr < _ehFrameEndAddr) )
4344 return &_ehFrameContent[addr-_ehFrameStartAddr];
4345 else {
4346 // requested bytes are not in __eh_frame section
4347 // this can occur when examining the instruction bytes in the __text
4348 File<A>& file = _ehFrameSection.file();
4349 for (uint32_t i=0; i < file._sectionsArrayCount; ++i ) {
4350 const macho_section<typename A::P>* sect = file._sectionsArray[i]->machoSection();
4351 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
4352 if ( sect != NULL ) {
4353 if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
4354 return file.fileContent() + sect->offset() + addr - sect->addr();
4355 }
4356 }
4357 }
4358 throwf("__eh_frame parsing problem. Can't find target of reference to address 0x%08llX", (uint64_t)addr);
4359 }
4360 }
4361
4362
4363 template <typename A>
4364 uint64_t CFISection<A>::OAS::getULEB128(pint_t& logicalAddr, pint_t end)
4365 {
4366 uintptr_t size = (end - logicalAddr);
4367 libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4368 libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4369 uint64_t result = libunwind::LocalAddressSpace::getULEB128(laddr, laddr+size);
4370 logicalAddr += (laddr-sladdr);
4371 return result;
4372 }
4373
4374 template <typename A>
4375 int64_t CFISection<A>::OAS::getSLEB128(pint_t& logicalAddr, pint_t end)
4376 {
4377 uintptr_t size = (end - logicalAddr);
4378 libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4379 libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4380 int64_t result = libunwind::LocalAddressSpace::getSLEB128(laddr, laddr+size);
4381 logicalAddr += (laddr-sladdr);
4382 return result;
4383 }
4384
4385 template <typename A>
4386 typename A::P::uint_t CFISection<A>::OAS::getEncodedP(pint_t& addr, pint_t end, uint8_t encoding)
4387 {
4388 pint_t startAddr = addr;
4389 pint_t p = addr;
4390 pint_t result;
4391
4392 // first get value
4393 switch (encoding & 0x0F) {
4394 case DW_EH_PE_ptr:
4395 result = getP(addr);
4396 p += sizeof(pint_t);
4397 addr = (pint_t)p;
4398 break;
4399 case DW_EH_PE_uleb128:
4400 result = getULEB128(addr, end);
4401 break;
4402 case DW_EH_PE_udata2:
4403 result = get16(addr);
4404 p += 2;
4405 addr = (pint_t)p;
4406 break;
4407 case DW_EH_PE_udata4:
4408 result = get32(addr);
4409 p += 4;
4410 addr = (pint_t)p;
4411 break;
4412 case DW_EH_PE_udata8:
4413 result = get64(addr);
4414 p += 8;
4415 addr = (pint_t)p;
4416 break;
4417 case DW_EH_PE_sleb128:
4418 result = getSLEB128(addr, end);
4419 break;
4420 case DW_EH_PE_sdata2:
4421 result = (int16_t)get16(addr);
4422 p += 2;
4423 addr = (pint_t)p;
4424 break;
4425 case DW_EH_PE_sdata4:
4426 result = (int32_t)get32(addr);
4427 p += 4;
4428 addr = (pint_t)p;
4429 break;
4430 case DW_EH_PE_sdata8:
4431 result = get64(addr);
4432 p += 8;
4433 addr = (pint_t)p;
4434 break;
4435 default:
4436 throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4437 }
4438
4439 // then add relative offset
4440 switch ( encoding & 0x70 ) {
4441 case DW_EH_PE_absptr:
4442 // do nothing
4443 break;
4444 case DW_EH_PE_pcrel:
4445 result += startAddr;
4446 break;
4447 case DW_EH_PE_textrel:
4448 throw "DW_EH_PE_textrel pointer encoding not supported";
4449 break;
4450 case DW_EH_PE_datarel:
4451 throw "DW_EH_PE_datarel pointer encoding not supported";
4452 break;
4453 case DW_EH_PE_funcrel:
4454 throw "DW_EH_PE_funcrel pointer encoding not supported";
4455 break;
4456 case DW_EH_PE_aligned:
4457 throw "DW_EH_PE_aligned pointer encoding not supported";
4458 break;
4459 default:
4460 throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4461 break;
4462 }
4463
4464 // Note: DW_EH_PE_indirect is only used in CIEs to refernce the personality pointer
4465 // When parsing .o files that pointer contains zero, so we don't to return that.
4466 // Instead we skip the dereference and return the address of the pointer.
4467 // if ( encoding & DW_EH_PE_indirect )
4468 // result = getP(result);
4469
4470 return result;
4471 }
4472
4473 template <>
4474 const char* CUSection<x86_64>::personalityName(class Parser<x86_64>& parser, const macho_relocation_info<x86_64::P>* reloc)
4475 {
4476 if ( reloc->r_extern() ) {
4477 assert((reloc->r_type() == X86_64_RELOC_UNSIGNED) && "wrong reloc type on personality column in __compact_unwind section");
4478 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4479 return parser.nameFromSymbol(sym);
4480 }
4481 else {
4482 const pint_t* content = (pint_t*)(this->file().fileContent() + this->_machOSection->offset() + reloc->r_address());
4483 pint_t personalityAddr = *content;
4484 Section<x86_64>* personalitySection = parser.sectionForAddress(personalityAddr);
4485 assert((personalitySection->type() == ld::Section::typeCode) && "personality column in __compact_unwind section is not pointer to function");
4486 // atoms may not be constructed yet, so scan symbol table for labels
4487 const char* name = parser.scanSymbolTableForAddress(personalityAddr);
4488 return name;
4489 }
4490 }
4491
4492 template <>
4493 const char* CUSection<x86>::personalityName(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
4494 {
4495 if ( reloc->r_extern() ) {
4496 assert((reloc->r_type() == GENERIC_RELOC_VANILLA) && "wrong reloc type on personality column in __compact_unwind section");
4497 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4498 return parser.nameFromSymbol(sym);
4499 }
4500 else {
4501 // support __LD, __compact_unwind personality entries which are pointer to personality non-lazy pointer
4502 const pint_t* content = (pint_t*)(this->file().fileContent() + this->_machOSection->offset() + reloc->r_address());
4503 pint_t nlPointerAddr = *content;
4504 Section<x86>* nlSection = parser.sectionForAddress(nlPointerAddr);
4505 if ( nlSection->type() == ld::Section::typeCode ) {
4506 // personality function is defined in this .o file, so this is a direct reference to it
4507 // atoms may not be constructed yet, so scan symbol table for labels
4508 const char* name = parser.scanSymbolTableForAddress(nlPointerAddr);
4509 return name;
4510 }
4511 else {
4512 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(nlPointerAddr, nlSection->machoSection());
4513 const macho_nlist<P>& nlSymbol = parser.symbolFromIndex(symIndex);
4514 return parser.nameFromSymbol(nlSymbol);
4515 }
4516 }
4517 }
4518
#if SUPPORT_ARCH_arm64
// Returns the name of the personality function referenced by a relocation on
// the personality column of a __compact_unwind entry (arm64).
// Extern relocations name the symbol directly; otherwise the pointer stored
// in the section is read and the symbol table is scanned for that address.
template <>
const char* CUSection<arm64>::personalityName(class Parser<arm64>& parser, const macho_relocation_info<arm64::P>* reloc)
{
	if ( !reloc->r_extern() ) {
		// local relocation: the section content holds the personality function's address
		const pint_t* content = (pint_t*)(this->file().fileContent() + this->_machOSection->offset() + reloc->r_address());
		pint_t personalityAddr = *content;
		Section<arm64>* personalitySection = parser.sectionForAddress(personalityAddr);
		assert((personalitySection->type() == ld::Section::typeCode) && "personality column in __compact_unwind section is not pointer to function");
		// atoms may not be constructed yet, so scan symbol table for labels
		return parser.scanSymbolTableForAddress(personalityAddr);
	}

	assert((reloc->r_type() == ARM64_RELOC_UNSIGNED) && "wrong reloc type on personality column in __compact_unwind section");
	return parser.nameFromSymbol(parser.symbolFromIndex(reloc->r_symbolnum()));
}
#endif
4539
4540 template <typename A>
4541 const char* CUSection<A>::personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
4542 {
4543 return NULL;
4544 }
4545
4546 template <>
4547 bool CUSection<x86>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
4548 {
4549 return ((enc & UNWIND_X86_MODE_MASK) == UNWIND_X86_MODE_DWARF);
4550 }
4551
4552 template <>
4553 bool CUSection<x86_64>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
4554 {
4555 return ((enc & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_DWARF);
4556 }
4557
#if SUPPORT_ARCH_arm_any
// For arm this always answers false: no encoding is treated as "use DWARF".
template <>
bool CUSection<arm>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
{
	const bool useDwarf = false;
	return useDwarf;
}
#endif
4565
#if SUPPORT_ARCH_arm64
// True when the mode bits of an arm64 compact unwind encoding select DWARF.
template <>
bool CUSection<arm64>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
{
	const compact_unwind_encoding_t mode = (enc & UNWIND_ARM64_MODE_MASK);
	return (mode == UNWIND_ARM64_MODE_DWARF);
}
#endif
4573
4574 template <typename A>
4575 int CUSection<A>::infoSorter(const void* l, const void* r)
4576 {
4577 // sort references by symbol index, then address
4578 const Info* left = (Info*)l;
4579 const Info* right = (Info*)r;
4580 if ( left->functionSymbolIndex == right->functionSymbolIndex )
4581 return (left->functionStartAddress - right->functionStartAddress);
4582 else
4583 return (left->functionSymbolIndex - right->functionSymbolIndex);
4584 }
4585
4586 template <typename A>
4587 void CUSection<A>::parse(class Parser<A>& parser, uint32_t cnt, Info array[])
4588 {
4589 // walk section content and copy to Info array
4590 const macho_compact_unwind_entry<P>* const entries = (macho_compact_unwind_entry<P>*)(this->file().fileContent() + this->_machOSection->offset());
4591 for (uint32_t i=0; i < cnt; ++i) {
4592 Info* info = &array[i];
4593 const macho_compact_unwind_entry<P>* entry = &entries[i];
4594 info->functionStartAddress = entry->codeStart();
4595 info->functionSymbolIndex = 0xFFFFFFFF;
4596 info->rangeLength = entry->codeLen();
4597 info->compactUnwindInfo = entry->compactUnwindInfo();
4598 info->personality = NULL;
4599 info->lsdaAddress = entry->lsda();
4600 info->function = NULL;
4601 info->lsda = NULL;
4602 if ( (info->compactUnwindInfo & UNWIND_PERSONALITY_MASK) != 0 )
4603 warning("no bits should be set in UNWIND_PERSONALITY_MASK of compact unwind encoding in __LD,__compact_unwind section");
4604 if ( info->lsdaAddress != 0 ) {
4605 info->compactUnwindInfo |= UNWIND_HAS_LSDA;
4606 }
4607 }
4608
4609 // scan relocs, extern relocs are needed for personality references (possibly for function/lsda refs??)
4610 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(this->file().fileContent() + this->_machOSection->reloff());
4611 const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4612 for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4613 if ( reloc->r_extern() ) {
4614 // only expect external relocs on some colummns
4615 if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::personalityFieldOffset() ) {
4616 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4617 array[entryIndex].personality = this->personalityName(parser, reloc);
4618 }
4619 else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::lsdaFieldOffset() ) {
4620 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4621 const macho_nlist<P>& lsdaSym = parser.symbolFromIndex(reloc->r_symbolnum());
4622 if ( (lsdaSym.n_type() & N_TYPE) == N_SECT )
4623 array[entryIndex].lsdaAddress = lsdaSym.n_value();
4624 else
4625 warning("unexpected extern relocation to lsda in __compact_unwind section");
4626 }
4627 else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::codeStartFieldOffset() ) {
4628 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4629 array[entryIndex].functionSymbolIndex = reloc->r_symbolnum();
4630 array[entryIndex].functionStartAddress += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4631 }
4632 else {
4633 warning("unexpected extern relocation in __compact_unwind section");
4634 }
4635 }
4636 else {
4637 if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::personalityFieldOffset() ) {
4638 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4639 array[entryIndex].personality = this->personalityName(parser, reloc);
4640 }
4641 }
4642 }
4643
4644 // sort array by function start address so unwind infos will be contiguous for a given function
4645 ::qsort(array, cnt, sizeof(Info), infoSorter);
4646 }
4647
4648 template <typename A>
4649 uint32_t CUSection<A>::count()
4650 {
4651 const macho_section<P>* machoSect = this->machoSection();
4652 if ( (machoSect->size() % sizeof(macho_compact_unwind_entry<P>)) != 0 )
4653 throw "malformed __LD,__compact_unwind section, bad length";
4654
4655 return machoSect->size() / sizeof(macho_compact_unwind_entry<P>);
4656 }
4657
4658 template <typename A>
4659 void CUSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cus)
4660 {
4661 Info* const arrayStart = cus.cuArray;
4662 Info* const arrayEnd = &cus.cuArray[cus.cuCount];
4663 for (Info* info=arrayStart; info < arrayEnd; ++info) {
4664 // find function atom from address
4665 info->function = parser.findAtomByAddress(info->functionStartAddress);
4666 // find lsda atom from address
4667 if ( info->lsdaAddress != 0 ) {
4668 info->lsda = parser.findAtomByAddress(info->lsdaAddress);
4669 // add lsda subordinate
4670 typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4671 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, info->lsda);
4672 }
4673 if ( info->personality != NULL ) {
4674 // add personality subordinate
4675 typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4676 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinatePersonality, false, info->personality);
4677 }
4678 }
4679
4680 }
4681
4682 template <typename A>
4683 SymboledSection<A>::SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
4684 : Section<A>(f, s), _type(ld::Atom::typeUnclassified)
4685 {
4686 switch ( s->flags() & SECTION_TYPE ) {
4687 case S_ZEROFILL:
4688 _type = ld::Atom::typeZeroFill;
4689 break;
4690 case S_MOD_INIT_FUNC_POINTERS:
4691 _type = ld::Atom::typeInitializerPointers;
4692 break;
4693 case S_MOD_TERM_FUNC_POINTERS:
4694 _type = ld::Atom::typeTerminatorPointers;
4695 break;
4696 case S_THREAD_LOCAL_VARIABLES:
4697 _type = ld::Atom::typeTLV;
4698 break;
4699 case S_THREAD_LOCAL_ZEROFILL:
4700 _type = ld::Atom::typeTLVZeroFill;
4701 break;
4702 case S_THREAD_LOCAL_REGULAR:
4703 _type = ld::Atom::typeTLVInitialValue;
4704 break;
4705 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
4706 _type = ld::Atom::typeTLVInitializerPointers;
4707 break;
4708 case S_REGULAR:
4709 if ( strncmp(s->sectname(), "__gcc_except_tab", 16) == 0 )
4710 _type = ld::Atom::typeLSDA;
4711 else if ( this->type() == ld::Section::typeInitializerPointers )
4712 _type = ld::Atom::typeInitializerPointers;
4713 break;
4714 }
4715 }
4716
4717
4718 template <typename A>
4719 bool SymboledSection<A>::dontDeadStrip()
4720 {
4721 switch ( _type ) {
4722 case ld::Atom::typeInitializerPointers:
4723 case ld::Atom::typeTerminatorPointers:
4724 return true;
4725 default:
4726 // model an object file without MH_SUBSECTIONS_VIA_SYMBOLS as one in which nothing can be dead stripped
4727 if ( ! this->_file.canScatterAtoms() )
4728 return true;
4729 // call inherited
4730 return Section<A>::dontDeadStrip();
4731 }
4732 return false;
4733 }
4734
4735
4736 template <typename A>
4737 uint32_t SymboledSection<A>::computeAtomCount(class Parser<A>& parser,
4738 struct Parser<A>::LabelAndCFIBreakIterator& it,
4739 const struct Parser<A>::CFI_CU_InfoArrays&)
4740 {
4741 const pint_t startAddr = this->_machOSection->addr();
4742 const pint_t endAddr = startAddr + this->_machOSection->size();
4743 const uint32_t sectNum = this->sectionNum(parser);
4744
4745 uint32_t count = 0;
4746 pint_t addr;
4747 pint_t size;
4748 const macho_nlist<P>* sym;
4749 while ( it.next(parser, *this, sectNum, startAddr, endAddr, &addr, &size, &sym) ) {
4750 ++count;
4751 }
4752 //fprintf(stderr, "computeAtomCount(%s,%s) => %d\n", this->segmentName(), this->sectionName(), count);
4753 return count;
4754 }
4755
4756 template <typename A>
4757 uint32_t SymboledSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4758 struct Parser<A>::LabelAndCFIBreakIterator& it,
4759 const struct Parser<A>::CFI_CU_InfoArrays&)
4760 {
4761 this->_beginAtoms = (Atom<A>*)p;
4762
4763 //fprintf(stderr, "SymboledSection::appendAtoms() in section %s\n", this->_machOSection->sectname());
4764 const pint_t startAddr = this->_machOSection->addr();
4765 const pint_t endAddr = startAddr + this->_machOSection->size();
4766 const uint32_t sectNum = this->sectionNum(parser);
4767
4768 uint32_t count = 0;
4769 pint_t addr;
4770 pint_t size;
4771 const macho_nlist<P>* label;
4772 while ( it.next(parser, *this, sectNum, startAddr, endAddr, &addr, &size, &label) ) {
4773 Atom<A>* allocatedSpace = (Atom<A>*)p;
4774 // is break because of label or CFI?
4775 if ( label != NULL ) {
4776 // The size is computed based on the address of the next label (or the end of the section for the last label)
4777 // If there are two labels at the same address, we want them one to be an alias of the other.
4778 // If the label is at the end of a section, it is has zero size, but is not an alias
4779 const bool isAlias = ( (size == 0) && (addr < endAddr) );
4780 new (allocatedSpace) Atom<A>(*this, parser, *label, size, isAlias);
4781 if ( isAlias )
4782 this->_hasAliases = true;
4783 }
4784 else {
4785 ld::Atom::SymbolTableInclusion inclusion = ld::Atom::symbolTableNotIn;
4786 ld::Atom::ContentType ctype = this->contentType();
4787 if ( ctype == ld::Atom::typeLSDA )
4788 inclusion = ld::Atom::symbolTableInWithRandomAutoStripLabel;
4789 new (allocatedSpace) Atom<A>(*this, "anon", addr, size, ld::Atom::definitionRegular, ld::Atom::combineNever,
4790 ld::Atom::scopeTranslationUnit, ctype, inclusion,
4791 this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
4792 }
4793 p += sizeof(Atom<A>);
4794 ++count;
4795 }
4796
4797 this->_endAtoms = (Atom<A>*)p;
4798 return count;
4799 }
4800
4801
4802 template <>
4803 ld::Atom::SymbolTableInclusion ImplicitSizeSection<arm64>::symbolTableInclusion()
4804 {
4805 return ld::Atom::symbolTableInWithRandomAutoStripLabel;
4806 }
4807
4808 template <typename A>
4809 ld::Atom::SymbolTableInclusion ImplicitSizeSection<A>::symbolTableInclusion()
4810 {
4811 return ld::Atom::symbolTableNotIn;
4812 }
4813
4814
4815 template <typename A>
4816 uint32_t ImplicitSizeSection<A>::computeAtomCount(class Parser<A>& parser,
4817 struct Parser<A>::LabelAndCFIBreakIterator& it,
4818 const struct Parser<A>::CFI_CU_InfoArrays&)
4819 {
4820 uint32_t count = 0;
4821 const macho_section<P>* sect = this->machoSection();
4822 const pint_t startAddr = sect->addr();
4823 const pint_t endAddr = startAddr + sect->size();
4824 for (pint_t addr = startAddr; addr < endAddr; addr += elementSizeAtAddress(addr) ) {
4825 if ( useElementAt(parser, it, addr) )
4826 ++count;
4827 }
4828 if ( it.fileHasOverlappingSymbols && (sect->size() != 0) && (this->combine(parser, startAddr) == ld::Atom::combineByNameAndContent) ) {
4829 // if there are multiple labels in this section for the same address, then clone them into multi atoms
4830 pint_t prevSymbolAddr = (pint_t)(-1);
4831 uint8_t prevSymbolSectNum = 0;
4832 bool prevIgnore = false;
4833 for(uint32_t i=0; i < it.sortedSymbolCount; ++i) {
4834 const macho_nlist<P>& sym = parser.symbolFromIndex(it.sortedSymbolIndexes[i]);
4835 const pint_t symbolAddr = sym.n_value();
4836 const uint8_t symbolSectNum = sym.n_sect();
4837 const bool ignore = this->ignoreLabel(parser.nameFromSymbol(sym));
4838 if ( !ignore && !prevIgnore && (symbolAddr == prevSymbolAddr) && (prevSymbolSectNum == symbolSectNum) && (symbolSectNum == this->sectionNum(parser)) ) {
4839 ++count;
4840 }
4841 prevSymbolAddr = symbolAddr;
4842 prevSymbolSectNum = symbolSectNum;
4843 prevIgnore = ignore;
4844 }
4845 }
4846 return count;
4847 }
4848
4849 template <typename A>
4850 uint32_t ImplicitSizeSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4851 struct Parser<A>::LabelAndCFIBreakIterator& it,
4852 const struct Parser<A>::CFI_CU_InfoArrays&)
4853 {
4854 this->_beginAtoms = (Atom<A>*)p;
4855
4856 const macho_section<P>* sect = this->machoSection();
4857 const pint_t startAddr = sect->addr();
4858 const pint_t endAddr = startAddr + sect->size();
4859 const uint32_t sectNum = this->sectionNum(parser);
4860 //fprintf(stderr, "ImplicitSizeSection::appendAtoms() in section %s\n", sect->sectname());
4861 uint32_t count = 0;
4862 pint_t foundAddr;
4863 pint_t size;
4864 const macho_nlist<P>* foundLabel;
4865 Atom<A>* allocatedSpace;
4866 while ( it.next(parser, *this, sectNum, startAddr, endAddr, &foundAddr, &size, &foundLabel) ) {
4867 if ( foundLabel != NULL ) {
4868 bool skip = false;
4869 pint_t labeledAtomSize = this->elementSizeAtAddress(foundAddr);
4870 allocatedSpace = (Atom<A>*)p;
4871 if ( this->ignoreLabel(parser.nameFromSymbol(*foundLabel)) ) {
4872 if ( size == 0 ) {
4873 // <rdar://problem/10018737>
4874 // a size of zero means there is another label at same location
4875 // and we are supposed to ignore this label
4876 skip = true;
4877 }
4878 else {
4879 //fprintf(stderr, " 0x%08llX make annon, size=%lld\n", (uint64_t)foundAddr, (uint64_t)size);
4880 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, foundAddr), foundAddr,
4881 this->elementSizeAtAddress(foundAddr), this->definition(),
4882 this->combine(parser, foundAddr), this->scopeAtAddress(parser, foundAddr),
4883 this->contentType(), this->symbolTableInclusion(),
4884 this->dontDeadStrip(), false, false, this->alignmentForAddress(foundAddr));
4885 }
4886 }
4887 else {
4888 // make named atom for label
4889 //fprintf(stderr, " 0x%08llX make labeled\n", (uint64_t)foundAddr);
4890 new (allocatedSpace) Atom<A>(*this, parser, *foundLabel, labeledAtomSize);
4891 }
4892 if ( !skip ) {
4893 ++count;
4894 p += sizeof(Atom<A>);
4895 foundAddr += labeledAtomSize;
4896 size -= labeledAtomSize;
4897 }
4898 }
4899 // some number of anonymous atoms
4900 for (pint_t addr = foundAddr; addr < (foundAddr+size); addr += elementSizeAtAddress(addr) ) {
4901 // make anon atoms for area before label
4902 if ( this->useElementAt(parser, it, addr) ) {
4903 //fprintf(stderr, " 0x%08llX make annon, size=%lld\n", (uint64_t)addr, (uint64_t)elementSizeAtAddress(addr));
4904 allocatedSpace = (Atom<A>*)p;
4905 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, addr), addr, this->elementSizeAtAddress(addr),
4906 this->definition(), this->combine(parser, addr), this->scopeAtAddress(parser, addr),
4907 this->contentType(), this->symbolTableInclusion(),
4908 this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
4909 ++count;
4910 p += sizeof(Atom<A>);
4911 }
4912 }
4913 }
4914
4915 this->_endAtoms = (Atom<A>*)p;
4916
4917 return count;
4918 }
4919
4920
4921 template <typename A>
4922 unsigned long Literal4Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4923 {
4924 const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
4925 return *literalContent;
4926 }
4927
4928 template <typename A>
4929 bool Literal4Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4930 const ld::IndirectBindingTable& ind) const
4931 {
4932 assert(this->type() == rhs.section().type());
4933 const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
4934
4935 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4936 assert(rhsAtom != NULL);
4937 if ( rhsAtom != NULL ) {
4938 const uint32_t* rhsLiteralContent = (uint32_t*)rhsAtom->contentPointer();
4939 return (*literalContent == *rhsLiteralContent);
4940 }
4941 return false;
4942 }
4943
4944
4945 template <typename A>
4946 unsigned long Literal8Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4947 {
4948 #if __LP64__
4949 const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4950 return *literalContent;
4951 #else
4952 unsigned long hash = 5381;
4953 const uint8_t* byteContent = atom->contentPointer();
4954 for (int i=0; i < 8; ++i) {
4955 hash = hash * 33 + byteContent[i];
4956 }
4957 return hash;
4958 #endif
4959 }
4960
4961 template <typename A>
4962 bool Literal8Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4963 const ld::IndirectBindingTable& ind) const
4964 {
4965 if ( rhs.section().type() != ld::Section::typeLiteral8 )
4966 return false;
4967 assert(this->type() == rhs.section().type());
4968 const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4969
4970 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4971 assert(rhsAtom != NULL);
4972 if ( rhsAtom != NULL ) {
4973 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
4974 return (*literalContent == *rhsLiteralContent);
4975 }
4976 return false;
4977 }
4978
4979
4980 template <typename A>
4981 unsigned long Literal16Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4982 {
4983 unsigned long hash = 5381;
4984 const uint8_t* byteContent = atom->contentPointer();
4985 for (int i=0; i < 16; ++i) {
4986 hash = hash * 33 + byteContent[i];
4987 }
4988 return hash;
4989 }
4990
4991 template <typename A>
4992 bool Literal16Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4993 const ld::IndirectBindingTable& ind) const
4994 {
4995 if ( rhs.section().type() != ld::Section::typeLiteral16 )
4996 return false;
4997 assert(this->type() == rhs.section().type());
4998 const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4999
5000 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5001 assert(rhsAtom != NULL);
5002 if ( rhsAtom != NULL ) {
5003 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
5004 return ((literalContent[0] == rhsLiteralContent[0]) && (literalContent[1] == rhsLiteralContent[1]));
5005 }
5006 return false;
5007 }
5008
5009
5010
5011 template <typename A>
5012 typename A::P::uint_t CStringSection<A>::elementSizeAtAddress(pint_t addr)
5013 {
5014 const macho_section<P>* sect = this->machoSection();
5015 const char* stringContent = (char*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
5016 return strlen(stringContent) + 1;
5017 }
5018
5019 template <typename A>
5020 bool CStringSection<A>::useElementAt(Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
5021 {
5022 return true;
5023 }
5024
5025 template <typename A>
5026 bool CStringSection<A>::ignoreLabel(const char* label) const
5027 {
5028 return (label[0] == 'L') || (label[0] == 'l');
5029 }
5030
5031
5032 template <typename A>
5033 Atom<A>* CStringSection<A>::findAtomByAddress(pint_t addr)
5034 {
5035 Atom<A>* result = this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms);
5036 return result;
5037 }
5038
5039 template <typename A>
5040 unsigned long CStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5041 {
5042 unsigned long hash = 5381;
5043 const char* stringContent = (char*)atom->contentPointer();
5044 for (const char* s = stringContent; *s != '\0'; ++s) {
5045 hash = hash * 33 + *s;
5046 }
5047 return hash;
5048 }
5049
5050
5051 template <typename A>
5052 bool CStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5053 const ld::IndirectBindingTable& ind) const
5054 {
5055 if ( rhs.section().type() != ld::Section::typeCString )
5056 return false;
5057 assert(this->type() == rhs.section().type());
5058 assert(strcmp(this->sectionName(), rhs.section().sectionName())== 0);
5059 assert(strcmp(this->segmentName(), rhs.section().segmentName())== 0);
5060 const char* stringContent = (char*)atom->contentPointer();
5061
5062 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5063 assert(rhsAtom != NULL);
5064 if ( rhsAtom != NULL ) {
5065 if ( atom->_size != rhsAtom->_size )
5066 return false;
5067 const char* rhsStringContent = (char*)rhsAtom->contentPointer();
5068 return (strcmp(stringContent, rhsStringContent) == 0);
5069 }
5070 return false;
5071 }
5072
5073
5074 template <>
5075 ld::Fixup::Kind NonLazyPointerSection<x86>::fixupKind()
5076 {
5077 return ld::Fixup::kindStoreLittleEndian32;
5078 }
5079
5080 template <>
5081 ld::Fixup::Kind NonLazyPointerSection<arm>::fixupKind()
5082 {
5083 return ld::Fixup::kindStoreLittleEndian32;
5084 }
5085
5086 template <>
5087 ld::Fixup::Kind NonLazyPointerSection<arm64>::fixupKind()
5088 {
5089 return ld::Fixup::kindStoreLittleEndian64;
5090 }
5091
5092
5093 template <>
5094 void NonLazyPointerSection<x86_64>::makeFixups(class Parser<x86_64>& parser, const struct Parser<x86_64>::CFI_CU_InfoArrays&)
5095 {
5096 assert(0 && "x86_64 should not have non-lazy-pointer sections in .o files");
5097 }
5098
5099 template <typename A>
5100 void NonLazyPointerSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
5101 {
5102 // add references for each NLP atom based on indirect symbol table
5103 const macho_section<P>* sect = this->machoSection();
5104 const pint_t endAddr = sect->addr() + sect->size();
5105 for( pint_t addr = sect->addr(); addr < endAddr; addr += sizeof(pint_t)) {
5106 typename Parser<A>::SourceLocation src;
5107 typename Parser<A>::TargetDesc target;
5108 src.atom = this->findAtomByAddress(addr);
5109 src.offsetInAtom = 0;
5110 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
5111 target.atom = NULL;
5112 target.name = NULL;
5113 target.weakImport = false;
5114 target.addend = 0;
5115 if ( symIndex == INDIRECT_SYMBOL_LOCAL ) {
5116 // use direct reference for local symbols
5117 const pint_t* nlpContent = (pint_t*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
5118 pint_t targetAddr = P::getP(*nlpContent);
5119 target.atom = parser.findAtomByAddress(targetAddr);
5120 target.weakImport = false;
5121 target.addend = (targetAddr - target.atom->objectAddress());
5122 // <rdar://problem/8385011> if pointer to thumb function, mask of thumb bit (not an addend of +1)
5123 if ( target.atom->isThumb() )
5124 target.addend &= (-2);
5125 assert(src.atom->combine() == ld::Atom::combineNever);
5126 }
5127 else {
5128 const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
5129 // use direct reference for local symbols
5130 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
5131 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5132 assert(src.atom->combine() == ld::Atom::combineNever);
5133 }
5134 else {
5135 target.name = parser.nameFromSymbol(sym);
5136 target.weakImport = parser.weakImportFromSymbol(sym);
5137 assert(src.atom->combine() == ld::Atom::combineByNameAndReferences);
5138 }
5139 }
5140 parser.addFixups(src, this->fixupKind(), target);
5141 }
5142 }
5143
5144 template <typename A>
5145 ld::Atom::Combine NonLazyPointerSection<A>::combine(Parser<A>& parser, pint_t addr)
5146 {
5147 const macho_section<P>* sect = this->machoSection();
5148 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
5149 if ( symIndex == INDIRECT_SYMBOL_LOCAL)
5150 return ld::Atom::combineNever;
5151
5152 // don't coalesce non-lazy-pointers to local symbols
5153 const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
5154 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) )
5155 return ld::Atom::combineNever;
5156
5157 return ld::Atom::combineByNameAndReferences;
5158 }
5159
5160 template <typename A>
5161 const char* NonLazyPointerSection<A>::targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind)
5162 {
5163 assert(atom->combine() == ld::Atom::combineByNameAndReferences);
5164 assert(atom->fixupCount() == 1);
5165 ld::Fixup::iterator fit = atom->fixupsBegin();
5166 const char* name = NULL;
5167 switch ( fit->binding ) {
5168 case ld::Fixup::bindingByNameUnbound:
5169 name = fit->u.name;
5170 break;
5171 case ld::Fixup::bindingByContentBound:
5172 name = fit->u.target->name();
5173 break;
5174 case ld::Fixup::bindingsIndirectlyBound:
5175 name = ind.indirectName(fit->u.bindingIndex);
5176 break;
5177 default:
5178 assert(0);
5179 }
5180 assert(name != NULL);
5181 return name;
5182 }
5183
5184 template <typename A>
5185 unsigned long NonLazyPointerSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5186 {
5187 assert(atom->combine() == ld::Atom::combineByNameAndReferences);
5188 unsigned long hash = 9508;
5189 for (const char* s = this->targetName(atom, ind); *s != '\0'; ++s) {
5190 hash = hash * 33 + *s;
5191 }
5192 return hash;
5193 }
5194
5195 template <typename A>
5196 bool NonLazyPointerSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5197 const ld::IndirectBindingTable& indirectBindingTable) const
5198 {
5199 if ( rhs.section().type() != ld::Section::typeNonLazyPointer )
5200 return false;
5201 assert(this->type() == rhs.section().type());
5202 // there can be many non-lazy pointer in different section names
5203 // we only want to coalesce in same section name
5204 if ( *this != rhs.section() )
5205 return false;
5206 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5207 assert(rhsAtom != NULL);
5208 const char* thisName = this->targetName(atom, indirectBindingTable);
5209 const char* rhsName = this->targetName(rhsAtom, indirectBindingTable);
5210 return (strcmp(thisName, rhsName) == 0);
5211 }
5212
5213 template <typename A>
5214 ld::Atom::Scope NonLazyPointerSection<A>::scopeAtAddress(Parser<A>& parser, pint_t addr)
5215 {
5216 const macho_section<P>* sect = this->machoSection();
5217 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
5218 if ( symIndex == INDIRECT_SYMBOL_LOCAL)
5219 return ld::Atom::scopeTranslationUnit;
5220 else
5221 return ld::Atom::scopeLinkageUnit;
5222 }
5223
5224
5225 template <typename A>
5226 const uint8_t* CFStringSection<A>::targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
5227 ContentType* ct, unsigned int* count)
5228 {
5229 *ct = contentUnknown;
5230 for (ld::Fixup::iterator fit=atom->fixupsBegin(), end=atom->fixupsEnd(); fit != end; ++fit) {
5231 const ld::Atom* targetAtom = NULL;
5232 switch ( fit->binding ) {
5233 case ld::Fixup::bindingByNameUnbound:
5234 // ignore reference to ___CFConstantStringClassReference
5235 // we are just looking for reference to backing string data
5236 assert(fit->offsetInAtom == 0);
5237 assert(strcmp(fit->u.name, "___CFConstantStringClassReference") == 0);
5238 break;
5239 case ld::Fixup::bindingDirectlyBound:
5240 case ld::Fixup::bindingByContentBound:
5241 targetAtom = fit->u.target;
5242 break;
5243 case ld::Fixup::bindingsIndirectlyBound:
5244 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5245 break;
5246 default:
5247 assert(0 && "bad binding type");
5248 }
5249 assert(targetAtom != NULL);
5250 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5251 if ( targetAtom->section().type() == ld::Section::typeCString ) {
5252 *ct = contentUTF8;
5253 *count = targetAtom->size();
5254 }
5255 else if ( targetAtom->section().type() == ld::Section::typeUTF16Strings ) {
5256 *ct = contentUTF16;
5257 *count = (targetAtom->size()+1)/2; // round up incase of buggy compiler that has only one trailing zero byte
5258 }
5259 else {
5260 *ct = contentUnknown;
5261 *count = 0;
5262 return NULL;
5263 }
5264 return target->contentPointer();
5265 }
5266 assert(0);
5267 return NULL;
5268 }
5269
5270 template <typename A>
5271 unsigned long CFStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5272 {
5273 // base hash of CFString on hash of cstring it wraps
5274 ContentType cType;
5275 unsigned long hash;
5276 unsigned int charCount;
5277 const uint8_t* content = this->targetContent(atom, ind, &cType, &charCount);
5278 switch ( cType ) {
5279 case contentUTF8:
5280 hash = 9408;
5281 for (const char* s = (char*)content; *s != '\0'; ++s) {
5282 hash = hash * 33 + *s;
5283 }
5284 return hash;
5285 case contentUTF16:
5286 hash = 407955;
5287 --charCount; // don't add last 0x0000 to hash because some buggy compilers only have trailing single byte
5288 for (const uint16_t* s = (uint16_t*)content; charCount > 0; ++s, --charCount) {
5289 hash = hash * 1025 + *s;
5290 }
5291 return hash;
5292 case contentUnknown:
5293 // <rdar://problem/14134211> For malformed CFStrings, hash to address of atom so they have unique hashes
5294 return ULONG_MAX - (unsigned long)(atom);
5295 }
5296 return 0;
5297 }
5298
5299
5300 template <typename A>
5301 bool CFStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5302 const ld::IndirectBindingTable& indirectBindingTable) const
5303 {
5304 if ( atom == &rhs )
5305 return true;
5306 if ( rhs.section().type() != ld::Section::typeCFString)
5307 return false;
5308 assert(this->type() == rhs.section().type());
5309 assert(strcmp(this->sectionName(), "__cfstring") == 0);
5310
5311 ContentType thisType;
5312 unsigned int charCount;
5313 const uint8_t* cstringContent = this->targetContent(atom, indirectBindingTable, &thisType, &charCount);
5314 ContentType rhsType;
5315 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5316 assert(rhsAtom != NULL);
5317 unsigned int rhsCharCount;
5318 const uint8_t* rhsStringContent = this->targetContent(rhsAtom, indirectBindingTable, &rhsType, &rhsCharCount);
5319
5320 if ( thisType != rhsType )
5321 return false;
5322
5323 if ( thisType == contentUnknown )
5324 return false;
5325
5326 if ( rhsType == contentUnknown )
5327 return false;
5328
5329 // no need to compare content of pointers are already the same
5330 if ( cstringContent == rhsStringContent )
5331 return true;
5332
5333 // no need to compare content if size is different
5334 if ( charCount != rhsCharCount )
5335 return false;
5336
5337 switch ( thisType ) {
5338 case contentUTF8:
5339 return (strcmp((char*)cstringContent, (char*)rhsStringContent) == 0);
5340 case contentUTF16:
5341 {
5342 const uint16_t* cstringContent16 = (uint16_t*)cstringContent;
5343 const uint16_t* rhsStringContent16 = (uint16_t*)rhsStringContent;
5344 for (unsigned int i = 0; i < charCount; ++i) {
5345 if ( cstringContent16[i] != rhsStringContent16[i] )
5346 return false;
5347 }
5348 return true;
5349 }
5350 case contentUnknown:
5351 return false;
5352 }
5353 return false;
5354 }
5355
5356
5357 template <typename A>
5358 typename A::P::uint_t ObjC1ClassSection<A>::elementSizeAtAddress(pint_t addr)
5359 {
5360 // nominal size for each class is 48 bytes, but sometimes the compiler
5361 // over aligns and there is padding after class data
5362 const macho_section<P>* sct = this->machoSection();
5363 uint32_t align = 1 << sct->align();
5364 uint32_t size = ((12 * sizeof(pint_t)) + align-1) & (-align);
5365 return size;
5366 }
5367
5368 template <typename A>
5369 const char* ObjC1ClassSection<A>::unlabeledAtomName(Parser<A>& parser, pint_t addr)
5370 {
5371 // 8-bytes into class object is pointer to class name
5372 const macho_section<P>* sct = this->machoSection();
5373 uint32_t classObjcFileOffset = sct->offset() - sct->addr() + addr;
5374 const uint8_t* mappedFileContent = this->file().fileContent();
5375 pint_t nameAddr = P::getP(*((pint_t*)(mappedFileContent+classObjcFileOffset+2*sizeof(pint_t))));
5376
5377 // find section containing string address to get string bytes
5378 const macho_section<P>* const sections = parser.firstMachOSection();
5379 const uint32_t sectionCount = parser.machOSectionCount();
5380 for (uint32_t i=0; i < sectionCount; ++i) {
5381 const macho_section<P>* aSect = &sections[i];
5382 if ( (aSect->addr() <= nameAddr) && (nameAddr < (aSect->addr()+aSect->size())) ) {
5383 assert((aSect->flags() & SECTION_TYPE) == S_CSTRING_LITERALS);
5384 uint32_t nameFileOffset = aSect->offset() - aSect->addr() + nameAddr;
5385 const char* name = (char*)mappedFileContent + nameFileOffset;
5386 // spin through symbol table to find absolute symbol corresponding to this class
5387 for (uint32_t s=0; s < parser.symbolCount(); ++s) {
5388 const macho_nlist<P>& sym = parser.symbolFromIndex(s);
5389 if ( (sym.n_type() & N_TYPE) != N_ABS )
5390 continue;
5391 const char* absName = parser.nameFromSymbol(sym);
5392 if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
5393 if ( strcmp(&absName[17], name) == 0 )
5394 return absName;
5395 }
5396 }
5397 assert(0 && "obj class name not found in symbol table");
5398 }
5399 }
5400 assert(0 && "obj class name not found");
5401 return "unknown objc class";
5402 }
5403
5404
5405 template <typename A>
5406 const char* ObjC2ClassRefsSection<A>::targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5407 {
5408 assert(atom->fixupCount() == 1);
5409 ld::Fixup::iterator fit = atom->fixupsBegin();
5410 const char* className = NULL;
5411 switch ( fit->binding ) {
5412 case ld::Fixup::bindingByNameUnbound:
5413 className = fit->u.name;
5414 break;
5415 case ld::Fixup::bindingDirectlyBound:
5416 case ld::Fixup::bindingByContentBound:
5417 className = fit->u.target->name();
5418 break;
5419 case ld::Fixup::bindingsIndirectlyBound:
5420 className = ind.indirectName(fit->u.bindingIndex);
5421 break;
5422 default:
5423 assert(0 && "unsupported binding in objc2 class ref section");
5424 }
5425 assert(className != NULL);
5426 return className;
5427 }
5428
5429
5430 template <typename A>
5431 unsigned long ObjC2ClassRefsSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5432 {
5433 unsigned long hash = 978;
5434 for (const char* s = targetClassName(atom, ind); *s != '\0'; ++s) {
5435 hash = hash * 33 + *s;
5436 }
5437 return hash;
5438 }
5439
5440 template <typename A>
5441 bool ObjC2ClassRefsSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5442 const ld::IndirectBindingTable& indirectBindingTable) const
5443 {
5444 assert(this->type() == rhs.section().type());
5445 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5446 assert(rhsAtom != NULL);
5447 const char* thisClassName = targetClassName(atom, indirectBindingTable);
5448 const char* rhsClassName = targetClassName(rhsAtom, indirectBindingTable);
5449 return (strcmp(thisClassName, rhsClassName) == 0);
5450 }
5451
5452
5453 template <typename A>
5454 const char* Objc1ClassReferences<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5455 {
5456 assert(atom->fixupCount() == 2);
5457 ld::Fixup::iterator fit = atom->fixupsBegin();
5458 if ( fit->kind == ld::Fixup::kindSetTargetAddress )
5459 ++fit;
5460 const ld::Atom* targetAtom = NULL;
5461 switch ( fit->binding ) {
5462 case ld::Fixup::bindingByContentBound:
5463 targetAtom = fit->u.target;
5464 break;
5465 case ld::Fixup::bindingsIndirectlyBound:
5466 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5467 if ( targetAtom == NULL ) {
5468 fprintf(stderr, "missing target named %s\n", ind.indirectName(fit->u.bindingIndex));
5469 }
5470 break;
5471 default:
5472 assert(0);
5473 }
5474 assert(targetAtom != NULL);
5475 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5476 assert(target != NULL);
5477 return (char*)target->contentPointer();
5478 }
5479
5480
5481 template <typename A>
5482 const char* PointerToCStringSection<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5483 {
5484 assert(atom->fixupCount() == 1);
5485 ld::Fixup::iterator fit = atom->fixupsBegin();
5486 const ld::Atom* targetAtom = NULL;
5487 switch ( fit->binding ) {
5488 case ld::Fixup::bindingByContentBound:
5489 targetAtom = fit->u.target;
5490 break;
5491 case ld::Fixup::bindingsIndirectlyBound:
5492 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5493 break;
5494 case ld::Fixup::bindingDirectlyBound:
5495 targetAtom = fit->u.target;
5496 break;
5497 default:
5498 assert(0 && "unsupported reference to selector");
5499 }
5500 assert(targetAtom != NULL);
5501 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5502 assert(target != NULL);
5503 assert(target->contentType() == ld::Atom::typeCString);
5504 return (char*)target->contentPointer();
5505 }
5506
5507 template <typename A>
5508 unsigned long PointerToCStringSection<A>::contentHash(const class Atom<A>* atom,
5509 const ld::IndirectBindingTable& indirectBindingTable) const
5510 {
5511 // make hash from section name and target cstring name
5512 unsigned long hash = 123;
5513 for (const char* s = this->sectionName(); *s != '\0'; ++s) {
5514 hash = hash * 33 + *s;
5515 }
5516 for (const char* s = this->targetCString(atom, indirectBindingTable); *s != '\0'; ++s) {
5517 hash = hash * 33 + *s;
5518 }
5519 return hash;
5520 }
5521
5522 template <typename A>
5523 bool PointerToCStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5524 const ld::IndirectBindingTable& indirectBindingTable) const
5525 {
5526 assert(this->type() == rhs.section().type());
5527 // there can be pointers-to-cstrings in different section names
5528 // we only want to coalesce in same section name
5529 if ( *this != rhs.section() )
5530 return false;
5531
5532 // get string content for this
5533 const char* cstringContent = this->targetCString(atom, indirectBindingTable);
5534 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5535 assert(rhsAtom != NULL);
5536 const char* rhsCstringContent = this->targetCString(rhsAtom, indirectBindingTable);
5537
5538 assert(cstringContent != NULL);
5539 assert(rhsCstringContent != NULL);
5540 return (strcmp(cstringContent, rhsCstringContent) == 0);
5541 }
5542
5543
5544
5545 template <typename A>
5546 unsigned long UTF16StringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5547 {
5548 unsigned long hash = 5381;
5549 const uint16_t* stringContent = (uint16_t*)atom->contentPointer();
5550 // some buggy compilers end utf16 data with single byte, so don't use last word in hash computation
5551 unsigned int count = (atom->size()/2) - 1;
5552 for (const uint16_t* s = stringContent; count > 0; ++s, --count) {
5553 hash = hash * 33 + *s;
5554 }
5555 return hash;
5556 }
5557
5558 template <typename A>
5559 bool UTF16StringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5560 const ld::IndirectBindingTable& ind) const
5561 {
5562 if ( rhs.section().type() != ld::Section::typeUTF16Strings )
5563 return false;
5564 assert(0);
5565 return false;
5566 }
5567
5568
5569
5570
5571
5572
5573
5574 template <>
5575 uint32_t Section<x86_64>::x86_64PcRelOffset(uint8_t r_type)
5576 {
5577 switch ( r_type ) {
5578 case X86_64_RELOC_SIGNED:
5579 return 4;
5580 case X86_64_RELOC_SIGNED_1:
5581 return 5;
5582 case X86_64_RELOC_SIGNED_2:
5583 return 6;
5584 case X86_64_RELOC_SIGNED_4:
5585 return 8;
5586 }
5587 return 0;
5588 }
5589
5590
// Converts one x86_64 relocation entry of this section into fixups, added
// through parser.
//   parser - supplies symbol lookup and target resolution, and receives the fixups
//   reloc  - the relocation entry to convert; for X86_64_RELOC_SUBTRACTOR the
//            entry that follows it (reloc[1]) is read as well
// Returns true only for X86_64_RELOC_SUBTRACTOR and false for every other
// type (NOTE(review): presumably this tells the caller that the following
// entry was consumed too -- confirm against the caller).
// Throws a C string, or calls throwf(), for flag/length combinations and
// relocation types that are not supported.
5591 template <>
5592 bool Section<x86_64>::addRelocFixup(class Parser<x86_64>& parser, const macho_relocation_info<P>* reloc)
5593 {
5594 const macho_section<P>* sect = this->machoSection();
5595 uint64_t srcAddr = sect->addr() + reloc->r_address();
5596 Parser<x86_64>::SourceLocation src;
5597 Parser<x86_64>::TargetDesc target;
5598 Parser<x86_64>::TargetDesc toTarget;
5599 src.atom = this->findAtomByAddress(srcAddr);
5600 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5601 const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5602 uint64_t contentValue = 0;
5603 const macho_relocation_info<x86_64::P>* nextReloc = &reloc[1];
5604 bool result = false;
5605 bool useDirectBinding;
// Read the value currently stored at the fixup location.  r_length 0..3
// selects a 1-, 2-, 4- or 8-byte field; the 2- and 4-byte values are
// sign-extended to 64 bits, the 1-byte value is not.
5606 switch ( reloc->r_length() ) {
5607 case 0:
5608 contentValue = *fixUpPtr;
5609 break;
5610 case 1:
5611 contentValue = (int64_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5612 break;
5613 case 2:
5614 contentValue = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5615 break;
5616 case 3:
5617 contentValue = E::get64(*((uint64_t*)fixUpPtr));
5618 break;
5619 }
5620 target.atom = NULL;
5621 target.name = NULL;
5622 target.weakImport = false;
5623 target.addend = 0;
// Work out what the relocation refers to.  With r_extern set, r_symbolnum is
// used as a symbol table index; otherwise it is handed to
// findTargetFromAddressAndSectionNum() together with the stored value.
5624 if ( reloc->r_extern() ) {
5625 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5626 // use direct reference for local symbols
5627 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5628 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5629 target.addend += contentValue;
5630 }
5631 else {
5632 target.name = parser.nameFromSymbol(sym);
5633 target.weakImport = parser.weakImportFromSymbol(sym);
5634 target.addend = contentValue;
5635 }
5636 // cfstrings should always use direct reference to backing store
5637 if ( (this->type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
5638 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5639 target.addend = contentValue;
5640 }
5641 }
5642 else {
// pc-relative: rebase the stored value by the source address plus the
// per-type offset before looking up the target
5643 if ( reloc->r_pcrel() )
5644 contentValue += srcAddr + x86_64PcRelOffset(reloc->r_type());
5645 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5646 }
// Emit the fixup(s) for the relocation type, rejecting flag/length
// combinations that are not supported.
5647 switch ( reloc->r_type() ) {
5648 case X86_64_RELOC_UNSIGNED:
5649 if ( reloc->r_pcrel() )
5650 throw "pcrel and X86_64_RELOC_UNSIGNED not supported";
5651 switch ( reloc->r_length() ) {
5652 case 0:
5653 case 1:
5654 throw "length < 2 and X86_64_RELOC_UNSIGNED not supported";
5655 case 2:
5656 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5657 break;
5658 case 3:
5659 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian64, target);
5660 break;
5661 }
5662 break;
5663 case X86_64_RELOC_SIGNED:
5664 case X86_64_RELOC_SIGNED_1:
5665 case X86_64_RELOC_SIGNED_2:
5666 case X86_64_RELOC_SIGNED_4:
5667 if ( ! reloc->r_pcrel() )
5668 throw "not pcrel and X86_64_RELOC_SIGNED* not supported";
5669 if ( reloc->r_length() != 2 )
5670 throw "length != 2 and X86_64_RELOC_SIGNED* not supported";
// for the _1/_2/_4 variants an extern target gets 1, 2 or 4 added to its addend
5671 switch ( reloc->r_type() ) {
5672 case X86_64_RELOC_SIGNED:
5673 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5674 break;
5675 case X86_64_RELOC_SIGNED_1:
5676 if ( reloc->r_extern() )
5677 target.addend += 1;
5678 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_1, target);
5679 break;
5680 case X86_64_RELOC_SIGNED_2:
5681 if ( reloc->r_extern() )
5682 target.addend += 2;
5683 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_2, target);
5684 break;
5685 case X86_64_RELOC_SIGNED_4:
5686 if ( reloc->r_extern() )
5687 target.addend += 4;
5688 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_4, target);
5689 break;
5690 }
5691 break;
5692 case X86_64_RELOC_BRANCH:
5693 if ( ! reloc->r_pcrel() )
5694 throw "not pcrel and X86_64_RELOC_BRANCH not supported";
5695 switch ( reloc->r_length() ) {
5696 case 2:
// branches to names starting with ___dtrace_probe$ or ___dtrace_isenabled$
// get a dtrace-specific fixup plus extra info taken from the rest of the name
5697 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5698 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5699 parser.addDtraceExtraInfos(src, &target.name[16]);
5700 }
5701 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5702 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5703 parser.addDtraceExtraInfos(src, &target.name[20]);
5704 }
5705 else {
5706 parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel32, target);
5707 }
5708 break;
5709 case 0:
5710 parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel8, target);
5711 break;
5712 default:
5713 throwf("length=%d and X86_64_RELOC_BRANCH not supported", reloc->r_length());
5714 }
5715 break;
5716 case X86_64_RELOC_GOT:
5717 if ( ! reloc->r_extern() )
5718 throw "not extern and X86_64_RELOC_GOT not supported";
5719 if ( ! reloc->r_pcrel() )
5720 throw "not pcrel and X86_64_RELOC_GOT not supported";
5721 if ( reloc->r_length() != 2 )
5722 throw "length != 2 and X86_64_RELOC_GOT not supported";
5723 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOT, target);
5724 break;
5725 case X86_64_RELOC_GOT_LOAD:
5726 if ( ! reloc->r_extern() )
5727 throw "not extern and X86_64_RELOC_GOT_LOAD not supported";
5728 if ( ! reloc->r_pcrel() )
5729 throw "not pcrel and X86_64_RELOC_GOT_LOAD not supported";
5730 if ( reloc->r_length() != 2 )
5731 throw "length != 2 and X86_64_RELOC_GOT_LOAD not supported";
5732 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOTLoad, target);
5733 break;
// SUBTRACTOR is paired with the UNSIGNED entry that follows it.  `target`
// (resolved above from this entry) is the symbol being subtracted; `toTarget`
// is resolved from the following entry.  The pair becomes a 4-fixup cluster:
// set target address to toTarget, add the addend, subtract target, store.
5734 case X86_64_RELOC_SUBTRACTOR:
5735 if ( reloc->r_pcrel() )
5736 throw "X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5737 if ( reloc->r_length() < 2 )
5738 throw "X86_64_RELOC_SUBTRACTOR must have r_length of 2 or 3";
5739 if ( !reloc->r_extern() )
5740 throw "X86_64_RELOC_SUBTRACTOR must have r_extern=1";
5741 if ( nextReloc->r_type() != X86_64_RELOC_UNSIGNED )
5742 throw "X86_64_RELOC_SUBTRACTOR must be followed by X86_64_RELOC_UNSIGNED";
5743 result = true;
5744 if ( nextReloc->r_pcrel() )
5745 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5746 if ( nextReloc->r_length() != reloc->r_length() )
5747 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR must have same r_length";
5748 if ( nextReloc->r_extern() ) {
5749 const macho_nlist<P>& sym = parser.symbolFromIndex(nextReloc->r_symbolnum());
5750 // use direct reference for local symbols
5751 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5752 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), toTarget);
5753 toTarget.addend = contentValue;
5754 useDirectBinding = true;
5755 }
5756 else {
5757 toTarget.name = parser.nameFromSymbol(sym);
5758 toTarget.weakImport = parser.weakImportFromSymbol(sym);
5759 toTarget.addend = contentValue;
5760 useDirectBinding = false;
5761 }
5762 }
5763 else {
5764 parser.findTargetFromAddressAndSectionNum(contentValue, nextReloc->r_symbolnum(), toTarget);
5765 useDirectBinding = (toTarget.atom->scope() == ld::Atom::scopeTranslationUnit);
5766 }
5767 if ( useDirectBinding )
5768 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.atom);
5769 else
5770 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.weakImport, toTarget.name);
5771 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, toTarget.addend);
5772 if ( target.atom == NULL )
5773 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, false, target.name);
5774 else
5775 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, target.atom);
5776 if ( reloc->r_length() == 2 )
5777 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
5778 else
5779 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian64);
5780 break;
5781 case X86_64_RELOC_TLV:
5782 if ( ! reloc->r_extern() )
5783 throw "not extern and X86_64_RELOC_TLV not supported";
5784 if ( ! reloc->r_pcrel() )
5785 throw "not pcrel and X86_64_RELOC_TLV not supported";
5786 if ( reloc->r_length() != 2 )
5787 throw "length != 2 and X86_64_RELOC_TLV not supported";
5788 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
5789 break;
5790 default:
5791 throwf("unknown relocation type %d", reloc->r_type());
5792 }
5793 return result;
5794 }
5795
5796
5797
5798 template <>
5799 bool Section<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<P>* reloc)
5800 {
5801 const macho_section<P>* sect = this->machoSection();
5802 uint32_t srcAddr;
5803 const uint8_t* fixUpPtr;
5804 uint32_t contentValue = 0;
5805 ld::Fixup::Kind kind = ld::Fixup::kindNone;
5806 Parser<x86>::SourceLocation src;
5807 Parser<x86>::TargetDesc target;
5808
5809 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
5810 srcAddr = sect->addr() + reloc->r_address();
5811 src.atom = this->findAtomByAddress(srcAddr);
5812 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5813 fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5814 switch ( reloc->r_type() ) {
5815 case GENERIC_RELOC_VANILLA:
5816 switch ( reloc->r_length() ) {
5817 case 0:
5818 contentValue = (int32_t)(int8_t)*fixUpPtr;
5819 if ( reloc->r_pcrel() ) {
5820 kind = ld::Fixup::kindStoreX86BranchPCRel8;
5821 contentValue += srcAddr + sizeof(uint8_t);
5822 }
5823 else
5824 throw "r_length=0 and r_pcrel=0 not supported";
5825 break;
5826 case 1:
5827 contentValue = (int32_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5828 if ( reloc->r_pcrel() ) {
5829 kind = ld::Fixup::kindStoreX86PCRel16;
5830 contentValue += srcAddr + sizeof(uint16_t);
5831 }
5832 else
5833 kind = ld::Fixup::kindStoreLittleEndian16;
5834 break;
5835 case 2:
5836 contentValue = E::get32(*((uint32_t*)fixUpPtr));
5837 if ( reloc->r_pcrel() ) {
5838 kind = ld::Fixup::kindStoreX86BranchPCRel32;
5839 contentValue += srcAddr + sizeof(uint32_t);
5840 }
5841 else
5842 kind = ld::Fixup::kindStoreLittleEndian32;
5843 break;
5844 case 3:
5845 throw "r_length=3 not supported";
5846 }
5847 if ( reloc->r_extern() ) {
5848 target.atom = NULL;
5849 const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
5850 target.name = parser.nameFromSymbol(targetSymbol);
5851 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
5852 target.addend = (int32_t)contentValue;
5853 }
5854 else {
5855 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5856 }
5857 if ( (kind == ld::Fixup::kindStoreX86BranchPCRel32) && (target.name != NULL) ) {
5858 if ( strncmp(target.name, "___dtrace_probe$", 16) == 0 ) {
5859 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5860 parser.addDtraceExtraInfos(src, &target.name[16]);
5861 return false;
5862 }
5863 else if ( strncmp(target.name, "___dtrace_isenabled$", 20) == 0 ) {
5864 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5865 parser.addDtraceExtraInfos(src, &target.name[20]);
5866 return false;
5867 }
5868 }
5869 parser.addFixups(src, kind, target);
5870 return false;
5871 break;
5872 case GENERIC_RLEOC_TLV:
5873 {
5874 if ( !reloc->r_extern() )
5875 throw "r_extern=0 and r_type=GENERIC_RLEOC_TLV not supported";
5876 if ( reloc->r_length() != 2 )
5877 throw "r_length!=2 and r_type=GENERIC_RLEOC_TLV not supported";
5878 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5879 // use direct reference for local symbols
5880 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
5881 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5882 }
5883 else {
5884 target.atom = NULL;
5885 target.name = parser.nameFromSymbol(sym);
5886 target.weakImport = parser.weakImportFromSymbol(sym);
5887 }
5888 target.addend = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5889 if ( reloc->r_pcrel() ) {
5890 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
5891 }
5892 else {
5893 parser.addFixups(src, ld::Fixup::kindStoreX86Abs32TLVLoad, target);
5894 }
5895 return false;
5896 }
5897 break;
5898 default:
5899 throwf("unsupported i386 relocation type (%d)", reloc->r_type());
5900 }
5901 }
5902 else {
5903 // scattered relocation
5904 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
5905 srcAddr = sect->addr() + sreloc->r_address();
5906 src.atom = this->findAtomByAddress(srcAddr);
5907 assert(src.atom != NULL);
5908 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5909 fixUpPtr = file().fileContent() + sect->offset() + sreloc->r_address();
5910 uint32_t relocValue = sreloc->r_value();
5911 bool result = false;
5912 // file format allows pair to be scattered or not
5913 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
5914 const macho_relocation_info<P>* nextReloc = &reloc[1];
5915 bool nextRelocIsPair = false;
5916 uint32_t nextRelocAddress = 0;
5917 uint32_t nextRelocValue = 0;
5918 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
5919 if ( nextReloc->r_type() == GENERIC_RELOC_PAIR ) {
5920 nextRelocIsPair = true;
5921 nextRelocAddress = nextReloc->r_address();
5922 result = true; // iterator should skip next reloc, since we've consumed it here
5923 }
5924 }
5925 else {
5926 if ( nextSReloc->r_type() == GENERIC_RELOC_PAIR ) {
5927 nextRelocIsPair = true;
5928 nextRelocAddress = nextSReloc->r_address();
5929 nextRelocValue = nextSReloc->r_value();
5930 }
5931 }
5932 switch (sreloc->r_type()) {
5933 case GENERIC_RELOC_VANILLA:
5934 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
5935 target.atom = parser.findAtomByAddress(relocValue);
5936 if ( sreloc->r_pcrel() ) {
5937 switch ( sreloc->r_length() ) {
5938 case 0:
5939 contentValue = srcAddr + 1 + *fixUpPtr;
5940 target.addend = (int32_t)contentValue - (int32_t)relocValue;
5941 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel8, target);
5942 break;
5943 case 1:
5944 contentValue = srcAddr + 2 + LittleEndian::get16(*((uint16_t*)fixUpPtr));
5945 target.addend = (int32_t)contentValue - (int32_t)relocValue;
5946 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel16, target);
5947 break;
5948 case 2:
5949 contentValue = srcAddr + 4 + LittleEndian::get32(*((uint32_t*)fixUpPtr));
5950 target.addend = (int32_t)contentValue - (int32_t)relocValue;
5951 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5952 break;
5953 case 3:
5954 throw "unsupported r_length=3 for scattered pc-rel vanilla reloc";
5955 break;
5956 }
5957 }
5958 else {
5959 if ( sreloc->r_length() != 2 )
5960 throwf("unsupported r_length=%d for scattered vanilla reloc", sreloc->r_length());
5961 contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
5962 target.addend = (int32_t)contentValue - (int32_t)(target.atom->objectAddress());
5963 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5964 }
5965 break;
5966 case GENERIC_RELOC_SECTDIFF:
5967 case GENERIC_RELOC_LOCAL_SECTDIFF:
5968 {
5969 if ( !nextRelocIsPair )
5970 throw "GENERIC_RELOC_SECTDIFF missing following pair";
5971 switch ( sreloc->r_length() ) {
5972 case 0:
5973 case 3:
5974 throw "bad length for GENERIC_RELOC_SECTDIFF";
5975 case 1:
5976 contentValue = (int32_t)(int16_t)LittleEndian::get16(*((uint16_t*)fixUpPtr));
5977 kind = ld::Fixup::kindStoreLittleEndian16;
5978 break;
5979 case 2:
5980 contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
5981 kind = ld::Fixup::kindStoreLittleEndian32;
5982 break;
5983 }
5984 Atom<x86>* fromAtom = parser.findAtomByAddress(nextRelocValue);
5985 uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5986 parser.findTargetFromAddress(sreloc->r_value(), target);
5987 // check for addend encoded in the section content
5988 int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
5989 if ( addend < 0 ) {
5990 // switch binding base on coalescing
5991 if ( target.atom == NULL ) {
5992 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
5993 }
5994 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
5995 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
5996 }
5997 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
5998 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
5999 }
6000 else {
6001 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
6002 }
6003 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
6004 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6005 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
6006 parser.addFixup(src, ld::Fixup::k5of5, kind);
6007 }
6008 else {
6009 // switch binding base on coalescing
6010 if ( target.atom == NULL ) {
6011 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
6012 }
6013 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
6014 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
6015 }
6016 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
6017 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
6018 }
6019 else {
6020 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
6021 }
6022 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend+addend);
6023 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6024 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
6025 parser.addFixup(src, ld::Fixup::k5of5, kind);
6026 }
6027 }
6028 break;
6029 }
6030 return result;
6031 }
6032 }
6033
6034
6035
6036
6037
#if SUPPORT_ARCH_arm_any
// Returns the displacement encoded in an ARM-mode BR24 branch instruction, measured from
// the address of the instruction itself (i.e. already including the +8 pc bias).
static int32_t armBranch24Displacement(uint32_t instruction)
{
	// Sign-extend displacement
	int32_t displacement = (instruction & 0x00FFFFFF) << 2;
	if ( (displacement & 0x02000000) != 0 )
		displacement |= 0xFC000000;
	// The pc added will be +8 from the pc
	displacement += 8;
	// If this is BLX add H << 1
	if ( (instruction & 0xFE000000) == 0xFA000000 )
		displacement += ((instruction & 0x01000000) >> 23);
	return displacement;
}

// Returns the displacement encoded in a Thumb BR22 branch instruction pair (read as one
// little-endian 32-bit word), measured from the address of the instruction itself
// (i.e. already including the +4 pc bias).
static int32_t thumbBranch22Displacement(uint32_t instruction)
{
	// thumb2 added two more bits to displacement, complicating the displacement decoding
	uint32_t s = (instruction >> 10) & 0x1;
	uint32_t j1 = (instruction >> 29) & 0x1;
	uint32_t j2 = (instruction >> 27) & 0x1;
	uint32_t imm10 = instruction & 0x3FF;
	uint32_t imm11 = (instruction >> 16) & 0x7FF;
	uint32_t i1 = (j1 == s);
	uint32_t i2 = (j2 == s);
	uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
	int32_t sdis = dis;
	if ( s )
		sdis |= 0xFE000000;
	// The pc added will be +4 from the pc
	return sdis + 4;
}

// Returns true when the Thumb branch word is a blx.  Only bits 31, 30 and 28 are tested:
// bits 29 (J1) and 27 (J2) are displacement bits in the thumb2 encoding decoded above and
// therefore must not take part in the opcode test.
static bool thumbBranchIsBLX(uint32_t instruction)
{
	return ( (instruction & 0xD0000000) == 0xC0000000 );
}

// Reassembles the 16-bit immediate that is split across bit fields of the instruction
// (presumably a movw/movt - the fixups produced for it are the Low16/High16 kinds).
//   thumb encoding: imm4 = bits 0-3, i = bit 10, imm3 = bits 28-30, imm8 = bits 16-23
//   arm encoding:   imm4 = bits 16-19, imm12 = bits 0-11
static uint32_t armHalfRelocImm16(uint32_t instruction, bool isThumb)
{
	if ( isThumb ) {
		uint32_t i = ((instruction & 0x00000400) >> 10);
		uint32_t imm4 = (instruction & 0x0000000F);
		uint32_t imm3 = ((instruction & 0x70000000) >> 28);
		uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
		return (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
	}
	uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
	uint32_t imm12 = (instruction & 0x00000FFF);
	return (imm4 << 12) | imm12;
}

// Selects the store fixup kind for a "half" relocation from its two attributes.
static ld::Fixup::Kind armHalfFixupKind(bool isThumb, bool isHigh)
{
	if ( isHigh )
		return (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16);
	return (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16);
}

//
// Translates one ARM relocation entry of this section into ld::Fixup records attached to
// the atom that contains the relocated address.
//
//   parser  provides symbol/atom lookup and collects the generated fixups
//   reloc   entry to translate; plain or scattered (R_SCATTERED bit set in r_address())
//
// Returns true when the entry after 'reloc' is an ARM_RELOC_PAIR that was consumed here,
// false otherwise.  Throws (const char* or via throwf) for unsupported relocation forms.
//
// NOTE(review): the scattered path and ARM_RELOC_HALF peek at reloc[1]; this assumes the
// relocation array has a readable entry after 'reloc' - confirm against the caller.
//
template <>
bool Section<arm>::addRelocFixup(class Parser<arm>& parser, const macho_relocation_info<P>* reloc)
{
	const macho_section<P>* sect = this->machoSection();
	bool result = false;
	uint32_t srcAddr;
	uint32_t dstAddr;
	uint32_t* fixUpPtr;
	int32_t displacement = 0;
	uint32_t instruction = 0;
	pint_t contentValue = 0;
	Parser<arm>::SourceLocation src;
	Parser<arm>::TargetDesc target;
	const macho_relocation_info<P>* nextReloc;

	if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
		bool externSymbolIsThumbDef = false;
		srcAddr = sect->addr() + reloc->r_address();
		src.atom = this->findAtomByAddress(srcAddr);
		src.offsetInAtom = srcAddr - src.atom->_objAddress;
		fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + reloc->r_address());
		if ( reloc->r_type() != ARM_RELOC_PAIR )
			instruction = LittleEndian::get32(*fixUpPtr);
		if ( reloc->r_extern() ) {
			const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
			// use direct reference for local symbols
			if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) && (((targetSymbol.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(targetSymbol)[0] == 'L')) ) {
				parser.findTargetFromAddressAndSectionNum(targetSymbol.n_value(), targetSymbol.n_sect(), target);
			}
			else {
				target.atom = NULL;
				target.name = parser.nameFromSymbol(targetSymbol);
				target.weakImport = parser.weakImportFromSymbol(targetSymbol);
				if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) && (targetSymbol.n_desc() & N_ARM_THUMB_DEF) )
					externSymbolIsThumbDef = true;
			}
		}
		switch ( reloc->r_type() ) {
			case ARM_RELOC_BR24:
				displacement = armBranch24Displacement(instruction);
				if ( reloc->r_extern() ) {
					target.addend = srcAddr + displacement;
					if ( externSymbolIsThumbDef )
						target.addend &= -2; // remove thumb bit
				}
				else {
					dstAddr = srcAddr + displacement;
					parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
				}
				// special case "calls" for dtrace
				if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
					parser.addFixup(src, ld::Fixup::k1of1,
									ld::Fixup::kindStoreARMDtraceCallSiteNop, false, target.name);
					parser.addDtraceExtraInfos(src, &target.name[16]);
				}
				else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
					parser.addFixup(src, ld::Fixup::k1of1,
									ld::Fixup::kindStoreARMDtraceIsEnableSiteClear, false, target.name);
					parser.addDtraceExtraInfos(src, &target.name[20]);
				}
				else {
					parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
				}
				break;
			case ARM_THUMB_RELOC_BR22:
				displacement = thumbBranch22Displacement(instruction);
				// If the instruction was blx, force the low 2 bits to be clear
				dstAddr = srcAddr + displacement;
				if ( thumbBranchIsBLX(instruction) )
					dstAddr &= 0xFFFFFFFC;

				if ( reloc->r_extern() ) {
					target.addend = dstAddr;
				}
				else {
					parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
				}
				// special case "calls" for dtrace
				if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
					parser.addFixup(src, ld::Fixup::k1of1,
									ld::Fixup::kindStoreThumbDtraceCallSiteNop, false, target.name);
					parser.addDtraceExtraInfos(src, &target.name[16]);
				}
				else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
					parser.addFixup(src, ld::Fixup::k1of1,
									ld::Fixup::kindStoreThumbDtraceIsEnableSiteClear, false, target.name);
					parser.addDtraceExtraInfos(src, &target.name[20]);
				}
				else {
					parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
				}
				break;
			case ARM_RELOC_VANILLA:
				if ( reloc->r_length() != 2 )
					throw "bad length for ARM_RELOC_VANILLA";
				contentValue = LittleEndian::get32(*fixUpPtr);
				if ( reloc->r_extern() ) {
					target.addend = (int32_t)contentValue;
					if ( externSymbolIsThumbDef )
						target.addend &= -2; // remove thumb bit
				}
				else {
					parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
					// possible non-extern relocation turned into by-name ref because target is a weak-def
					if ( target.atom != NULL ) {
						if ( target.atom->isThumb() )
							target.addend &= -2; // remove thumb bit
						// if reference to LSDA, add group subordinate fixup
						if ( target.atom->contentType() == ld::Atom::typeLSDA ) {
							Parser<arm>::SourceLocation src2;
							src2.atom = src.atom;
							src2.offsetInAtom = 0;
							parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, target.atom);
						}
					}
				}
				parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
				break;
			case ARM_THUMB_32BIT_BRANCH:
				// silently ignore old unnecessary reloc
				break;
			case ARM_RELOC_HALF:
				{
					nextReloc = &reloc[1];
					if ( nextReloc->r_type() != ARM_RELOC_PAIR )
						throw "for ARM_RELOC_HALF, next reloc is not ARM_RELOC_PAIR";
					// the pair's r_address carries the 16 bits that are not in the instruction
					const uint32_t other16 = (nextReloc->r_address() & 0xFFFF);
					// r_length bit 1 selects the thumb encoding, bit 0 selects the high half
					const bool isThumb = ((reloc->r_length() & 2) != 0);
					const bool isHigh = ((reloc->r_length() & 1) != 0);
					const uint32_t instruction16 = armHalfRelocImm16(instruction, isThumb);
					if ( isHigh )
						dstAddr = ((instruction16 << 16) | other16);
					else
						dstAddr = (other16 << 16) | instruction16;
					if ( reloc->r_extern() ) {
						target.addend = dstAddr;
						if ( externSymbolIsThumbDef )
							target.addend &= -2; // remove thumb bit
					}
					else {
						parser.findTargetFromAddress(dstAddr, target);
						// target may have been turned into a by-name ref, leaving atom NULL
						if ( (target.atom != NULL) && target.atom->isThumb() )
							target.addend &= (-2); // remove thumb bit
					}
					parser.addFixups(src, armHalfFixupKind(isThumb, isHigh), target);
					result = true;
				}
				break;
			default:
				throwf("unknown relocation type %d", reloc->r_type());
				break;
		}
	}
	else {
		const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
		// file format allows pair to be scattered or not
		const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
		nextReloc = &reloc[1];
		srcAddr = sect->addr() + sreloc->r_address();
		fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + sreloc->r_address());
		instruction = LittleEndian::get32(*fixUpPtr);
		src.atom = this->findAtomByAddress(srcAddr);
		src.offsetInAtom = srcAddr - src.atom->_objAddress;
		bool nextRelocIsPair = false;
		uint32_t nextRelocAddress = 0;
		uint32_t nextRelocValue = 0;
		if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
			if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
				nextRelocIsPair = true;
				nextRelocAddress = nextReloc->r_address();
				result = true;
			}
		}
		else {
			if ( nextSReloc->r_type() == ARM_RELOC_PAIR ) {
				nextRelocIsPair = true;
				nextRelocAddress = nextSReloc->r_address();
				nextRelocValue = nextSReloc->r_value();
				result = true;
			}
		}
		switch ( sreloc->r_type() ) {
			case ARM_RELOC_VANILLA:
				// with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
				if ( sreloc->r_length() != 2 )
					throw "bad length for ARM_RELOC_VANILLA";
				target.atom = parser.findAtomByAddress(sreloc->r_value());
				if ( target.atom == NULL )
					throwf("bad r_value (0x%08X) for ARM_RELOC_VANILLA\n", sreloc->r_value());
				contentValue = LittleEndian::get32(*fixUpPtr);
				target.addend = contentValue - target.atom->_objAddress;
				if ( target.atom->isThumb() )
					target.addend &= -2; // remove thumb bit
				parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
				break;
			case ARM_RELOC_BR24:
				displacement = armBranch24Displacement(instruction);
				target.atom = parser.findAtomByAddress(sreloc->r_value());
				if ( target.atom == NULL )
					throwf("bad r_value (0x%08X) for ARM_RELOC_BR24", sreloc->r_value());
				target.addend = (int64_t)(srcAddr + displacement) - (int64_t)(target.atom->_objAddress);
				parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
				break;
			case ARM_THUMB_RELOC_BR22:
				displacement = thumbBranch22Displacement(instruction);
				dstAddr = srcAddr+displacement;
				// If the instruction was blx, force the low 2 bits to be clear
				// (same opcode test as the non-scattered path, so J1/J2 are ignored)
				if ( thumbBranchIsBLX(instruction) )
					dstAddr &= 0xFFFFFFFC;
				target.atom = parser.findAtomByAddress(sreloc->r_value());
				if ( target.atom == NULL )
					throwf("bad r_value (0x%08X) for ARM_THUMB_RELOC_BR22", sreloc->r_value());
				target.addend = dstAddr - target.atom->_objAddress;
				parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
				break;
			case ARM_RELOC_SECTDIFF:
			case ARM_RELOC_LOCAL_SECTDIFF:
				{
					if ( ! nextRelocIsPair )
						throw "ARM_RELOC_SECTDIFF missing following pair";
					if ( sreloc->r_length() != 2 )
						throw "bad length for ARM_RELOC_SECTDIFF";
					contentValue = LittleEndian::get32(*fixUpPtr);
					// the pair's r_value is the address being subtracted ("from"),
					// this entry's r_value is the address being referenced ("target")
					Atom<arm>* fromAtom = parser.findAtomByAddress(nextRelocValue);
					uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
					uint32_t offsetInTarget;
					Atom<arm>* targetAtom = parser.findAtomByAddressOrLocalTargetOfStub(sreloc->r_value(), &offsetInTarget);
					// check for addend encoded in the section content
					int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
					if ( targetAtom->isThumb() )
						addend &= -2; // remove thumb bit
					// if reference to LSDA, add group subordinate fixup
					if ( targetAtom->contentType() == ld::Atom::typeLSDA ) {
						Parser<arm>::SourceLocation src2;
						src2.atom = src.atom;
						src2.offsetInAtom = 0;
						parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, targetAtom);
					}
					// switch binding base on coalescing
					if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
						parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
					}
					else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
						parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
					}
					else {
						parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
					}
					if ( addend < 0 ) {
						// a negative addend is folded into the subtracted side
						parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, offsetInTarget);
						parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
						parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
					}
					else {
						// a non-negative addend is folded into the added side
						parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)(offsetInTarget+addend));
						parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
						parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
					}
					parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
				}
				break;
			case ARM_RELOC_HALF_SECTDIFF:
				{
					if ( ! nextRelocIsPair )
						throw "ARM_RELOC_HALF_SECTDIFF reloc missing following pair";
					Atom<arm>* fromAtom = parser.findAtomByAddress(nextRelocValue);
					uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
					Atom<arm>* targetAtom = parser.findAtomByAddress(sreloc->r_value());
					uint32_t offsetInTarget = sreloc->r_value() - targetAtom->_objAddress;
					// the pair's r_address carries the 16 bits that are not in the instruction
					uint32_t other16 = (nextRelocAddress & 0xFFFF);
					// r_length bit 1 selects the thumb encoding, bit 0 selects the high half
					const bool isThumb = ((sreloc->r_length() & 2) != 0);
					const bool isHigh = ((sreloc->r_length() & 1) != 0);
					uint32_t instruction16 = armHalfRelocImm16(instruction, isThumb);
					if ( isHigh )
						dstAddr = ((instruction16 << 16) | other16);
					else
						dstAddr = (other16 << 16) | instruction16;
					if ( targetAtom->isThumb() )
						dstAddr &= (-2); // remove thumb bit
					int32_t addend = dstAddr - (sreloc->r_value() - nextRelocValue);
					if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
						parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
					}
					else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
						parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
					}
					else {
						parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
					}
					parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)offsetInTarget+addend);
					parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
					parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
					parser.addFixup(src, ld::Fixup::k5of5, armHalfFixupKind(isThumb, isHigh));
					result = true;
				}
				break;
			case ARM_RELOC_HALF:
				{
					if ( ! nextRelocIsPair )
						throw "scattered ARM_RELOC_HALF reloc missing following pair";
					Atom<arm>* targetAtom = parser.findAtomByAddress(sreloc->r_value());
					// the pair's r_address carries the 16 bits that are not in the instruction
					uint32_t other16 = (nextRelocAddress & 0xFFFF);
					// r_length bit 1 selects the thumb encoding, bit 0 selects the high half
					const bool isThumb = ((sreloc->r_length() & 2) != 0);
					const bool isHigh = ((sreloc->r_length() & 1) != 0);
					uint32_t instruction16 = armHalfRelocImm16(instruction, isThumb);
					if ( isHigh )
						dstAddr = ((instruction16 << 16) | other16);
					else
						dstAddr = (other16 << 16) | instruction16;
					if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
						parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, targetAtom);
					}
					else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
						parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
					}
					else {
						parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
					}
					parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, dstAddr - targetAtom->_objAddress);
					parser.addFixup(src, ld::Fixup::k3of3, armHalfFixupKind(isThumb, isHigh));
					result = true;
				}
				break;
			default:
				throwf("unknown ARM scattered relocation type %d", sreloc->r_type());
		}
	}
	return result;
}
#endif
6494
6495
6496 #if SUPPORT_ARCH_arm64
6497 template <>
6498 bool Section<arm64>::addRelocFixup(class Parser<arm64>& parser, const macho_relocation_info<P>* reloc)
6499 {
6500 bool result = false;
6501 Parser<arm64>::SourceLocation src;
6502 Parser<arm64>::TargetDesc target = { NULL, NULL, false, 0 };
6503 Parser<arm64>::TargetDesc toTarget;
6504 int32_t prefixRelocAddend = 0;
6505 if ( reloc->r_type() == ARM64_RELOC_ADDEND ) {
6506 uint32_t rawAddend = reloc->r_symbolnum();
6507 prefixRelocAddend = rawAddend;
6508 if ( rawAddend & 0x00800000 )
6509 prefixRelocAddend |= 0xFF000000; // sign extend 24-bit signed int to 32-bits
6510 uint32_t addendAddress = reloc->r_address();
6511 ++reloc; //advance to next reloc record
6512 result = true;
6513 if ( reloc->r_address() != addendAddress )
6514 throw "ARM64_RELOC_ADDEND r_address does not match next reloc's r_address";
6515 }
6516 const macho_section<P>* sect = this->machoSection();
6517 uint64_t srcAddr = sect->addr() + reloc->r_address();
6518 src.atom = this->findAtomByAddress(srcAddr);
6519 src.offsetInAtom = srcAddr - src.atom->_objAddress;
6520 const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6521 uint64_t contentValue = 0;
6522 const macho_relocation_info<arm64::P>* nextReloc = &reloc[1];
6523 bool useDirectBinding;
6524 uint32_t instruction;
6525 uint32_t encodedAddend;
6526 switch ( reloc->r_length() ) {
6527 case 0:
6528 contentValue = *fixUpPtr;
6529 break;
6530 case 1:
6531 contentValue = (int64_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
6532 break;
6533 case 2:
6534 contentValue = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
6535 break;
6536 case 3:
6537 contentValue = E::get64(*((uint64_t*)fixUpPtr));
6538 break;
6539 }
6540 if ( reloc->r_extern() ) {
6541 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
6542 const char* symbolName = parser.nameFromSymbol(sym);
6543 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (symbolName[0] == 'L') || (symbolName[0] == 'l')) ) {
6544 // use direct reference for local symbols
6545 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
6546 //target.addend += contentValue;
6547 }
6548 else if ( ((sym.n_type() & N_TYPE) == N_SECT) && (src.atom->_objAddress <= sym.n_value()) && (sym.n_value() < (src.atom->_objAddress+src.atom->size())) ) {
6549 // <rdar://problem/13700961> spurious warning when weak function has reference to itself
6550 // use direct reference when atom targets itself
6551 target.atom = src.atom;
6552 target.name = NULL;
6553 }
6554 else {
6555 target.name = symbolName;
6556 target.weakImport = parser.weakImportFromSymbol(sym);
6557 //target.addend = contentValue;
6558 }
6559 // cfstrings should always use direct reference to backing store
6560 if ( (this->type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
6561 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
6562 //target.addend = contentValue;
6563 }
6564 }
6565 else {
6566 if ( reloc->r_pcrel() )
6567 contentValue += srcAddr;
6568 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
6569 }
6570 switch ( reloc->r_type() ) {
6571 case ARM64_RELOC_UNSIGNED:
6572 if ( reloc->r_pcrel() )
6573 throw "pcrel and ARM64_RELOC_UNSIGNED not supported";
6574 target.addend = contentValue;
6575 switch ( reloc->r_length() ) {
6576 case 0:
6577 case 1:
6578 throw "length < 2 and ARM64_RELOC_UNSIGNED not supported";
6579 case 2:
6580 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
6581 break;
6582 case 3:
6583 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian64, target);
6584 break;
6585 }
6586 break;
6587 case ARM64_RELOC_BRANCH26:
6588 if ( ! reloc->r_pcrel() )
6589 throw "not pcrel and ARM64_RELOC_BRANCH26 not supported";
6590 if ( ! reloc->r_extern() )
6591 throw "r_extern == 0 and ARM64_RELOC_BRANCH26 not supported";
6592 if ( reloc->r_length() != 2 )
6593 throw "r_length != 2 and ARM64_RELOC_BRANCH26 not supported";
6594 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
6595 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreARM64DtraceCallSiteNop, false, target.name);
6596 parser.addDtraceExtraInfos(src, &target.name[16]);
6597 }
6598 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
6599 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreARM64DtraceIsEnableSiteClear, false, target.name);
6600 parser.addDtraceExtraInfos(src, &target.name[20]);
6601 }
6602 else {
6603 target.addend = prefixRelocAddend;
6604 instruction = contentValue;
6605 encodedAddend = (instruction & 0x03FFFFFF) << 2;
6606 if ( encodedAddend != 0 ) {
6607 if ( prefixRelocAddend == 0 ) {
6608 warning("branch26 instruction at 0x%08X has embedded addend. ARM64_RELOC_ADDEND should be used instead", reloc->r_address());
6609 target.addend = encodedAddend;
6610 }
6611 else {
6612 throwf("branch26 instruction at 0x%08X has embedded addend and ARM64_RELOC_ADDEND also used", reloc->r_address());
6613 }
6614 }
6615 parser.addFixups(src, ld::Fixup::kindStoreARM64Branch26, target);
6616 }
6617 break;
6618 case ARM64_RELOC_PAGE21:
6619 if ( ! reloc->r_pcrel() )
6620 throw "not pcrel and ARM64_RELOC_PAGE21 not supported";
6621 if ( ! reloc->r_extern() )
6622 throw "r_extern == 0 and ARM64_RELOC_PAGE21 not supported";
6623 if ( reloc->r_length() != 2 )
6624 throw "length != 2 and ARM64_RELOC_PAGE21 not supported";
6625 target.addend = prefixRelocAddend;
6626 instruction = contentValue;
6627 encodedAddend = ((instruction & 0x60000000) >> 29) | ((instruction & 0x01FFFFE0) >> 3);
6628 encodedAddend *= 4096; // internally addend is in bytes, so scale
6629 if ( encodedAddend != 0 ) {
6630 if ( prefixRelocAddend == 0 ) {
6631 warning("adrp instruction at 0x%08X has embedded addend. ARM64_RELOC_ADDEND should be used instead", reloc->r_address());
6632 target.addend = encodedAddend;
6633 }
6634 else {
6635 throwf("adrp instruction at 0x%08X has embedded addend and ARM64_RELOC_ADDEND also used", reloc->r_address());
6636 }
6637 }
6638 parser.addFixups(src, ld::Fixup::kindStoreARM64Page21, target);
6639 break;
6640 case ARM64_RELOC_PAGEOFF12:
6641 if ( reloc->r_pcrel() )
6642 throw "pcrel and ARM64_RELOC_PAGEOFF12 not supported";
6643 if ( ! reloc->r_extern() )
6644 throw "r_extern == 0 and ARM64_RELOC_PAGEOFF12 not supported";
6645 if ( reloc->r_length() != 2 )
6646 throw "length != 2 and ARM64_RELOC_PAGEOFF12 not supported";
6647 target.addend = prefixRelocAddend;
6648 instruction = contentValue;
6649 encodedAddend = ((instruction & 0x003FFC00) >> 10);
6650 // internally addend is in bytes. Some instructions have an implicit scale factor
6651 if ( (instruction & 0x3B000000) == 0x39000000 ) {
6652 switch ( instruction & 0xC0000000 ) {
6653 case 0x00000000:
6654 break;
6655 case 0x40000000:
6656 encodedAddend *= 2;
6657 break;
6658 case 0x80000000:
6659 encodedAddend *= 4;
6660 break;
6661 case 0xC0000000:
6662 encodedAddend *= 8;
6663 break;
6664 }
6665 }
6666 if ( encodedAddend != 0 ) {
6667 if ( prefixRelocAddend == 0 ) {
6668 warning("pageoff12 instruction at 0x%08X has embedded addend. ARM64_RELOC_ADDEND should be used instead", reloc->r_address());
6669 target.addend = encodedAddend;
6670 }
6671 else {
6672 throwf("pageoff12 instruction at 0x%08X has embedded addend and ARM64_RELOC_ADDEND also used", reloc->r_address());
6673 }
6674 }
6675 parser.addFixups(src, ld::Fixup::kindStoreARM64PageOff12, target);
6676 break;
6677 case ARM64_RELOC_GOT_LOAD_PAGE21:
6678 if ( ! reloc->r_pcrel() )
6679 throw "not pcrel and ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
6680 if ( ! reloc->r_extern() )
6681 throw "r_extern == 0 and ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
6682 if ( reloc->r_length() != 2 )
6683 throw "length != 2 and ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
6684 if ( prefixRelocAddend != 0 )
6685 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
6686 instruction = contentValue;
6687 target.addend = ((instruction & 0x60000000) >> 29) | ((instruction & 0x01FFFFE0) >> 3);
6688 if ( target.addend != 0 )
6689 throw "non-zero addend with ARM64_RELOC_GOT_LOAD_PAGE21 is not supported";
6690 parser.addFixups(src, ld::Fixup::kindStoreARM64GOTLoadPage21, target);
6691 break;
6692 case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
6693 if ( reloc->r_pcrel() )
6694 throw "pcrel and ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
6695 if ( ! reloc->r_extern() )
6696 throw "r_extern == 0 and ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
6697 if ( reloc->r_length() != 2 )
6698 throw "length != 2 and ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
6699 if ( prefixRelocAddend != 0 )
6700 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
6701 instruction = contentValue;
6702 target.addend = ((instruction & 0x003FFC00) >> 10);
6703 parser.addFixups(src, ld::Fixup::kindStoreARM64GOTLoadPageOff12, target);
6704 break;
6705 case ARM64_RELOC_TLVP_LOAD_PAGE21:
6706 if ( ! reloc->r_pcrel() )
6707 throw "not pcrel and ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
6708 if ( ! reloc->r_extern() )
6709 throw "r_extern == 0 and ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
6710 if ( reloc->r_length() != 2 )
6711 throw "length != 2 and ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
6712 if ( prefixRelocAddend != 0 )
6713 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
6714 instruction = contentValue;
6715 target.addend = ((instruction & 0x60000000) >> 29) | ((instruction & 0x01FFFFE0) >> 3);
6716 if ( target.addend != 0 )
6717 throw "non-zero addend with ARM64_RELOC_GOT_LOAD_PAGE21 is not supported";
6718 parser.addFixups(src, ld::Fixup::kindStoreARM64TLVPLoadPage21, target);
6719 break;
6720 case ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
6721 if ( reloc->r_pcrel() )
6722 throw "pcrel and ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
6723 if ( ! reloc->r_extern() )
6724 throw "r_extern == 0 and ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
6725 if ( reloc->r_length() != 2 )
6726 throw "length != 2 and ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
6727 if ( prefixRelocAddend != 0 )
6728 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
6729 instruction = contentValue;
6730 target.addend = ((instruction & 0x003FFC00) >> 10);
6731 parser.addFixups(src, ld::Fixup::kindStoreARM64TLVPLoadPageOff12, target);
6732 break;
6733 case ARM64_RELOC_SUBTRACTOR:
6734 if ( reloc->r_pcrel() )
6735 throw "ARM64_RELOC_SUBTRACTOR cannot be pc-relative";
6736 if ( reloc->r_length() < 2 )
6737 throw "ARM64_RELOC_SUBTRACTOR must have r_length of 2 or 3";
6738 if ( !reloc->r_extern() )
6739 throw "ARM64_RELOC_SUBTRACTOR must have r_extern=1";
6740 if ( nextReloc->r_type() != ARM64_RELOC_UNSIGNED )
6741 throw "ARM64_RELOC_SUBTRACTOR must be followed by ARM64_RELOC_UNSIGNED";
6742 if ( prefixRelocAddend != 0 )
6743 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_SUBTRACTOR not supported";
6744 result = true;
6745 if ( nextReloc->r_pcrel() )
6746 throw "ARM64_RELOC_UNSIGNED following a ARM64_RELOC_SUBTRACTOR cannot be pc-relative";
6747 if ( nextReloc->r_length() != reloc->r_length() )
6748 throw "ARM64_RELOC_UNSIGNED following a ARM64_RELOC_SUBTRACTOR must have same r_length";
6749 if ( nextReloc->r_extern() ) {
6750 const macho_nlist<P>& sym = parser.symbolFromIndex(nextReloc->r_symbolnum());
6751 // use direct reference for local symbols
6752 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
6753 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), toTarget);
6754 toTarget.addend = contentValue;
6755 useDirectBinding = true;
6756 }
6757 else {
6758 toTarget.name = parser.nameFromSymbol(sym);
6759 toTarget.weakImport = parser.weakImportFromSymbol(sym);
6760 toTarget.addend = contentValue;
6761 useDirectBinding = false;
6762 }
6763 }
6764 else {
6765 parser.findTargetFromAddressAndSectionNum(contentValue, nextReloc->r_symbolnum(), toTarget);
6766 useDirectBinding = (toTarget.atom->scope() == ld::Atom::scopeTranslationUnit);
6767 }
6768 if ( useDirectBinding )
6769 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.atom);
6770 else
6771 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.weakImport, toTarget.name);
6772 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, toTarget.addend);
6773 if ( target.atom == NULL )
6774 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, false, target.name);
6775 else
6776 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, target.atom);
6777 if ( reloc->r_length() == 2 )
6778 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
6779 else
6780 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian64);
6781 break;
6782 case ARM64_RELOC_POINTER_TO_GOT:
6783 if ( ! reloc->r_extern() )
6784 throw "r_extern == 0 and ARM64_RELOC_POINTER_TO_GOT not supported";
6785 if ( prefixRelocAddend != 0 )
6786 throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_POINTER_TO_GOT not supported";
6787 if ( reloc->r_pcrel() ) {
6788 if ( reloc->r_length() != 2 )
6789 throw "r_length != 2 and r_extern = 1 and ARM64_RELOC_POINTER_TO_GOT not supported";
6790 parser.addFixups(src, ld::Fixup::kindStoreARM64PCRelToGOT, target);
6791 }
6792 else {
6793 if ( reloc->r_length() != 3 )
6794 throw "r_length != 3 and r_extern = 0 and ARM64_RELOC_POINTER_TO_GOT not supported";
6795 parser.addFixups(src, ld::Fixup::kindStoreARM64PointerToGOT, target);
6796 }
6797 break;
6798 default:
6799 throwf("unknown relocation type %d", reloc->r_type());
6800 }
6801 return result;
6802 }
6803 #endif
6804
6805 template <typename A>
6806 bool ObjC1ClassSection<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
6807 {
6808 // inherited
6809 FixedSizeSection<A>::addRelocFixup(parser, reloc);
6810
6811 assert(0 && "needs template specialization");
6812 return false;
6813 }
6814
6815 template <>
6816 bool ObjC1ClassSection<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
6817 {
6818 // if this is the reloc for the super class name string, add implicit reference to super class
6819 if ( ((reloc->r_address() & R_SCATTERED) == 0) && (reloc->r_type() == GENERIC_RELOC_VANILLA) ) {
6820 assert( reloc->r_length() == 2 );
6821 assert( ! reloc->r_pcrel() );
6822
6823 const macho_section<P>* sect = this->machoSection();
6824 Parser<x86>::SourceLocation src;
6825 uint32_t srcAddr = sect->addr() + reloc->r_address();
6826 src.atom = this->findAtomByAddress(srcAddr);
6827 src.offsetInAtom = srcAddr - src.atom->objectAddress();
6828 if ( src.offsetInAtom == 4 ) {
6829 Parser<x86>::TargetDesc stringTarget;
6830 const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6831 uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
6832 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
6833
6834 assert(stringTarget.atom != NULL);
6835 assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
6836 const char* superClassBaseName = (char*)stringTarget.atom->rawContentPointer();
6837 char* superClassName = new char[strlen(superClassBaseName) + 20];
6838 strcpy(superClassName, ".objc_class_name_");
6839 strcat(superClassName, superClassBaseName);
6840
6841 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, superClassName);
6842 }
6843 }
6844 // inherited
6845 return FixedSizeSection<x86>::addRelocFixup(parser, reloc);
6846 }
6847
6848
6849
6850 template <typename A>
6851 bool Objc1ClassReferences<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
6852 {
6853 // inherited
6854 PointerToCStringSection<A>::addRelocFixup(parser, reloc);
6855
6856 assert(0 && "needs template specialization");
6857 return false;
6858 }
6859
6860
6861
6862 template <>
6863 bool Objc1ClassReferences<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
6864 {
6865 // add implict class refs, fixups not usable yet, so look at relocations
6866 assert( (reloc->r_address() & R_SCATTERED) == 0 );
6867 assert( reloc->r_type() == GENERIC_RELOC_VANILLA );
6868 assert( reloc->r_length() == 2 );
6869 assert( ! reloc->r_pcrel() );
6870
6871 const macho_section<P>* sect = this->machoSection();
6872 Parser<x86>::SourceLocation src;
6873 uint32_t srcAddr = sect->addr() + reloc->r_address();
6874 src.atom = this->findAtomByAddress(srcAddr);
6875 src.offsetInAtom = srcAddr - src.atom->objectAddress();
6876 Parser<x86>::TargetDesc stringTarget;
6877 const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6878 uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
6879 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
6880
6881 assert(stringTarget.atom != NULL);
6882 assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
6883 const char* baseClassName = (char*)stringTarget.atom->rawContentPointer();
6884 char* objcClassName = new char[strlen(baseClassName) + 20];
6885 strcpy(objcClassName, ".objc_class_name_");
6886 strcat(objcClassName, baseClassName);
6887
6888 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, objcClassName);
6889
6890 // inherited
6891 return PointerToCStringSection<x86>::addRelocFixup(parser, reloc);
6892 }
6893
6894 #if SUPPORT_ARCH_arm64
6895 template <>
6896 void Section<arm64>::addLOH(class Parser<arm64>& parser, int kind, int count, const uint64_t addrs[]) {
6897 switch (kind) {
6898 case LOH_ARM64_ADRP_ADRP:
6899 case LOH_ARM64_ADRP_LDR:
6900 case LOH_ARM64_ADRP_ADD:
6901 case LOH_ARM64_ADRP_LDR_GOT:
6902 if ( count != 2 )
6903 warning("arm64 Linker Optimiztion Hint %d has wrong number of arguments", kind);
6904 break;
6905 case LOH_ARM64_ADRP_ADD_LDR:
6906 case LOH_ARM64_ADRP_LDR_GOT_LDR:
6907 case LOH_ARM64_ADRP_ADD_STR:
6908 case LOH_ARM64_ADRP_LDR_GOT_STR:
6909 if ( count != 3 )
6910 warning("arm64 Linker Optimiztion Hint %d has wrong number of arguments", kind);
6911 }
6912
6913 // pick lowest address in tuple for use as offsetInAtom
6914 uint64_t lowestAddress = addrs[0];
6915 for(int i=1; i < count; ++i) {
6916 if ( addrs[i] < lowestAddress )
6917 lowestAddress = addrs[i];
6918 }
6919 // verify all other address are in same atom
6920 Atom<arm64>* inAtom = parser.findAtomByAddress(lowestAddress);
6921 const uint64_t atomStartAddr = inAtom->objectAddress();
6922 const uint64_t atomEndAddr = atomStartAddr + inAtom->size();
6923 for(int i=0; i < count; ++i) {
6924 if ( (addrs[i] < atomStartAddr) || (addrs[i] >= atomEndAddr) ) {
6925 warning("arm64 Linker Optimiztion Hint addresses are not in same atom: 0x%08llX and 0x%08llX",
6926 lowestAddress, addrs[i]);
6927 return; // skip this LOH
6928 }
6929 if ( (addrs[i] & 0x3) != 0 ) {
6930 warning("arm64 Linker Optimiztion Hint address is not 4-byte aligned: 0x%08llX", addrs[i]);
6931 return; // skip this LOH
6932 }
6933 if ( (addrs[i] - lowestAddress) > 0xFFFF ) {
6934 if ( parser.verboseOptimizationHints() ) {
6935 warning("arm64 Linker Optimiztion Hint addresses are too far apart: 0x%08llX and 0x%08llX",
6936 lowestAddress, addrs[i]);
6937 }
6938 return; // skip this LOH
6939 }
6940 }
6941
6942 // encoded kind, count, and address deltas in 64-bit addend
6943 ld::Fixup::LOH_arm64 extra;
6944 extra.addend = 0;
6945 extra.info.kind = kind;
6946 extra.info.count = count-1;
6947 extra.info.delta1 = (addrs[0] - lowestAddress) >> 2;
6948 extra.info.delta2 = (count > 1) ? ((addrs[1] - lowestAddress) >> 2) : 0;
6949 extra.info.delta3 = (count > 2) ? ((addrs[2] - lowestAddress) >> 2) : 0;
6950 extra.info.delta4 = (count > 3) ? ((addrs[3] - lowestAddress) >> 2) : 0;
6951 typename Parser<arm64>::SourceLocation src(inAtom, lowestAddress- inAtom->objectAddress());
6952 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindLinkerOptimizationHint, extra.addend);
6953 }
6954 #endif
6955
6956 template <typename A>
6957 void Section<A>::addLOH(class Parser<A>& parser, int kind, int count, const uint64_t addrs[]) {
6958
6959 }
6960
6961 template <typename A>
6962 void Section<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
6963 {
6964 const macho_section<P>* sect = this->machoSection();
6965 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + sect->reloff());
6966 const uint32_t relocCount = sect->nreloc();
6967 for (uint32_t r = 0; r < relocCount; ++r) {
6968 try {
6969 if ( this->addRelocFixup(parser, &relocs[r]) )
6970 ++r; // skip next
6971 }
6972 catch (const char* msg) {
6973 throwf("in section %s,%s reloc %u: %s", sect->segname(), Section<A>::makeSectionName(sect), r, msg);
6974 }
6975 }
6976
6977 // add follow-on fixups if .o file is missing .subsections_via_symbols
6978 if ( this->addFollowOnFixups() ) {
6979 Atom<A>* end = &_endAtoms[-1];
6980 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
6981 typename Parser<A>::SourceLocation src(p, 0);
6982 Atom<A>* nextAtom = &p[1];
6983 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
6984 }
6985 }
6986 else if ( this->type() == ld::Section::typeCode ) {
6987 // if FDE broke text not at a symbol, use followOn to keep code together
6988 Atom<A>* end = &_endAtoms[-1];
6989 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
6990 typename Parser<A>::SourceLocation src(p, 0);
6991 Atom<A>* nextAtom = &p[1];
6992 if ( (p->symbolTableInclusion() == ld::Atom::symbolTableIn) && (nextAtom->symbolTableInclusion() == ld::Atom::symbolTableNotIn) ) {
6993 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
6994 }
6995 }
6996 }
6997
6998 // <rdar://problem/9218847> track data-in-code
6999 if ( parser.hasDataInCodeLabels() && (this->type() == ld::Section::typeCode) ) {
7000 for (uint32_t i=0; i < parser.symbolCount(); ++i) {
7001 const macho_nlist<P>& sym = parser.symbolFromIndex(i);
7002 // ignore stabs
7003 if ( (sym.n_type() & N_STAB) != 0 )
7004 continue;
7005 // ignore non-definitions
7006 if ( (sym.n_type() & N_TYPE) != N_SECT )
7007 continue;
7008
7009 // 'L' labels do not denote atom breaks
7010 const char* symbolName = parser.nameFromSymbol(sym);
7011 if ( symbolName[0] == 'L' ) {
7012 if ( strncmp(symbolName, "L$start$", 8) == 0 ) {
7013 ld::Fixup::Kind kind = ld::Fixup::kindNone;
7014 if ( strncmp(&symbolName[8], "data$", 5) == 0 )
7015 kind = ld::Fixup::kindDataInCodeStartData;
7016 else if ( strncmp(&symbolName[8], "code$", 5) == 0 )
7017 kind = ld::Fixup::kindDataInCodeEnd;
7018 else if ( strncmp(&symbolName[8], "jt8$", 4) == 0 )
7019 kind = ld::Fixup::kindDataInCodeStartJT8;
7020 else if ( strncmp(&symbolName[8], "jt16$", 4) == 0 )
7021 kind = ld::Fixup::kindDataInCodeStartJT16;
7022 else if ( strncmp(&symbolName[8], "jt32$", 4) == 0 )
7023 kind = ld::Fixup::kindDataInCodeStartJT32;
7024 else if ( strncmp(&symbolName[8], "jta32$", 4) == 0 )
7025 kind = ld::Fixup::kindDataInCodeStartJTA32;
7026 else
7027 warning("unknown L$start$ label %s in file %s", symbolName, this->file().path());
7028 if ( kind != ld::Fixup::kindNone ) {
7029 Atom<A>* inAtom = parser.findAtomByAddress(sym.n_value());
7030 typename Parser<A>::SourceLocation src(inAtom, sym.n_value() - inAtom->objectAddress());
7031 parser.addFixup(src, ld::Fixup::k1of1, kind);
7032 }
7033 }
7034 }
7035 }
7036 }
7037
7038 // <rdar://problem/11150575> Handle LC_DATA_IN_CODE in object files
7039 if ( this->type() == ld::Section::typeCode ) {
7040 const pint_t startAddr = this->_machOSection->addr();
7041 const pint_t endAddr = startAddr + this->_machOSection->size();
7042 for ( const macho_data_in_code_entry<P>* p = parser.dataInCodeStart(); p != parser.dataInCodeEnd(); ++p ) {
7043 if ( (p->offset() >= startAddr) && (p->offset() < endAddr) ) {
7044 ld::Fixup::Kind kind = ld::Fixup::kindNone;
7045 switch ( p->kind() ) {
7046 case DICE_KIND_DATA:
7047 kind = ld::Fixup::kindDataInCodeStartData;
7048 break;
7049 case DICE_KIND_JUMP_TABLE8:
7050 kind = ld::Fixup::kindDataInCodeStartJT8;
7051 break;
7052 case DICE_KIND_JUMP_TABLE16:
7053 kind = ld::Fixup::kindDataInCodeStartJT16;
7054 break;
7055 case DICE_KIND_JUMP_TABLE32:
7056 kind = ld::Fixup::kindDataInCodeStartJT32;
7057 break;
7058 case DICE_KIND_ABS_JUMP_TABLE32:
7059 kind = ld::Fixup::kindDataInCodeStartJTA32;
7060 break;
7061 default:
7062 kind = ld::Fixup::kindDataInCodeStartData;
7063 warning("uknown LC_DATA_IN_CODE kind (%d) at offset 0x%08X", p->kind(), p->offset());
7064 break;
7065 }
7066 Atom<A>* inAtom = parser.findAtomByAddress(p->offset());
7067 typename Parser<A>::SourceLocation srcStart(inAtom, p->offset() - inAtom->objectAddress());
7068 parser.addFixup(srcStart, ld::Fixup::k1of1, kind);
7069 typename Parser<A>::SourceLocation srcEnd(inAtom, p->offset() + p->length() - inAtom->objectAddress());
7070 parser.addFixup(srcEnd, ld::Fixup::k1of1, ld::Fixup::kindDataInCodeEnd);
7071 }
7072 }
7073 }
7074
7075 // <rdar://problem/11945700> convert linker optimization hints into internal format
7076 if ( this->type() == ld::Section::typeCode && parser.hasOptimizationHints() ) {
7077 const pint_t startAddr = this->_machOSection->addr();
7078 const pint_t endAddr = startAddr + this->_machOSection->size();
7079 for (const uint8_t* p = parser.optimizationHintsStart(); p < parser.optimizationHintsEnd(); ) {
7080 uint64_t addrs[4];
7081 int32_t kind = read_uleb128(&p, parser.optimizationHintsEnd());
7082 if ( kind == 0 ) // padding at end of loh buffer
7083 break;
7084 if ( kind == -1 ) {
7085 warning("malformed uleb128 kind in LC_LINKER_OPTIMIZATION_HINTS");
7086 break;
7087 }
7088 int32_t count = read_uleb128(&p, parser.optimizationHintsEnd());
7089 if ( count == -1 ) {
7090 warning("malformed uleb128 count in LC_LINKER_OPTIMIZATION_HINTS");
7091 break;
7092 }
7093 if ( count > 3 ) {
7094 warning("address count > 3 in LC_LINKER_OPTIMIZATION_HINTS");
7095 break;
7096 }
7097 for (int32_t i=0; i < count; ++i) {
7098 addrs[i] = read_uleb128(&p, parser.optimizationHintsEnd());
7099 }
7100 if ( (startAddr <= addrs[0]) && (addrs[0] < endAddr) ) {
7101 this->addLOH(parser, kind, count, addrs);
7102 //fprintf(stderr, "kind=%d", kind);
7103 //for (int32_t i=0; i < count; ++i) {
7104 // fprintf(stderr, ", addr=0x%08llX", addrs[i]);
7105 //}
7106 //fprintf(stderr, "\n");
7107 }
7108 }
7109 }
7110
7111
7112 // add follow-on fixups for aliases
7113 if ( _hasAliases ) {
7114 for(Atom<A>* p = _beginAtoms; p < _endAtoms; ++p) {
7115 if ( p->isAlias() && ! this->addFollowOnFixups() ) {
7116 Atom<A>* targetOfAlias = &p[1];
7117 assert(p < &_endAtoms[-1]);
7118 assert(p->_objAddress == targetOfAlias->_objAddress);
7119 typename Parser<A>::SourceLocation src(p, 0);
7120 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, targetOfAlias);
7121 }
7122 }
7123 }
7124 }
7125
7126
7127
7128 //
7129 // main function used by linker to instantiate ld::Files
7130 //
7131 ld::relocatable::File* parse(const uint8_t* fileContent, uint64_t fileLength,
7132 const char* path, time_t modTime, ld::File::Ordinal ordinal, const ParserOptions& opts)
7133 {
7134 switch ( opts.architecture ) {
7135 #if SUPPORT_ARCH_x86_64
7136 case CPU_TYPE_X86_64:
7137 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) )
7138 return mach_o::relocatable::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7139 break;
7140 #endif
7141 #if SUPPORT_ARCH_i386
7142 case CPU_TYPE_I386:
7143 if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) )
7144 return mach_o::relocatable::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7145 break;
7146 #endif
7147 #if SUPPORT_ARCH_arm_any
7148 case CPU_TYPE_ARM:
7149 if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) )
7150 return mach_o::relocatable::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7151 break;
7152 #endif
7153 #if SUPPORT_ARCH_arm64
7154 case CPU_TYPE_ARM64:
7155 if ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) )
7156 return mach_o::relocatable::Parser<arm64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7157 break;
7158 #endif
7159 }
7160 return NULL;
7161 }
7162
7163 //
7164 // used by archive reader to validate member object file
7165 //
7166 bool isObjectFile(const uint8_t* fileContent, uint64_t fileLength, const ParserOptions& opts)
7167 {
7168 switch ( opts.architecture ) {
7169 case CPU_TYPE_X86_64:
7170 return ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) );
7171 case CPU_TYPE_I386:
7172 return ( mach_o::relocatable::Parser<x86>::validFile(fileContent) );
7173 case CPU_TYPE_ARM:
7174 return ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) );
7175 case CPU_TYPE_ARM64:
7176 return ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) );
7177 }
7178 return false;
7179 }
7180
7181 //
7182 // used by linker to infer architecture when no -arch is on command line
7183 //
7184 bool isObjectFile(const uint8_t* fileContent, cpu_type_t* result, cpu_subtype_t* subResult)
7185 {
7186 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
7187 *result = CPU_TYPE_X86_64;
7188 const macho_header<Pointer64<LittleEndian> >* header = (const macho_header<Pointer64<LittleEndian> >*)fileContent;
7189 *subResult = header->cpusubtype();
7190 return true;
7191 }
7192 if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
7193 *result = CPU_TYPE_I386;
7194 *subResult = CPU_SUBTYPE_X86_ALL;
7195 return true;
7196 }
7197 if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
7198 *result = CPU_TYPE_ARM;
7199 const macho_header<Pointer32<LittleEndian> >* header = (const macho_header<Pointer32<LittleEndian> >*)fileContent;
7200 *subResult = header->cpusubtype();
7201 return true;
7202 }
7203 if ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, false, 0) ) {
7204 *result = CPU_TYPE_ARM64;
7205 *subResult = CPU_SUBTYPE_ARM64_ALL;
7206 return true;
7207 }
7208 return false;
7209 }
7210
7211 //
7212 // used by linker is error messages to describe bad .o file
7213 //
7214 const char* archName(const uint8_t* fileContent)
7215 {
7216 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
7217 return mach_o::relocatable::Parser<x86_64>::fileKind(fileContent);
7218 }
7219 if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
7220 return mach_o::relocatable::Parser<x86>::fileKind(fileContent);
7221 }
7222 if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
7223 return mach_o::relocatable::Parser<arm>::fileKind(fileContent);
7224 }
7225 return NULL;
7226 }
7227
7228 //
7229 // Used by archive reader when -ObjC option is specified
7230 //
7231 bool hasObjC2Categories(const uint8_t* fileContent)
7232 {
7233 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
7234 return mach_o::relocatable::Parser<x86_64>::hasObjC2Categories(fileContent);
7235 }
7236 else if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
7237 return mach_o::relocatable::Parser<arm>::hasObjC2Categories(fileContent);
7238 }
7239 else if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
7240 return mach_o::relocatable::Parser<x86>::hasObjC2Categories(fileContent);
7241 }
7242 #if SUPPORT_ARCH_arm64
7243 else if ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, false, 0) ) {
7244 return mach_o::relocatable::Parser<arm64>::hasObjC2Categories(fileContent);
7245 }
7246 #endif
7247 return false;
7248 }
7249
7250 //
7251 // Used by archive reader when -ObjC option is specified
7252 //
7253 bool hasObjC1Categories(const uint8_t* fileContent)
7254 {
7255 if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
7256 return mach_o::relocatable::Parser<x86>::hasObjC1Categories(fileContent);
7257 }
7258 return false;
7259 }
7260
7261
7262
7263 } // namespace relocatable
7264 } // namespace mach_o
7265
7266