src/ld/parsers/macho_relocatable_file.cpp

   1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
   2  *
   3  * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
   4  *
   5  * @APPLE_LICENSE_HEADER_START@
   6  *
   7  * This file contains Original Code and/or Modifications of Original Code
   8  * as defined in and that are subject to the Apple Public Source License
   9  * Version 2.0 (the 'License'). You may not use this file except in
  10  * compliance with the License. Please obtain a copy of the License at
  11  * http://www.opensource.apple.com/apsl/ and read it before using this
  12  * file.
  13  *
  14  * The Original Code and all software distributed under the License are
  15  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  19  * Please see the License for the specific language governing rights and
  20  * limitations under the License.
  21  *
  22  * @APPLE_LICENSE_HEADER_END@
  23  */
  24
  25
  26 #include <stdint.h>
  27 #include <stdlib.h>
  28 #include <math.h>
  29 #include <unistd.h>
  30 #include <fcntl.h>
  31 #include <sys/param.h>
  32 #include <sys/stat.h>
  33 #include <sys/mman.h>
  34
  35 #include "MachOFileAbstraction.hpp"
  36
  37 #include "libunwind/DwarfInstructions.hpp"
  38 #include "libunwind/AddressSpace.hpp"
  39 #include "libunwind/Registers.hpp"
  40
  41 #include <vector>
  42 #include <set>
  43 #include <map>
  44 #include <algorithm>
  45
  46 #include "dwarf2.h"
  47 #include "debugline.h"
  48
  49 #include "Architectures.hpp"
  50 #include "ld.hpp"
  51 #include "macho_relocatable_file.h"
  52
  53
  54
  55 extern void throwf(const char* format, ...) __attribute__ ((noreturn,format(printf, 1, 2)));
  56 extern void warning(const char* format, ...) __attribute__((format(printf, 1, 2)));
  57
  58 namespace mach_o {
  59 namespace relocatable {
  60
  61
  62 // forward reference
  63 template <typename A> class Parser;
  64 template <typename A> class Atom;
  65 template <typename A> class Section;
  66 template <typename A> class CFISection;
  67 template <typename A> class CUSection;
  68
  69 template <typename A>
     //
     // File<A> is the ld::relocatable::File implementation used by this parser for one
     // mach-o object file of architecture A.  It holds the raw file content pointer plus
     // the per-file tables (sections, atoms, alias atoms, fixups, unwind infos, line infos,
     // stabs, linker options) that Parser<A>, Section<A>, Atom<A> and CFISection<A>::OAS
     // access directly through the friend declarations below.
     //
  70 class File : public ld::relocatable::File
  71 {
  72 public:
             // Records path, modification time and ordinal in the base class, keeps the
             // content pointer, and puts every table pointer/count and flag in a known
             // empty state.  _aliasAtomsArray and _aliasAtomsArrayCount are initialized
             // here as well (they used to be left indeterminate), so nothing can read
             // garbage from them before they are assigned.  Initializers are listed in
             // member declaration order.
  73                                                                                         File(const char* p, time_t mTime, const uint8_t* content, ld::File::Ordinal ord) :
  74                                                                                                 ld::relocatable::File(p,mTime,ord), _fileContent(content),
  75                                                                                                 _sectionsArray(NULL), _atomsArray(NULL), _aliasAtomsArray(NULL),
  76                                                                                                 _sectionsArrayCount(0), _atomsArrayCount(0), _aliasAtomsArrayCount(0),
  77                                                                                                 _debugInfoKind(ld::relocatable::File::kDebugInfoNone),
  78                                                                                                 _dwarfTranslationUnitPath(NULL),
  79                                                                                                 _dwarfDebugInfoSect(NULL), _dwarfDebugAbbrevSect(NULL),
  80                                                                                                 _dwarfDebugLineSect(NULL), _dwarfDebugStringSect(NULL),
  81                                                                                                 _objConstraint(ld::File::objcConstraintNone),
  82                                                                                                 _swiftVersion(0),
  83                                                                                                 _cpuSubType(0),
  84                                                                                                 _canScatterAtoms(false) {}
             // Defined out of line (not visible in this part of the file).
  85         virtual                                                                 ~File();
  86
  87         // overrides of ld::File
  88         virtual bool                                                                            forEachAtom(ld::File::AtomHandler&) const;
             // This file kind never supplies atoms on demand: always returns false.
  89         virtual bool                                                                            justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const
  90                                                                                                                                                                         { return false; }
  91
  92         // overrides of ld::relocatable::File
             // Each accessor below simply returns the corresponding private member.
  93         virtual ObjcConstraint                                                          objCConstraint() const                  { return _objConstraint; }
  94         virtual uint32_t                                                                        cpuSubType() const                              { return _cpuSubType; }
  95         virtual DebugInfoKind                                                           debugInfo() const                               { return _debugInfoKind; }
  96         virtual const std::vector<ld::relocatable::File::Stab>* stabs() const                           { return &_stabs; }
  97         virtual bool                                                                            canScatterAtoms() const                 { return _canScatterAtoms; }
  98         virtual const char*                                                                     translationUnitSource() const;
  99         virtual LinkerOptionsList*                                                      linkerOptions() const                   { return &_linkerOptions; }
 100         virtual uint8_t                                                                         swiftVersion() const                    { return _swiftVersion; }
 101
             // Returns the content pointer that was handed to the constructor.
 102         const uint8_t*                                                                          fileContent()                                   { return _fileContent; }
 103 private:
             // These classes read and fill the private tables below directly.
 104         friend class Atom<A>;
 105         friend class Section<A>;
 106         friend class Parser<A>;
 107         friend class CFISection<A>::OAS;
 108
 109         typedef typename A::P                                   P;
 110
             // Raw content pointer and the section/atom/alias-atom tables with their counts.
             // The atom arrays are typed as raw bytes (uint8_t*), not as Atom<A>*.
 111         const uint8_t*                                                  _fileContent;
 112         Section<A>**                                                    _sectionsArray;
 113         uint8_t*                                                                _atomsArray;
 114         uint8_t*                                                                _aliasAtomsArray;
 115         uint32_t                                                                _sectionsArrayCount;
 116         uint32_t                                                                _atomsArrayCount;
 117         uint32_t                                                                _aliasAtomsArrayCount;
             // Per-file pools of fixups, unwind infos, line infos and stabs.
 118         std::vector<ld::Fixup>                                  _fixups;
 119         std::vector<ld::Atom::UnwindInfo>               _unwindInfos;
 120         std::vector<ld::Atom::LineInfo>                 _lineInfos;
 121         std::vector<ld::relocatable::File::Stab>_stabs;
             // Debug-info state; the four section pointers start out NULL.
 122         ld::relocatable::File::DebugInfoKind    _debugInfoKind;
 123         const char*                                                             _dwarfTranslationUnitPath;
 124         const macho_section<P>*                                 _dwarfDebugInfoSect;
 125         const macho_section<P>*                                 _dwarfDebugAbbrevSect;
 126         const macho_section<P>*                                 _dwarfDebugLineSect;
 127         const macho_section<P>*                                 _dwarfDebugStringSect;
             // Values reported through the public accessors above.
 128         ld::File::ObjcConstraint                                _objConstraint;
 129         uint8_t                                                                 _swiftVersion;
 130         uint32_t                                                                _cpuSubType;
 131         bool                                                                    _canScatterAtoms;
 132         std::vector<std::vector<const char*> >  _linkerOptions;
 133 };
 134
 135
 134
 135
 136 template <typename A>
     //
     // Section<A> is the common base (derived from ld::Section) of every section kind
     // declared in this file.  It ties a section to its owning File<A>, optionally to the
     // underlying macho_section record, and to the range of atoms delimited by
     // _beginAtoms/_endAtoms.  Subclasses must implement computeAtomCount() and
     // appendAtoms(); the remaining virtuals have defaults here.
     //
 137 class Section : public ld::Section
 138 {
 139 public:
 140         typedef typename A::P::uint_t   pint_t;
 141         typedef typename A::P                   P;
 142         typedef typename A::P::E                E;
 143
 144         virtual                                                 ~Section()                                      { }
             // Owning file, as passed to the constructor.
 145         class File<A>&                                  file() const                            { return _file; }
             // Underlying mach-o section record; NULL for sections created with the
             // name-based constructor below.
 146         const macho_section<P>*                 machoSection() const            { return _machOSection; }
 147         uint32_t                                                sectionNum(class Parser<A>&) const;
 148         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr);
             // Default content type for atoms of this section.
 149         virtual ld::Atom::ContentType   contentType()                           { return ld::Atom::typeUnclassified; }
             // True when the section's flags contain S_ATTR_NO_DEAD_STRIP.
             // NOTE(review): dereferences _machOSection without a NULL check, so it is only
             // safe for sections built from a macho_section record.
 150         virtual bool                                    dontDeadStrip()                         { return (this->_machOSection->flags() & S_ATTR_NO_DEAD_STRIP); }
             // Looks addr up among this section's atoms between _beginAtoms and _endAtoms.
 151         virtual Atom<A>*                                findAtomByAddress(pint_t addr) { return this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms); }
             // Follow-on fixups are requested exactly when the file cannot scatter atoms.
 152         virtual bool                                    addFollowOnFixups() const       { return ! _file.canScatterAtoms(); }
             // Pure virtual: every concrete section supplies its own appendAtoms() and
             // computeAtomCount() taking the parser, a label/CFI break iterator and the
             // CFI/CU info arrays.
 153         virtual uint32_t                                appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 154                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 155                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
 156         virtual uint32_t                                computeAtomCount(class Parser<A>& parser,
 157                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 158                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
 159         virtual void                                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 160         virtual bool                                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
             // Coalescing defaults: hash is always 0 and nothing coalesces; sections that
             // coalesce by content override both.
 161         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const { return 0; }
 162         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 163                                                                                                         const ld::IndirectBindingTable& ind) const { return false; }
             // By default no label is ignored.
 164         virtual bool                                    ignoreLabel(const char* label) const { return false; }
 165         static const char*                              makeSectionName(const macho_section<typename A::P>* s);
 166
 167 protected:
             // Builds a section from a mach-o section record; segment name, section name and
             // type are derived from the record via the static helpers.
 168                                                 Section(File<A>& f, const macho_section<typename A::P>* s)
 169                                                         : ld::Section(makeSegmentName(s), makeSectionName(s), sectionType(s)),
 170                                                                 _file(f), _machOSection(s), _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
             // Builds a section that has no mach-o section record (_machOSection is NULL);
             // names and type are given explicitly.
 171                                                 Section(File<A>& f, const char* segName, const char* sectName, ld::Section::Type t, bool hidden=false)
 172                                                         : ld::Section(segName, sectName, t, hidden), _file(f), _machOSection(NULL),
 173                                                                 _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
 174
 175
 176         Atom<A>*                                                findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end);
 177         uint32_t                                                x86_64PcRelOffset(uint8_t r_type);
 178         void                                                    addLOH(class Parser<A>& parser, int kind, int count, const uint64_t addrs[]);
 179         static const char*                              makeSegmentName(const macho_section<typename A::P>* s);
 180         static bool                                             readable(const macho_section<typename A::P>* s);
 181         static bool                                             writable(const macho_section<typename A::P>* s);
             // The spelling "exectuable" is the declared name; any definition or caller
             // elsewhere must use the same spelling.
 182         static bool                                             exectuable(const macho_section<typename A::P>* s);
 183         static ld::Section::Type                sectionType(const macho_section<typename A::P>* s);
 184
             // Owning file, optional mach-o record, and the atom range of this section.
 185         File<A>&                                                _file;
 186         const macho_section<P>*                 _machOSection;
 187         class Atom<A>*                                  _beginAtoms;
 188         class Atom<A>*                                  _endAtoms;
 189         bool                                                    _hasAliases;
 190         std::set<const class Atom<A>*>  _altEntries;
 191 };
 192
 193
 194 template <typename A>
     //
     // CFISection<A> is the Section<A> subclass whose atoms have content type typeCFI.
     // It declares its own atom counting, atom creation and fixup generation, and the
     // nested OAS class used to read values out of an in-memory eh-frame buffer.
     //
 195 class CFISection : public Section<A>
 196 {
 197 public:
             // The parser argument is not used by this constructor; only f and s are
             // forwarded to Section<A>.
 198                                                 CFISection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 199                                                         : Section<A>(f, s) { }
 200         uint32_t                        cfiCount(Parser<A>& parser);
 201
 202         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeCFI; }
 203         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 204         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 205         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
             // Never adds follow-on fixups, regardless of the file's scatter setting.
 206         virtual bool            addFollowOnFixups() const       { return false; }
 207
 208
 209         ///
 210         /// OAS (ObjectFileAddressSpace) is used as a template parameter to UnwindCursor for parsing
 211         /// dwarf CFI information in an object file.
 212         ///
 213         class OAS
 214         {
 215         public:
 216                         typedef typename A::P::uint_t   pint_t;
 217                         typedef typename A::P                   P;
 218                         typedef typename A::P::E                E;
                             // NOTE(review): sint_t is aliased to the unsigned uint_t, the same
                             // type as pint_t — confirm whether a signed type was intended.
 219                         typedef typename A::P::uint_t   sint_t;
 220
                             // Remembers the section and buffer, and caches the section's start
                             // address (addr()) and end address (addr() + size()).
 221                                                         OAS(CFISection<A>& ehFrameSection, const uint8_t* ehFrameBuffer) :
 222                                                                 _ehFrameSection(ehFrameSection),
 223                                                                 _ehFrameContent(ehFrameBuffer),
 224                                                                 _ehFrameStartAddr(ehFrameSection.machoSection()->addr()),
 225                                                                 _ehFrameEndAddr(ehFrameSection.machoSection()->addr()+ehFrameSection.machoSection()->size()) {}
 226
                             // Fixed-width readers: each translates addr with mappedAddress() and
                             // loads a value of the given width.  The 16/32/64-bit and pointer
                             // readers pass the loaded value through E::getNN / P::getP
                             // (presumably byte-order conversion — confirm in MachOFileAbstraction.hpp).
 227                         uint8_t                 get8(pint_t addr) { return *((uint8_t*)mappedAddress(addr)); }
 228                         uint16_t                get16(pint_t addr)      { return E::get16(*((uint16_t*)mappedAddress(addr))); }
 229                         uint32_t                get32(pint_t addr)      { return E::get32(*((uint32_t*)mappedAddress(addr))); }
 230                         uint64_t                get64(pint_t addr)      { return E::get64(*((uint64_t*)mappedAddress(addr))); }
 231                         pint_t                  getP(pint_t addr)       { return P::getP(*((pint_t*)mappedAddress(addr))); }
                             // Variable-length and encoded readers; addr is taken by reference,
                             // so the implementations (defined elsewhere) can advance it.
 232                         uint64_t                getULEB128(pint_t& addr, pint_t end);
 233                         int64_t                 getSLEB128(pint_t& addr, pint_t end);
 234                         pint_t                  getEncodedP(pint_t& addr, pint_t end, uint8_t encoding);
 235         private:
                     // Translates an address into a host pointer; defined elsewhere.
 236                 const void*                     mappedAddress(pint_t addr);
 237
 238                 CFISection<A>&                          _ehFrameSection;
 239                 const uint8_t*                          _ehFrameContent;
 240                 pint_t                                          _ehFrameStartAddr;
 241                 pint_t                                          _ehFrameEndAddr;
 242         };
 243
 244
 245         typedef typename A::P::uint_t                   pint_t;
             // libunwind's per-CFI record type, instantiated over the OAS address space.
 246         typedef libunwind::CFI_Atom_Info<OAS>   CFI_Atom_Info;
 247
             // cfiCount is passed by non-const reference, so cfiParse() can update it.
 248         void                            cfiParse(class Parser<A>& parser, uint8_t* buffer, CFI_Atom_Info cfiArray[], uint32_t& cfiCount, const pint_t cuStarts[], uint32_t cuCount);
 249         bool                            needsRelocating();
 250
 251         static bool                     bigEndian();
 252 private:
 253         void                            addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo);
             // Static callback taking an opaque ref, a function address and a message
             // (presumably handed to the CFI parser for diagnostics — confirm at the call site).
 254         static void                     warnFunc(void* ref, uint64_t funcAddr, const char* msg);
 255 };
 256
 257
 258 template <typename A>
     //
     // CUSection<A> is a Section<A> that contributes no atoms of its own: both
     // computeAtomCount() and appendAtoms() return 0.  Its content is exposed through
     // count()/parse() as an array of Info records.
     //
 259 class CUSection : public Section<A>
 260 {
 261 public:
             // The parser argument is not used by this constructor.
 262                                                 CUSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 263                                                         : Section<A>(f, s) { }
 264
 265         typedef typename A::P::uint_t   pint_t;
 266         typedef typename A::P                   P;
 267         typedef typename A::P::E                E;
 268
 269         virtual uint32_t                computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
 270         virtual uint32_t                appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
 271         virtual void                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 272         virtual bool                    addFollowOnFixups() const       { return false; }
 273
             // One record per entry filled in by parse().  The address and index fields
             // describe the function and its LSDA; `function` and `lsda` hold the matching
             // atoms (presumably resolved after atoms exist — confirm in parse()/makeFixups()).
 274         struct Info {
 275                 pint_t          functionStartAddress;
 276                 uint32_t        functionSymbolIndex;
 277                 uint32_t        rangeLength;
 278                 uint32_t        compactUnwindInfo;
 279                 const char*     personality;
 280                 pint_t          lsdaAddress;
 281                 Atom<A>*        function;
 282                 Atom<A>*        lsda;
 283         };
 284
             // count() gives the number of entries; parse() fills a caller-provided array
             // of cnt Info records.
 285         uint32_t                                count();
 286         void                                    parse(class Parser<A>& parser, uint32_t cnt, Info array[]);
 287         static bool                             encodingMeansUseDwarf(compact_unwind_encoding_t enc);
 288
 289
 290 private:
 291
 292         const char*                             personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc);
 293
             // Comparator with the qsort()-style signature (presumably used to sort the
             // Info array — confirm in parse()).
 294         static int                              infoSorter(const void* l, const void* r);
 295
 296 };
 297
 298
 299 template <typename A>
     //
     // TentativeDefinitionSection<A> is a section with no mach-o section record, named
     // "__DATA"/"__comm/tent" with type ld::Section::typeTentativeDefs.  Its atoms are
     // zero-fill, it generates no fixups, and it must never be searched by address.
     //
     // NOTE(review): this class does not override dontDeadStrip(); the Section<A> version
     // dereferences _machOSection, which is NULL here — confirm it is never called on
     // this section.
     //
 300 class TentativeDefinitionSection : public Section<A>
 301 {
 302 public:
             // The parser argument is not used by this constructor.
 303                                                 TentativeDefinitionSection(Parser<A>& parser, File<A>& f)
 304                                                         : Section<A>(f, "__DATA", "__comm/tent", ld::Section::typeTentativeDefs)  {}
 305
 306         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeZeroFill; }
 307         virtual bool            addFollowOnFixups() const       { return false; }
             // Always throws a C-string message; address lookup is not supported here.
 308         virtual Atom<A>*        findAtomByAddress(typename A::P::uint_t addr) { throw "TentativeDefinitionSection::findAtomByAddress() should never be called"; }
 309         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 310                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 311         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 312                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 313                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&);
             // Intentionally empty: this section produces no fixups.
 314         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
 315 private:
 316         typedef typename A::P::uint_t   pint_t;
 317         typedef typename A::P                   P;
 318 };
 319
 320
 321 template <typename A>
     //
     // AbsoluteSymbolSection<A> is a hidden section with no mach-o section record, named
     // "__DATA"/"__abs" with type ld::Section::typeAbsoluteSymbols.  Its atoms are never
     // protected from dead stripping by the section, use Alignment(0), get no fixups, and
     // are looked up by value through findAbsAtomForValue() rather than by address.
     //
 322 class AbsoluteSymbolSection : public Section<A>
 323 {
 324 public:
             // The parser argument is not used; `true` is the `hidden` argument of Section<A>.
 325                                                 AbsoluteSymbolSection(Parser<A>& parser, File<A>& f)
 326                                                         : Section<A>(f, "__DATA", "__abs", ld::Section::typeAbsoluteSymbols, true)  {}
 327
 328         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeUnclassified; }
             // Overrides the base version (which reads the mach-o section flags) with a
             // constant, since this section has no mach-o section record.
 329         virtual bool                                    dontDeadStrip()         { return false; }
 330         virtual ld::Atom::Alignment             alignmentForAddress(typename A::P::uint_t addr) { return ld::Atom::Alignment(0); }
 331         virtual bool            addFollowOnFixups() const       { return false; }
             // Always throws a C-string message; use findAbsAtomForValue() instead.
 332         virtual Atom<A>*        findAtomByAddress(typename A::P::uint_t addr) { throw "AbsoluteSymbolSection::findAtomByAddress() should never be called"; }
 333         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 334                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 335         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 336                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 337                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&);
             // Intentionally empty: this section produces no fixups.
 338         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
 339         virtual Atom<A>*        findAbsAtomForValue(typename A::P::uint_t);
 340
 341 private:
 342         typedef typename A::P::uint_t   pint_t;
 343         typedef typename A::P                   P;
 344 };
 345
 346
 347 template <typename A>
     //
     // SymboledSection<A> is a Section<A> whose atom content type is held in _type
     // rather than being a per-class constant.  The constructor and
     // the remaining overrides are defined out of line.
     //
 348 class SymboledSection : public Section<A>
 349 {
 350 public:
 351                                                 SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s);
             // Returns the stored content type.
 352         virtual ld::Atom::ContentType   contentType() { return _type; }
 353         virtual bool                                    dontDeadStrip();
 354         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 355                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 356         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 357                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
 358                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 359 protected:
 360         typedef typename A::P::uint_t   pint_t;
 361         typedef typename A::P                   P;
 362
             // Content type reported by contentType(); presumably set by the out-of-line
             // constructor — confirm there.
 363         ld::Atom::ContentType                   _type;
 364 };
 365
 366
 367 template <typename A>
     //
     // TLVDefsSection<A> is a distinct type for a SymboledSection<A>; it adds no members
     // or overrides and only forwards its constructor arguments to the base class.
     //
 368 class TLVDefsSection : public SymboledSection<A>
 369 {
 370 public:
 371                                                 TLVDefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s) :
 372                                                         SymboledSection<A>(parser, f, s) { }
 373
 374 private:
 375
 376 };
 377
 378
 379 template <typename A>
     //
     // ImplicitSizeSection<A> is the abstract base for sections whose element sizes are
     // supplied by the subclass through elementSizeAtAddress().  Subclasses provide
     // the per-element hooks declared below (name, size, whether to use an element, how
     // it combines); this class declares the atom counting and creation that use them.
     //
 380 class ImplicitSizeSection : public Section<A>
 381 {
 382 public:
             // The parser argument is not used by this constructor.
 383                                                 ImplicitSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 384                                                         : Section<A>(f, s) { }
 385         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 386         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 387 protected:
 388         typedef typename A::P::uint_t   pint_t;
 389         typedef typename A::P                   P;
 390
             // Never adds follow-on fixups.
 391         virtual bool                                            addFollowOnFixups() const               { return false; }
             // Name to give an atom at addr that has no label of its own.
 392         virtual const char*                                     unlabeledAtomName(Parser<A>& parser, pint_t addr) = 0;
 393         virtual ld::Atom::SymbolTableInclusion  symbolTableInclusion();
             // Size of the element starting at addr.
 394         virtual pint_t                                          elementSizeAtAddress(pint_t addr) = 0;
             // Default scope for atoms of this section is the linkage unit.
 395         virtual ld::Atom::Scope                         scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeLinkageUnit; }
             // Whether the element at addr should be used.
 396         virtual bool                                            useElementAt(Parser<A>& parser,
 397                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr) = 0;
             // Atoms are regular definitions unless a subclass says otherwise.
 398         virtual ld::Atom::Definition            definition()                                    { return ld::Atom::definitionRegular; }
 399         virtual ld::Atom::Combine                       combine(Parser<A>& parser, pint_t addr) = 0;
             // Ignores any label whose first character is 'L'.  Reads label[0], so label
             // must not be NULL.
 400         virtual bool                                            ignoreLabel(const char* label) const { return (label[0] == 'L'); }
 401 };
 402
 403
 404 template <typename A>
     //
     // FixedSizeSection<A> is an ImplicitSizeSection<A> in which every element is used:
     // useElementAt() unconditionally returns true.  The remaining pure-virtual hooks
     // are left for concrete subclasses.
     //
 405 class FixedSizeSection : public ImplicitSizeSection<A>
 406 {
 407 public:
 408                                                 FixedSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 409                                                         : ImplicitSizeSection<A>(parser, f, s) { }
 410 protected:
 411         typedef typename A::P::uint_t   pint_t;
 412         typedef typename A::P                   P;
 413         typedef typename A::P::E                E;
 414
             // Every element is used; none of the arguments is consulted.
 415         virtual bool                                    useElementAt(Parser<A>& parser,
 416                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
 417                                                                                                                 { return true; }
 418 };
 419
 420
 421 template <typename A>
     //
     // Literal4Section<A> is a FixedSizeSection<A> whose elements are all 4 bytes.
     // Unlabeled atoms are named "4-byte-literal" and atoms combine by name and content;
     // the hashing, coalescing and label-filtering overrides are defined out of line.
     //
 422 class Literal4Section : public FixedSizeSection<A>
 423 {
 424 public:
 425                                                 Literal4Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 426                                                         : FixedSizeSection<A>(parser, f, s) {}
 427 protected:
 428         typedef typename A::P::uint_t   pint_t;
 429         typedef typename A::P                   P;
 430
             // Alignment(2) for every address (presumably a power-of-two exponent, i.e.
             // 4-byte alignment — confirm against ld::Atom::Alignment).
 431         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(2); }
 432         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "4-byte-literal"; }
             // Every element is 4 bytes, independent of addr.
 433         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 4; }
 434         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 435         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 436         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 437                                                                                                         const ld::IndirectBindingTable& ind) const;
 438         virtual bool                                    ignoreLabel(const char* label) const;
 439 };
 440
 441 template <typename A>
     //
     // Literal8Section<A> is a FixedSizeSection<A> whose elements are all 8 bytes.
     // Unlabeled atoms are named "8-byte-literal" and atoms combine by name and content;
     // the hashing, coalescing and label-filtering overrides are defined out of line.
     //
 442 class Literal8Section : public FixedSizeSection<A>
 443 {
 444 public:
 445                                                 Literal8Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 446                                                         : FixedSizeSection<A>(parser, f, s) {}
 447 protected:
 448         typedef typename A::P::uint_t   pint_t;
 449         typedef typename A::P                   P;
 450
             // Alignment(3) for every address (presumably a power-of-two exponent, i.e.
             // 8-byte alignment — confirm against ld::Atom::Alignment).
 451         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(3); }
 452         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "8-byte-literal"; }
             // Every element is 8 bytes, independent of addr.
 453         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 8; }
 454         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 455         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 456         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 457                                                                                                         const ld::IndirectBindingTable& ind) const;
 458         virtual bool                                    ignoreLabel(const char* label) const;
 459 };
 460
 461 template <typename A>
     //
     // Literal16Section<A> is a FixedSizeSection<A> whose elements are all 16 bytes.
     // Unlabeled atoms are named "16-byte-literal" and atoms combine by name and content;
     // the hashing, coalescing and label-filtering overrides are defined out of line.
     //
 462 class Literal16Section : public FixedSizeSection<A>
 463 {
 464 public:
 465                                                 Literal16Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 466                                                         : FixedSizeSection<A>(parser, f, s) {}
 467 protected:
 468         typedef typename A::P::uint_t   pint_t;
 469         typedef typename A::P                   P;
 470
             // Alignment(4) for every address (presumably a power-of-two exponent, i.e.
             // 16-byte alignment — confirm against ld::Atom::Alignment).
 471         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(4); }
 472         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "16-byte-literal"; }
             // Every element is 16 bytes, independent of addr.
 473         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 16; }
 474         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 475         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 476         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 477                                                                                                         const ld::IndirectBindingTable& ind) const;
 478         virtual bool                                    ignoreLabel(const char* label) const;
 479 };
 480
 481
 482 template <typename A>
 483 class NonLazyPointerSection : public FixedSizeSection<A>
 484 {
 485 public:
 486                                                 NonLazyPointerSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 487                                                         : FixedSizeSection<A>(parser, f, s) {}
 488 protected:
 489         typedef typename A::P::uint_t   pint_t;
 490         typedef typename A::P                   P;
 491
 492         virtual void                                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 493         virtual ld::Atom::ContentType   contentType()                                                   { return ld::Atom::typeNonLazyPointer; }
 494         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 495         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "non_lazy_ptr"; }
 496         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 497         virtual ld::Atom::Scope                 scopeAtAddress(Parser<A>& parser, pint_t addr);
 498         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t);
 499         virtual bool                                    ignoreLabel(const char* label) const    { return true; }
 500         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 501         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 502                                                                                                         const ld::IndirectBindingTable& ind) const;
 503
 504 private:
 505         static const char*                              targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind);
 506         static ld::Fixup::Kind                  fixupKind();
 507 };
 508
 509
 510 template <typename A>
 511 class CFStringSection : public FixedSizeSection<A>
 512 {
 513 public:
 514                                                 CFStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 515                                                         : FixedSizeSection<A>(parser, f, s) {}
 516 protected:
 517         typedef typename A::P::uint_t   pint_t;
 518
 519         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 520         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "CFString"; }
 521         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 4*sizeof(pint_t); }
 522         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndReferences; }
 523         virtual bool                                    ignoreLabel(const char* label) const    { return true; }
 524         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 525         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 526                                                                                                         const ld::IndirectBindingTable& ind) const;
 527 private:
 528         enum ContentType { contentUTF8, contentUTF16, contentUnknown };
 529         static const uint8_t*                   targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
 530                                                                                                 ContentType* ct, unsigned int* count);
 531 };
 532
 533
 534 template <typename A>
 535 class ObjC1ClassSection : public FixedSizeSection<A>
 536 {
 537 public:
 538                                                 ObjC1ClassSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 539                                                         : FixedSizeSection<A>(parser, f, s) {}
 540 protected:
 541         typedef typename A::P::uint_t   pint_t;
 542         typedef typename A::P                   P;
 543         typedef typename A::P::E                E;
 544
 545         virtual ld::Atom::Scope                 scopeAtAddress(Parser<A>& , pint_t )    { return ld::Atom::scopeGlobal; }
 546         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(2); }
 547         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t);
 548         virtual ld::Atom::SymbolTableInclusion  symbolTableInclusion()                  { return ld::Atom::symbolTableIn; }
 549         virtual pint_t                                  elementSizeAtAddress(pint_t addr);
 550         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineNever; }
 551         virtual bool                                    ignoreLabel(const char* label) const    { return true; }
 552         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
 553                                                                                                                                                         { return 0; }
 554         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 555                                                                                                         const ld::IndirectBindingTable& ind) const { return false; }
 556         virtual bool                                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
 557 };
 558
 559
 560 template <typename A>
 561 class ObjC2ClassRefsSection : public FixedSizeSection<A>
 562 {
 563 public:
 564                                                 ObjC2ClassRefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 565                                                         : FixedSizeSection<A>(parser, f, s) {}
 566 protected:
 567         typedef typename A::P::uint_t   pint_t;
 568
 569         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 570         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "objc-class-ref"; }
 571         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 572         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndReferences; }
 573         virtual bool                                    ignoreLabel(const char* label) const    { return true; }
 574         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 575         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 576                                                                                                         const ld::IndirectBindingTable& ind) const;
 577 private:
 578         const char*                                             targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 579 };
 580
 581
 582 template <typename A>
 583 class ObjC2CategoryListSection : public FixedSizeSection<A>
 584 {
 585 public:
 586                                                 ObjC2CategoryListSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 587                                                         : FixedSizeSection<A>(parser, f, s) {}
 588 protected:
 589         typedef typename A::P::uint_t   pint_t;
 590
 591         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 592         virtual ld::Atom::Scope                 scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeTranslationUnit; }
 593         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "objc-cat-list"; }
 594         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 595         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineNever; }
 596         virtual bool                                    ignoreLabel(const char* label) const    { return true; }
 597 private:
 598         const char*                                             targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 599 };
 600
 601
 602 template <typename A>
 603 class PointerToCStringSection : public FixedSizeSection<A>
 604 {
 605 public:
 606                                                 PointerToCStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 607                                                         : FixedSizeSection<A>(parser, f, s) {}
 608 protected:
 609         typedef typename A::P::uint_t   pint_t;
 610
 611         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 612         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "pointer-to-literal-cstring"; }
 613         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 614         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndReferences; }
 615         virtual bool                                    ignoreLabel(const char* label) const    { return true; }
 616         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 617         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 618                                                                                                         const ld::IndirectBindingTable& ind) const;
 619         virtual const char*                             targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 620 };
 621
 622
 623 template <typename A>
 624 class Objc1ClassReferences : public PointerToCStringSection<A>
 625 {
 626 public:
 627                                                 Objc1ClassReferences(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 628                                                         : PointerToCStringSection<A>(parser, f, s) {}
 629
 630         typedef typename A::P::uint_t   pint_t;
 631         typedef typename A::P                   P;
 632
 633         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "pointer-to-literal-objc-class-name"; }
 634         virtual bool                                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
 635         virtual const char*                             targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 636 };
 637
 638
 639 template <typename A>
 640 class CStringSection : public ImplicitSizeSection<A>
 641 {
 642 public:
 643                                                 CStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 644                                                         : ImplicitSizeSection<A>(parser, f, s) {}
 645 protected:
 646         typedef typename A::P::uint_t   pint_t;
 647         typedef typename A::P                   P;
 648
 649         virtual ld::Atom::ContentType   contentType()                                                   { return ld::Atom::typeCString; }
 650         virtual Atom<A>*                                findAtomByAddress(pint_t addr);
 651         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "cstring"; }
 652         virtual pint_t                                  elementSizeAtAddress(pint_t addr);
 653         virtual bool                                    ignoreLabel(const char* label) const;
 654         virtual bool                                    useElementAt(Parser<A>& parser,
 655                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr);
 656         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 657         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 658         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 659                                                                                                         const ld::IndirectBindingTable& ind) const;
 660
 661 };
 662
 663
 664 template <typename A>
 665 class UTF16StringSection : public SymboledSection<A>
 666 {
 667 public:
 668                                                 UTF16StringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 669                                                         : SymboledSection<A>(parser, f, s) {}
 670 protected:
 671         typedef typename A::P::uint_t   pint_t;
 672         typedef typename A::P                   P;
 673
 674         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 675         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 676         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 677                                                                                                         const ld::IndirectBindingTable& ind) const;
 678 };
 679
 680
 681 //
 682 // Atoms in mach-o files
 683 //
 684 template <typename A>
 685 class Atom : public ld::Atom
 686 {
 687 public:
 688         // overrides of ld::Atom
 689         virtual const ld::File*                                         file() const;
 690         virtual const char*                                                     translationUnitSource() const
 691                                                                                                                                         { return sect().file().translationUnitSource(); }
 692         virtual const char*                                                     name() const            { return _name; }
 693         virtual uint64_t                                                        size() const            { return _size; }
 694         virtual uint64_t                                                        objectAddress() const { return _objAddress; }
 695         virtual void                                                            copyRawContent(uint8_t buffer[]) const;
 696         virtual const uint8_t*                                          rawContentPointer() const { return contentPointer(); }
 697         virtual unsigned long                                           contentHash(const ld::IndirectBindingTable& ind) const
 698                                                                                                                         { if ( _hash == 0 ) _hash = sect().contentHash(this, ind); return _hash; }
 699         virtual bool                                                            canCoalesceWith(const ld::Atom& rhs, const ld::IndirectBindingTable& ind) const
 700                                                                                                                         { return sect().canCoalesceWith(this, rhs, ind); }
 701         virtual ld::Fixup::iterator                                     fixupsBegin() const     { return &machofile()._fixups[_fixupsStartIndex]; }
 702         virtual ld::Fixup::iterator                                     fixupsEnd()     const   { return &machofile()._fixups[_fixupsStartIndex+_fixupsCount]; }
 703         virtual ld::Atom::UnwindInfo::iterator          beginUnwind() const     { return &machofile()._unwindInfos[_unwindInfoStartIndex]; }
 704         virtual ld::Atom::UnwindInfo::iterator          endUnwind()     const   { return &machofile()._unwindInfos[_unwindInfoStartIndex+_unwindInfoCount];  }
 705         virtual ld::Atom::LineInfo::iterator            beginLineInfo() const{ return &machofile()._lineInfos[_lineInfoStartIndex]; }
 706         virtual ld::Atom::LineInfo::iterator            endLineInfo() const { return &machofile()._lineInfos[_lineInfoStartIndex+_lineInfoCount];  }
 707         virtual void                                                            setFile(const ld::File* f);
 708
 709 private:
 710
 711         enum {  kFixupStartIndexBits = 32,
 712                         kLineInfoStartIndexBits = 32,
 713                         kUnwindInfoStartIndexBits = 24,
 714                         kFixupCountBits = 24,
 715                         kLineInfoCountBits = 12,
 716                         kUnwindInfoCountBits = 4
 717                 }; // must sum to 128
 718
 719 public:
 720         // methods for all atoms from mach-o object file
 721                         Section<A>&                                                     sect() const                    { return (Section<A>&)section(); }
 722                         File<A>&                                                        machofile() const                       { return ((Section<A>*)(this->_section))->file(); }
 723                         void                                                            setFixupsRange(uint32_t s, uint32_t c);
 724                         void                                                            setUnwindInfoRange(uint32_t s, uint32_t c);
 725                         void                                                            extendUnwindInfoRange();
 726                         void                                                            setLineInfoRange(uint32_t s, uint32_t c);
 727                         bool                                                            roomForMoreLineInfoCount() { return (_lineInfoCount < ((1<<kLineInfoCountBits)-1)); }
 728                         void                                                            incrementLineInfoCount() { assert(roomForMoreLineInfoCount()); ++_lineInfoCount; }
 729                         void                                                            incrementFixupCount() { if (_fixupsCount == ((1 << kFixupCountBits)-1))
 730                                                                                                                                                         throwf("too may fixups in %s", name()); ++_fixupsCount; }
 731                         const uint8_t*                                          contentPointer() const;
 732                         uint32_t                                                        fixupCount() const { return _fixupsCount; }
 733                         void                                                            verifyAlignment(const macho_section<typename A::P>&) const;
 734
 735         typedef typename A::P                                           P;
 736         typedef typename A::P::E                                        E;
 737         typedef typename A::P::uint_t                           pint_t;
 738                                                                                                 // constuct via all attributes
 739                                                                                                 Atom(Section<A>& sct, const char* nm, pint_t addr, uint64_t sz,
 740                                                                                                         ld::Atom::Definition d, ld::Atom::Combine c, ld::Atom::Scope s,
 741                                                                                                         ld::Atom::ContentType ct, ld::Atom::SymbolTableInclusion i,
 742                                                                                                         bool dds, bool thumb, bool al, ld::Atom::Alignment a)
 743                                                                                                                 : ld::Atom((ld::Section&)sct, d, c, s, ct, i, dds, thumb, al, a),
 744                                                                                                                         _size(sz), _objAddress(addr), _name(nm), _hash(0),
 745                                                                                                                         _fixupsStartIndex(0), _lineInfoStartIndex(0),
 746                                                                                                                         _unwindInfoStartIndex(0), _fixupsCount(0),
 747                                                                                                                         _lineInfoCount(0), _unwindInfoCount(0) { }
 748                                                                                                 // construct via symbol table entry
 749                                                                                                 Atom(Section<A>& sct, Parser<A>& parser, const macho_nlist<P>& sym,
 750                                                                                                                                 uint64_t sz, bool alias=false)
 751                                                                                                                 : ld::Atom((ld::Section&)sct, parser.definitionFromSymbol(sym),
 752                                                                                                                                 parser.combineFromSymbol(sym), parser.scopeFromSymbol(sym),
 753                                                                                                                                 parser.resolverFromSymbol(sym) ? ld::Atom::typeResolver : sct.contentType(),
 754                                                                                                                                 parser.inclusionFromSymbol(sym),
 755                                                                                                                                 parser.dontDeadStripFromSymbol(sym) || sct.dontDeadStrip(),
 756                                                                                                                                 parser.isThumbFromSymbol(sym), alias,
 757                                                                                                                                 sct.alignmentForAddress(sym.n_value())),
 758                                                                                                                         _size(sz), _objAddress(sym.n_value()),
 759                                                                                                                         _name(parser.nameFromSymbol(sym)), _hash(0),
 760                                                                                                                         _fixupsStartIndex(0), _lineInfoStartIndex(0),
 761                                                                                                                         _unwindInfoStartIndex(0), _fixupsCount(0),
 762                                                                                                                         _lineInfoCount(0), _unwindInfoCount(0) {
 763                                                                                                                                 // <rdar://problem/6783167> support auto-hidden weak symbols
 764                                                                                                                                 if ( _scope == ld::Atom::scopeGlobal &&
 765                                                                                                                                                 (sym.n_desc() & (N_WEAK_DEF|N_WEAK_REF)) == (N_WEAK_DEF|N_WEAK_REF) )
 766                                                                                                                                         this->setAutoHide();
 767                                                                                                                                         this->verifyAlignment(*sct.machoSection());
 768                                                                                                                         }
 769
 770 private:
 771         friend class Parser<A>;
 772         friend class Section<A>;
 773         friend class CStringSection<A>;
 774         friend class AbsoluteSymbolSection<A>;
 775
 776         pint_t                                                                          _size;
 777         pint_t                                                                          _objAddress;
 778         const char*                                                                     _name;
 779         mutable unsigned long                                           _hash;
 780
 781         uint64_t                                                                        _fixupsStartIndex               : kFixupStartIndexBits,
 782                                                                                                 _lineInfoStartIndex             : kLineInfoStartIndexBits,
 783                                                                                                 _unwindInfoStartIndex   : kUnwindInfoStartIndexBits,
 784                                                                                                 _fixupsCount                    : kFixupCountBits,
 785                                                                                                 _lineInfoCount                  : kLineInfoCountBits,
 786                                                                                                 _unwindInfoCount                : kUnwindInfoCountBits;
 787
 788         static std::map<const ld::Atom*, const ld::File*> _s_fileOverride;
 789 };
 790
// Definition of the static override table, one per Atom<A> instantiation.
// setFile() stores an entry keyed by atom; file() returns that entry in
// preference to the file of the atom's section.
template <typename A>
std::map<const ld::Atom*, const ld::File*> Atom<A>::_s_fileOverride;
 793
 794 template <typename A>
 795 void Atom<A>::setFile(const ld::File* f) {
 796         _s_fileOverride[this] = f;
 797 }
 798
 799 template <typename A>
 800 const ld::File* Atom<A>::file() const
 801 {
 802         std::map<const ld::Atom*, const ld::File*>::iterator pos = _s_fileOverride.find(this);
 803         if ( pos != _s_fileOverride.end() )
 804                 return pos->second;
 805
 806         return &sect().file();
 807 }
 808
 809 template <typename A>
 810 void Atom<A>::setFixupsRange(uint32_t startIndex, uint32_t count)
 811 {
 812         if ( count >= (1 << kFixupCountBits) )
 813                 throwf("too many fixups in function %s", this->name());
 814         if ( startIndex >= (1 << kFixupStartIndexBits) )
 815                 throwf("too many fixups in file");
 816         assert(((startIndex+count) <= sect().file()._fixups.size()) && "fixup index out of range");
 817         _fixupsStartIndex = startIndex;
 818         _fixupsCount = count;
 819 }
 820
 821 template <typename A>
 822 void Atom<A>::setUnwindInfoRange(uint32_t startIndex, uint32_t count)
 823 {
 824         if ( count >= (1 << kUnwindInfoCountBits) )
 825                 throwf("too many compact unwind infos in function %s", this->name());
 826         if ( startIndex >= (1 << kUnwindInfoStartIndexBits) )
 827                 throwf("too many compact unwind infos (%d) in file", startIndex);
 828         assert((startIndex+count) <= sect().file()._unwindInfos.size() && "unwindinfo index out of range");
 829         _unwindInfoStartIndex = startIndex;
 830         _unwindInfoCount = count;
 831 }
 832
 833 template <typename A>
 834 void Atom<A>::extendUnwindInfoRange()
 835 {
 836         if ( _unwindInfoCount+1 >= (1 << kUnwindInfoCountBits) )
 837                 throwf("too many compact unwind infos in function %s", this->name());
 838         _unwindInfoCount += 1;
 839 }
 840
 841 template <typename A>
 842 void Atom<A>::setLineInfoRange(uint32_t startIndex, uint32_t count)
 843 {
 844         assert((count < (1 << kLineInfoCountBits)) && "too many line infos");
 845         assert((startIndex+count) < sect().file()._lineInfos.size() && "line info index out of range");
 846         _lineInfoStartIndex = startIndex;
 847         _lineInfoCount = count;
 848 }
 849
 850 template <typename A>
 851 const uint8_t* Atom<A>::contentPointer() const
 852 {
 853         const macho_section<P>* sct = this->sect().machoSection();
 854         if ( this->_objAddress > sct->addr() + sct->size() )
 855                 throwf("malformed .o file, symbol has address 0x%0llX which is outside range of its section", (uint64_t)this->_objAddress);
 856         uint32_t fileOffset = sct->offset() - sct->addr() + this->_objAddress;
 857         return this->sect().file().fileContent()+fileOffset;
 858 }
 859
 860
 861 template <typename A>
 862 void Atom<A>::copyRawContent(uint8_t buffer[]) const
 863 {
 864         // copy base bytes
 865         if ( this->contentType() == ld::Atom::typeZeroFill ) {
 866                 bzero(buffer, _size);
 867         }
 868         else if ( _size != 0 ) {
 869                 memcpy(buffer, this->contentPointer(), _size);
 870         }
 871 }
 872
 873 template <>
 874 void Atom<arm>::verifyAlignment(const macho_section<P>&) const
 875 {
 876         if ( (this->section().type() == ld::Section::typeCode) && ! isThumb() ) {
 877                 if ( ((_objAddress % 4) != 0) || (this->alignment().powerOf2 < 2) )
 878                         warning("ARM function not 4-byte aligned: %s from %s", this->name(), this->file()->path());
 879         }
 880 }
 881
 882 #if SUPPORT_ARCH_arm64
 883 template <>
 884 void Atom<arm64>::verifyAlignment(const macho_section<P>& sect) const
 885 {
 886         if ( (this->section().type() == ld::Section::typeCode) && (sect.size() != 0) ) {
 887                 if ( ((_objAddress % 4) != 0) || (this->alignment().powerOf2 < 2) )
 888                         warning("arm64 function not 4-byte aligned: %s from %s", this->name(), this->file()->path());
 889         }
 890 }
 891 #endif
 892
 893 template <typename A>
 894 void Atom<A>::verifyAlignment(const macho_section<P>&) const
 895 {
 896 }
 897
 898
 899 class AliasAtom : public ld::Atom
 900 {
 901 public:
 902                                                                                 AliasAtom(const char* name, bool hidden, const ld::File* file, const char* aliasOfName) :
 903                                                                                         ld::Atom(_s_section, ld::Atom::definitionRegular, ld::Atom::combineNever,
 904                                                                                                         (hidden ? ld::Atom::scopeLinkageUnit : ld::Atom::scopeGlobal),
 905                                                                                                         ld::Atom::typeUnclassified, ld::Atom::symbolTableIn,
 906                                                                                                         false, false, true, 0),
 907                                                                                         _file(file),
 908                                                                                         _name(name),
 909                                                                                         _fixup(0, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, ld::Fixup::bindingByNameUnbound, aliasOfName) { }
 910
 911         virtual const ld::File*                         file() const            { return _file; }
 912         virtual const char*                                     translationUnitSource() const
 913                                                                                                                         { return NULL; }
 914         virtual const char*                                     name() const            { return _name; }
 915         virtual uint64_t                                        size() const            { return 0; }
 916         virtual uint64_t                                        objectAddress() const { return 0; }
 917         virtual void                                            copyRawContent(uint8_t buffer[]) const { }
 918         virtual ld::Fixup::iterator                     fixupsBegin() const     { return &((ld::Fixup*)&_fixup)[0]; }
 919         virtual ld::Fixup::iterator                     fixupsEnd()     const   { return &((ld::Fixup*)&_fixup)[1]; }
 920
 921 private:
 922         static ld::Section                                      _s_section;
 923
 924         const ld::File*                                         _file;
 925         const char*                                                     _name;
 926         ld::Fixup                                                       _fixup;
 927 };
 928
 929 ld::Section AliasAtom::_s_section("__LD", "__aliases", ld::Section::typeTempAlias, true);
 930
 931
 932 template <typename A>
 933 class Parser
 934 {
 935 public:
 936         static bool                                                                             validFile(const uint8_t* fileContent, bool subtypeMustMatch=false,
 937                                                                                                                                 cpu_subtype_t subtype=0);
 938         static const char*                                                              fileKind(const uint8_t* fileContent);
 939         static bool                                                                             hasObjC2Categories(const uint8_t* fileContent);
 940         static bool                                                                             hasObjC1Categories(const uint8_t* fileContent);
 941         static ld::relocatable::File*                                   parse(const uint8_t* fileContent, uint64_t fileLength,
 942                                                                                                                         const char* path, time_t modTime, ld::File::Ordinal ordinal,
 943                                                                                                                          const ParserOptions& opts) {
 944                                                                                                                                 Parser p(fileContent, fileLength, path, modTime,
 945                                                                                                                                                 ordinal, opts.warnUnwindConversionProblems,
 946                                                                                                                                                 opts.keepDwarfUnwind, opts.forceDwarfConversion,
 947                                                                                                                                                 opts.neverConvertDwarf, opts.verboseOptimizationHints);
 948                                                                                                                                 return p.parse(opts);
 949                                                                                                                 }
 950
 951         typedef typename A::P                                           P;
 952         typedef typename A::P::E                                        E;
 953         typedef typename A::P::uint_t                           pint_t;
 954
 955         struct SourceLocation {
 956                                                                 SourceLocation() {}
 957                                                                 SourceLocation(Atom<A>* a, uint32_t o) : atom(a), offsetInAtom(o) {}
 958                 Atom<A>*        atom;
 959                 uint32_t        offsetInAtom;
 960         };
 961
 962         struct TargetDesc {
 963                 Atom<A>*        atom;
 964                 const char*     name;           // only used if targetAtom is NULL
 965                 int64_t         addend;
 966                 bool            weakImport;     // only used if targetAtom is NULL
 967         };
 968
 969         struct FixupInAtom {
 970                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) :
 971                         fixup(src.offsetInAtom, c, k, target), atom(src.atom) { src.atom->incrementFixupCount(); }
 972
 973                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) :
 974                         fixup(src.offsetInAtom, c, k, b, target), atom(src.atom) { src.atom->incrementFixupCount(); }
 975
 976                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) :
 977                         fixup(src.offsetInAtom, c, k, wi, name), atom(src.atom) { src.atom->incrementFixupCount(); }
 978
 979                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) :
 980                         fixup(src.offsetInAtom, c, k, b, name), atom(src.atom) { src.atom->incrementFixupCount(); }
 981
 982                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) :
 983                         fixup(src.offsetInAtom, c, k, addend), atom(src.atom) { src.atom->incrementFixupCount(); }
 984
 985                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) :
 986                         fixup(src.offsetInAtom, c, k, (uint64_t)0), atom(src.atom) { src.atom->incrementFixupCount(); }
 987
 988                 ld::Fixup               fixup;
 989                 Atom<A>*                atom;
 990         };
 991
 992         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) {
 993                 _allFixups.push_back(FixupInAtom(src, c, k, target));
 994         }
 995
 996         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) {
 997                 _allFixups.push_back(FixupInAtom(src, c, k, b, target));
 998         }
 999
1000         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) {
1001                 _allFixups.push_back(FixupInAtom(src, c, k, wi, name));
1002         }
1003
1004         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) {
1005                 _allFixups.push_back(FixupInAtom(src, c, k, b, name));
1006         }
1007
1008         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) {
1009                 _allFixups.push_back(FixupInAtom(src, c, k, addend));
1010         }
1011
1012         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) {
1013                 _allFixups.push_back(FixupInAtom(src, c, k));
1014         }
1015
1016         const char*                                                                             path() { return _path; }
1017         uint32_t                                                                                symbolCount() { return _symbolCount; }
1018         uint32_t                                                                                indirectSymbol(uint32_t indirectIndex);
1019         const macho_nlist<P>&                                                   symbolFromIndex(uint32_t index);
1020         const char*                                                                             nameFromSymbol(const macho_nlist<P>& sym);
1021         ld::Atom::Scope                                                                 scopeFromSymbol(const macho_nlist<P>& sym);
1022         static ld::Atom::Definition                                             definitionFromSymbol(const macho_nlist<P>& sym);
1023         static ld::Atom::Combine                                                combineFromSymbol(const macho_nlist<P>& sym);
1024                         ld::Atom::SymbolTableInclusion                  inclusionFromSymbol(const macho_nlist<P>& sym);
1025         static bool                                                                             dontDeadStripFromSymbol(const macho_nlist<P>& sym);
1026         static bool                                                                             isThumbFromSymbol(const macho_nlist<P>& sym);
1027         static bool                                                                             weakImportFromSymbol(const macho_nlist<P>& sym);
1028         static bool                                                                             resolverFromSymbol(const macho_nlist<P>& sym);
1029         static bool                                                                             altEntryFromSymbol(const macho_nlist<P>& sym);
1030         uint32_t                                                                                symbolIndexFromIndirectSectionAddress(pint_t,const macho_section<P>*);
1031         const macho_section<P>*                                                 firstMachOSection() { return _sectionsStart; }
1032         const macho_section<P>*                                                 machOSectionFromSectionIndex(uint32_t index);
1033         uint32_t                                                                                machOSectionCount() { return _machOSectionsCount; }
1034         uint32_t                                                                                undefinedStartIndex() { return _undefinedStartIndex; }
1035         uint32_t                                                                                undefinedEndIndex() { return _undefinedEndIndex; }
1036         void                                                                                    addFixup(FixupInAtom f) { _allFixups.push_back(f); }
1037         Section<A>*                                                                             sectionForNum(unsigned int sectNum);
1038         Section<A>*                                                                             sectionForAddress(pint_t addr);
1039         Atom<A>*                                                                                findAtomByAddress(pint_t addr);
1040         Atom<A>*                                                                                findAtomByAddressOrNullIfStub(pint_t addr);
1041         Atom<A>*                                                                                findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom);
1042         Atom<A>*                                                                                findAtomByName(const char* name);       // slow!
1043         void                                                                                    findTargetFromAddress(pint_t addr, TargetDesc& target);
1044         void                                                                                    findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target);
1045         void                                                                                    findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum,
1046                                                                                                                                                                                 TargetDesc& target);
1047         uint32_t                                                                                tentativeDefinitionCount() { return _tentativeDefinitionCount; }
1048         uint32_t                                                                                absoluteSymbolCount() { return _absoluteSymbolCount; }
1049
1050         bool                                                                                    hasStubsSection() { return (_stubsSectionNum != 0); }
1051         unsigned int                                                                    stubsSectionNum() { return _stubsSectionNum; }
1052         void                                                                                    addDtraceExtraInfos(const SourceLocation& src, const char* provider);
1053         const char*                                                                             scanSymbolTableForAddress(uint64_t addr);
1054         bool                                                                                    warnUnwindConversionProblems() { return _warnUnwindConversionProblems; }
1055         bool                                                                                    hasDataInCodeLabels() { return _hasDataInCodeLabels; }
1056         bool                                                                                    keepDwarfUnwind() { return _keepDwarfUnwind; }
1057         bool                                                                                    forceDwarfConversion() { return _forceDwarfConversion; }
1058         bool                                                                                    verboseOptimizationHints() { return _verboseOptimizationHints; }
1059         bool                                                                                    neverConvertDwarf() { return _neverConvertDwarf; }
1060         bool                                                                                    armUsesZeroCostExceptions() { return _armUsesZeroCostExceptions; }
1061
1062         macho_data_in_code_entry<P>*                                    dataInCodeStart() { return _dataInCodeStart; }
1063         macho_data_in_code_entry<P>*                                    dataInCodeEnd()   { return _dataInCodeEnd; }
1064         const uint8_t*                                                                  optimizationHintsStart() { return _lohStart; }
1065         const uint8_t*                                                                  optimizationHintsEnd() { return _lohEnd; }
1066         bool                                                                                    hasOptimizationHints() { return _lohStart != _lohEnd; }
1067
1068
1069         void                                                    addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target);
1070         void                                                    addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase);
1071
1072
1073
1074         struct LabelAndCFIBreakIterator {
1075                 typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
1076                                                                 LabelAndCFIBreakIterator(const uint32_t* ssa, uint32_t ssc, const pint_t* cfisa,
1077                                                                                                                 uint32_t cfisc, bool ols)
1078                                                                         : sortedSymbolIndexes(ssa), sortedSymbolCount(ssc), cfiStartsArray(cfisa),
1079                                                                                 cfiStartsCount(cfisc), fileHasOverlappingSymbols(ols),
1080                                                                                 newSection(false), cfiIndex(0), symIndex(0) {}
1081                 bool                                    next(Parser<A>& parser, const Section<A>& sect, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
1082                                                                                 pint_t* addr, pint_t* size, const macho_nlist<P>** sym);
1083                 pint_t                                  peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr);
1084                 void                                    beginSection() { newSection = true; symIndex = 0; }
1085
1086                 const uint32_t* const           sortedSymbolIndexes;
1087                 const uint32_t                          sortedSymbolCount;
1088                 const pint_t*                           cfiStartsArray;
1089                 const uint32_t                          cfiStartsCount;
1090                 const bool                                      fileHasOverlappingSymbols;
1091                 bool                                            newSection;
1092                 uint32_t                                        cfiIndex;
1093                 uint32_t                                        symIndex;
1094         };
1095
1096         struct CFI_CU_InfoArrays {
1097                         typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
1098                         typedef typename CUSection<A>::Info CU_Info;
1099                                                 CFI_CU_InfoArrays(const CFI_Atom_Info* cfiAr, uint32_t cfiC, CU_Info* cuAr, uint32_t cuC)
1100                                                         : cfiArray(cfiAr), cuArray(cuAr), cfiCount(cfiC), cuCount(cuC) {}
1101                 const CFI_Atom_Info* const      cfiArray;
1102                         CU_Info* const                  cuArray;
1103                 const uint32_t                          cfiCount;
1104                 const uint32_t                          cuCount;
1105         };
1106
1107
1108
1109 private:
1110         friend class Section<A>;
1111
1112         enum SectionType { sectionTypeIgnore, sectionTypeLiteral4, sectionTypeLiteral8, sectionTypeLiteral16,
1113                                                 sectionTypeNonLazy, sectionTypeCFI, sectionTypeCString, sectionTypeCStringPointer,
1114                                                 sectionTypeUTF16Strings, sectionTypeCFString, sectionTypeObjC2ClassRefs, typeObjC2CategoryList,
1115                                                 sectionTypeObjC1Classes, sectionTypeSymboled, sectionTypeObjC1ClassRefs,
1116                                                 sectionTypeTentativeDefinitions, sectionTypeAbsoluteSymbols, sectionTypeTLVDefs,
1117                                                 sectionTypeCompactUnwind };
1118
1119         template <typename P>
1120         struct MachOSectionAndSectionClass
1121         {
1122                 const macho_section<P>* sect;
1123                 SectionType                             type;
1124
1125                 static int sorter(const void* l, const void* r) {
1126                         const MachOSectionAndSectionClass<P>* left = (MachOSectionAndSectionClass<P>*)l;
1127                         const MachOSectionAndSectionClass<P>* right = (MachOSectionAndSectionClass<P>*)r;
1128                         int64_t diff = left->sect->addr() - right->sect->addr();
1129                         if ( diff == 0 )
1130                                 return 0;
1131                         if ( diff < 0 )
1132                                 return -1;
1133                         else
1134                                 return 1;
1135                 }
1136         };
1137
1138         struct ParserAndSectionsArray { Parser* parser; const uint32_t* sortedSectionsArray; };
1139
1140
1141                                                                                                         Parser(const uint8_t* fileContent, uint64_t fileLength,
1142                                                                                                                         const char* path, time_t modTime, ld::File::Ordinal ordinal,
1143                                                                                                                         bool warnUnwindConversionProblems, bool keepDwarfUnwind,
1144                                                                                                                         bool forceDwarfConversion, bool neverConvertDwarf, bool verboseOptimizationHints);
1145         ld::relocatable::File*                                                  parse(const ParserOptions& opts);
1146         uint8_t                                                                                 loadCommandSizeMask();
1147         bool                                                                                    parseLoadCommands();
1148         void                                                                                    makeSections();
1149         void                                                                                    prescanSymbolTable();
1150         void                                                                                    makeSortedSymbolsArray(uint32_t symArray[], const uint32_t sectionArray[]);
1151         void                                                                                    makeSortedSectionsArray(uint32_t array[]);
1152         static int                                                                              pointerSorter(const void* l, const void* r);
1153         static int                                                                              symbolIndexSorter(void* extra, const void* l, const void* r);
1154         static int                                                                              sectionIndexSorter(void* extra, const void* l, const void* r);
1155
1156         void                                                                                    parseDebugInfo();
1157         void                                                                                    parseStabs();
1158         void                                                                                    appendAliasAtoms(uint8_t* atomBuffer);
1159         static bool                                                                             isConstFunStabs(const char *stabStr);
1160         bool                                                                                    read_comp_unit(const char ** name, const char ** comp_dir,
1161                                                                                                                                                                                                 uint64_t *stmt_list);
1162         pint_t                                                                                  realAddr(pint_t addr);
1163         const char*                                                                             getDwarfString(uint64_t form, const uint8_t*& p);
1164         uint64_t                                                                                getDwarfOffset(uint64_t form, const uint8_t*& di, bool dwarf64);
1165         bool                                                                                    skip_form(const uint8_t ** offset, const uint8_t * end,
1166                                                                                                                                 uint64_t form, uint8_t addr_size, bool dwarf64);
1167
1168
1169         // filled in by constructor
1170         const uint8_t*                                                          _fileContent;
1171         uint32_t                                                                        _fileLength;
1172         const char*                                                                     _path;
1173         time_t                                                                          _modTime;
1174         ld::File::Ordinal                                                       _ordinal;
1175
1176         // filled in by parseLoadCommands()
1177         File<A>*                                                                        _file;
1178         const macho_nlist<P>*                                           _symbols;
1179         uint32_t                                                                        _symbolCount;
1180         uint32_t                                                                        _indirectSymbolCount;
1181         const char*                                                                     _strings;
1182         uint32_t                                                                        _stringsSize;
1183         const uint32_t*                                                         _indirectTable;
1184         uint32_t                                                                        _indirectTableCount;
1185         uint32_t                                                                        _undefinedStartIndex;
1186         uint32_t                                                                        _undefinedEndIndex;
1187         const macho_section<P>*                                         _sectionsStart;
1188         uint32_t                                                                        _machOSectionsCount;
1189         bool                                                                            _hasUUID;
1190         macho_data_in_code_entry<P>*                            _dataInCodeStart;
1191         macho_data_in_code_entry<P>*                            _dataInCodeEnd;
1192         const uint8_t*                                                          _lohStart;
1193         const uint8_t*                                                          _lohEnd;
1194
1195         // filled in by parse()
1196         CFISection<A>*                                                          _EHFrameSection;
1197         CUSection<A>*                                                           _compactUnwindSection;
1198         AbsoluteSymbolSection<A>*                                       _absoluteSection;
1199         uint32_t                                                                        _tentativeDefinitionCount;
1200         uint32_t                                                                        _absoluteSymbolCount;
1201         uint32_t                                                                        _symbolsInSections;
1202         bool                                                                            _hasLongBranchStubs;
1203         bool                                                                            _AppleObjc; // FSF has objc that uses different data layout
1204         bool                                                                            _overlappingSymbols;
1205         bool                                                                            _warnUnwindConversionProblems;
1206         bool                                                                            _hasDataInCodeLabels;
1207         bool                                                                            _keepDwarfUnwind;
1208         bool                                                                            _forceDwarfConversion;
1209         bool                                                                            _neverConvertDwarf;
1210         bool                                                                            _verboseOptimizationHints;
1211         bool                                                                            _armUsesZeroCostExceptions;
1212         unsigned int                                                            _stubsSectionNum;
1213         const macho_section<P>*                                         _stubsMachOSection;
1214         std::vector<const char*>                                        _dtraceProviderInfo;
1215         std::vector<FixupInAtom>                                        _allFixups;
1216 };
1217
1218
1219
1220 template <typename A>
1221 Parser<A>::Parser(const uint8_t* fileContent, uint64_t fileLength, const char* path, time_t modTime,
1222                                         ld::File::Ordinal ordinal, bool convertDUI, bool keepDwarfUnwind, bool forceDwarfConversion,
1223                                         bool neverConvertDwarf, bool verboseOptimizationHints)
1224                 : _fileContent(fileContent), _fileLength(fileLength), _path(path), _modTime(modTime),
1225                         _ordinal(ordinal), _file(NULL),
1226                         _symbols(NULL), _symbolCount(0), _indirectSymbolCount(0), _strings(NULL), _stringsSize(0),
1227                         _indirectTable(NULL), _indirectTableCount(0),
1228                         _undefinedStartIndex(0), _undefinedEndIndex(0),
1229                         _sectionsStart(NULL), _machOSectionsCount(0), _hasUUID(false),
1230                         _dataInCodeStart(NULL), _dataInCodeEnd(NULL),
1231                         _lohStart(NULL), _lohEnd(NULL),
1232                         _EHFrameSection(NULL), _compactUnwindSection(NULL), _absoluteSection(NULL),
1233                         _tentativeDefinitionCount(0), _absoluteSymbolCount(0),
1234                         _symbolsInSections(0), _hasLongBranchStubs(false),  _AppleObjc(false),
1235                         _overlappingSymbols(false), _warnUnwindConversionProblems(convertDUI), _hasDataInCodeLabels(false),
1236                         _keepDwarfUnwind(keepDwarfUnwind), _forceDwarfConversion(forceDwarfConversion),
1237                         _neverConvertDwarf(neverConvertDwarf),
1238                         _verboseOptimizationHints(verboseOptimizationHints),
1239                         _stubsSectionNum(0), _stubsMachOSection(NULL)
1240 {
1241 }
1242
1243
1244 template <>
1245 bool Parser<x86>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1246 {
1247         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1248         if ( header->magic() != MH_MAGIC )
1249                 return false;
1250         if ( header->cputype() != CPU_TYPE_I386 )
1251                 return false;
1252         if ( header->filetype() != MH_OBJECT )
1253                 return false;
1254         return true;
1255 }
1256
1257 template <>
1258 bool Parser<x86_64>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1259 {
1260         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1261         if ( header->magic() != MH_MAGIC_64 )
1262                 return false;
1263         if ( header->cputype() != CPU_TYPE_X86_64 )
1264                 return false;
1265         if ( header->filetype() != MH_OBJECT )
1266                 return false;
1267         return true;
1268 }
1269
1270 template <>
1271 bool Parser<arm>::validFile(const uint8_t* fileContent, bool subtypeMustMatch, cpu_subtype_t subtype)
1272 {
1273         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1274         if ( header->magic() != MH_MAGIC )
1275                 return false;
1276         if ( header->cputype() != CPU_TYPE_ARM )
1277                 return false;
1278         if ( header->filetype() != MH_OBJECT )
1279                 return false;
1280         if ( subtypeMustMatch ) {
1281                 if ( (cpu_subtype_t)header->cpusubtype() == subtype )
1282                         return true;
1283                 // hack until libcc_kext.a is made fat
1284                 if ( header->cpusubtype() == CPU_SUBTYPE_ARM_ALL )
1285                         return true;
1286                 return false;
1287         }
1288         return true;
1289 }
1290
1291
1292 template <>
1293 bool Parser<arm64>::validFile(const uint8_t* fileContent, bool subtypeMustMatch, cpu_subtype_t subtype)
1294 {
1295         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1296         if ( header->magic() != MH_MAGIC_64 )
1297                 return false;
1298         if ( header->cputype() != CPU_TYPE_ARM64 )
1299                 return false;
1300         if ( header->filetype() != MH_OBJECT )
1301                 return false;
1302         return true;
1303 }
1304
1305
1306 template <>
1307 const char* Parser<x86>::fileKind(const uint8_t* fileContent)
1308 {
1309         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1310         if ( header->magic() != MH_MAGIC )
1311                 return NULL;
1312         if ( header->cputype() != CPU_TYPE_I386 )
1313                 return NULL;
1314         return "i386";
1315 }
1316
1317 template <>
1318 const char* Parser<x86_64>::fileKind(const uint8_t* fileContent)
1319 {
1320         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1321         if ( header->magic() != MH_MAGIC )
1322                 return NULL;
1323         if ( header->cputype() != CPU_TYPE_X86_64 )
1324                 return NULL;
1325         return "x86_64";
1326 }
1327
1328 template <>
1329 const char* Parser<arm>::fileKind(const uint8_t* fileContent)
1330 {
1331         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1332         if ( header->magic() != MH_MAGIC )
1333                 return NULL;
1334         if ( header->cputype() != CPU_TYPE_ARM )
1335                 return NULL;
1336         for (const ArchInfo* t=archInfoArray; t->archName != NULL; ++t) {
1337                 if ( (t->cpuType == CPU_TYPE_ARM) && ((cpu_subtype_t)header->cpusubtype() == t->cpuSubType) ) {
1338                         return t->archName;
1339                 }
1340         }
1341         return "arm???";
1342 }
1343
#if SUPPORT_ARCH_arm64
//
// Returns the architecture name "arm64" when the header at fileContent has
// magic MH_MAGIC_64 and cpu type CPU_TYPE_ARM64, otherwise NULL.  The mach-o
// file type is not examined.
//
template <>
const char* Parser<arm64>::fileKind(const uint8_t* fileContent)
{
	const macho_header<P>* header = (const macho_header<P>*)fileContent;
	// must be the same magic Parser<arm64>::validFile() accepts; testing
	// MH_MAGIC here made this return NULL for every file validFile() approves
	if ( header->magic() != MH_MAGIC_64 )
		return NULL;
	if ( header->cputype() != CPU_TYPE_ARM64 )
		return NULL;
	return "arm64";
}
#endif
1356
1357 template <typename A>
1358 bool Parser<A>::hasObjC2Categories(const uint8_t* fileContent)
1359 {
1360         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1361         const uint32_t cmd_count = header->ncmds();
1362         const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1363         const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1364         const macho_load_command<P>* cmd = cmds;
1365         for (uint32_t i = 0; i < cmd_count; ++i) {
1366                 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1367                         const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1368                         const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1369                         for (uint32_t si=0; si < segment->nsects(); ++si) {
1370                                 const macho_section<P>* sect = &sectionsStart[si];
1371                                 if ( (sect->size() > 0)
1372                                         && (strcmp(sect->sectname(), "__objc_catlist") == 0)
1373                                         && (strcmp(sect->segname(), "__DATA") == 0) ) {
1374                                                 return true;
1375                                 }
1376                         }
1377                 }
1378                 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1379                 if ( cmd > cmdsEnd )
1380                         throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1381         }
1382         return false;
1383 }
1384
1385
1386 template <typename A>
1387 bool Parser<A>::hasObjC1Categories(const uint8_t* fileContent)
1388 {
1389         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1390         const uint32_t cmd_count = header->ncmds();
1391         const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1392         const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1393         const macho_load_command<P>* cmd = cmds;
1394         for (uint32_t i = 0; i < cmd_count; ++i) {
1395                 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1396                         const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1397                         const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1398                         for (uint32_t si=0; si < segment->nsects(); ++si) {
1399                                 const macho_section<P>* sect = &sectionsStart[si];
1400                                 if ( (sect->size() > 0)
1401                                         && (strcmp(sect->sectname(), "__category") == 0)
1402                                         && (strcmp(sect->segname(), "__OBJC") == 0) ) {
1403                                                 return true;
1404                                 }
1405                         }
1406                 }
1407                 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1408                 if ( cmd > cmdsEnd )
1409                         throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1410         }
1411         return false;
1412 }
1413
1414 template <typename A>
1415 int Parser<A>::pointerSorter(const void* l, const void* r)
1416 {
1417         // sort references by address
1418         const pint_t* left = (pint_t*)l;
1419         const pint_t* right = (pint_t*)r;
1420         return (*left - *right);
1421 }
1422
1423 template <typename A>
1424 typename A::P::uint_t Parser<A>::LabelAndCFIBreakIterator::peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr)
1425 {
1426         pint_t symbolAddr;
1427         if ( symIndex < sortedSymbolCount )
1428                 symbolAddr = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]).n_value();
1429         else
1430                 symbolAddr = endAddr;
1431         pint_t cfiAddr;
1432         if ( cfiIndex < cfiStartsCount )
1433                 cfiAddr = cfiStartsArray[cfiIndex];
1434         else
1435                 cfiAddr = endAddr;
1436         if ( (cfiAddr < symbolAddr) && (cfiAddr >= startAddr) ) {
1437                 if ( cfiAddr <  endAddr )
1438                         return cfiAddr;
1439                 else
1440                         return endAddr;
1441         }
1442         else  {
1443                 if ( symbolAddr <  endAddr )
1444                         return symbolAddr;
1445                 else
1446                         return endAddr;
1447         }
1448 }
1449
1450 //
1451 // Parses up a section into chunks based on labels and CFI information.
1452 // Each call returns the next chunk address and size, and (if the break
1453 // was becuase of a label, the symbol). Returns false when no more chunks.
1454 //
1455 template <typename A>
1456 bool Parser<A>::LabelAndCFIBreakIterator::next(Parser<A>& parser, const Section<A>& sect, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
1457                                                                                                 pint_t* addr, pint_t* size, const macho_nlist<P>** symbol)
1458 {
1459         // may not be a label on start of section, but need atom demarcation there
1460         if ( newSection ) {
1461                 newSection = false;
1462                 // advance symIndex until we get to the first label at or past the start of this section
1463                 while ( symIndex < sortedSymbolCount ) {
1464                         const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1465                         if ( ! sect.ignoreLabel(parser.nameFromSymbol(sym)) ) {
1466                                 pint_t nextSymbolAddr = sym.n_value();
1467                                 //fprintf(stderr, "sectNum=%d, nextSymbolAddr=0x%08llX, name=%s\n", sectNum, (uint64_t)nextSymbolAddr, parser.nameFromSymbol(sym));
1468                                 if ( (nextSymbolAddr > startAddr) || ((nextSymbolAddr == startAddr) && (sym.n_sect() == sectNum)) )
1469                                         break;
1470                         }
1471                         ++symIndex;
1472                 }
1473                 if ( symIndex < sortedSymbolCount ) {
1474                         const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1475                         pint_t nextSymbolAddr = sym.n_value();
1476                         // if next symbol found is not in this section
1477                         if ( sym.n_sect() != sectNum ) {
1478                                 // check for CFI break instead of symbol break
1479                                 if ( cfiIndex < cfiStartsCount ) {
1480                                         pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1481                                         if ( nextCfiAddr < endAddr ) {
1482                                                 // use cfi
1483                                                 ++cfiIndex;
1484                                                 *addr = nextCfiAddr;
1485                                                 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1486                                                 *symbol = NULL;
1487                                                 return true;
1488                                         }
1489                                 }
1490                                 *addr = startAddr;
1491                                 *size = endAddr - startAddr;
1492                                 *symbol = NULL;
1493                                 if ( startAddr == endAddr )
1494                                         return false;  // zero size section
1495                                 else
1496                                         return true;  // whole section is one atom with no label
1497                         }
1498                         // if also CFI break here, eat it
1499                         if ( cfiIndex < cfiStartsCount ) {
1500                                 if ( cfiStartsArray[cfiIndex] == nextSymbolAddr )
1501                                         ++cfiIndex;
1502                         }
1503                         if ( nextSymbolAddr == startAddr ) {
1504                                 // label at start of section, return it as chunk
1505                                 ++symIndex;
1506                                 *addr = startAddr;
1507                                 *size = peek(parser, startAddr, endAddr) - startAddr;
1508                                 *symbol = &sym;
1509                                 return true;
1510                         }
1511                         // return chunk before first symbol
1512                         *addr = startAddr;
1513                         *size = nextSymbolAddr - startAddr;
1514                         *symbol = NULL;
1515                         return true;
1516                 }
1517                 // no symbols in section, check CFI
1518                 if ( cfiIndex < cfiStartsCount ) {
1519                         pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1520                         if ( nextCfiAddr < endAddr ) {
1521                                 // use cfi
1522                                 ++cfiIndex;
1523                                 *addr = nextCfiAddr;
1524                                 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1525                                 *symbol = NULL;
1526                                 return true;
1527                         }
1528                 }
1529                 // no cfi, so whole section is one chunk
1530                 *addr = startAddr;
1531                 *size = endAddr - startAddr;
1532                 *symbol = NULL;
1533                 if ( startAddr == endAddr )
1534                         return false;  // zero size section
1535                 else
1536                         return true;  // whole section is one atom with no label
1537         }
1538
1539         while ( (symIndex < sortedSymbolCount) && (cfiIndex < cfiStartsCount) ) {
1540                 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1541                 pint_t nextSymbolAddr = sym.n_value();
1542                 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1543                 if ( nextSymbolAddr <  nextCfiAddr ) {
1544                         if ( nextSymbolAddr >= endAddr )
1545                                 return false;
1546                         ++symIndex;
1547                         if ( nextSymbolAddr < startAddr )
1548                                 continue;
1549                         *addr = nextSymbolAddr;
1550                         *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1551                         *symbol = &sym;
1552                         return true;
1553                 }
1554                 else if ( nextCfiAddr < nextSymbolAddr ) {
1555                         if ( nextCfiAddr >= endAddr )
1556                                 return false;
1557                         ++cfiIndex;
1558                         if ( nextCfiAddr < startAddr )
1559                                 continue;
1560                         *addr = nextCfiAddr;
1561                         *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1562                         *symbol = NULL;
1563                         return true;
1564                 }
1565                 else {
1566                         if ( nextCfiAddr >= endAddr )
1567                                 return false;
1568                         ++symIndex;
1569                         ++cfiIndex;
1570                         if ( nextCfiAddr < startAddr )
1571                                 continue;
1572                         *addr = nextCfiAddr;
1573                         *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1574                         *symbol = &sym;
1575                         return true;
1576                 }
1577         }
1578         while ( symIndex < sortedSymbolCount ) {
1579                 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1580                 pint_t nextSymbolAddr = sym.n_value();
1581                 // if next symbol found is not in this section, then done with iteration
1582                 if ( sym.n_sect() != sectNum )
1583                         return false;
1584                 ++symIndex;
1585                 if ( nextSymbolAddr < startAddr )
1586                         continue;
1587                 *addr = nextSymbolAddr;
1588                 *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1589                 *symbol = &sym;
1590                 return true;
1591         }
1592         while ( cfiIndex < cfiStartsCount ) {
1593                 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1594                 if ( nextCfiAddr >= endAddr )
1595                         return false;
1596                 ++cfiIndex;
1597                 if ( nextCfiAddr < startAddr )
1598                         continue;
1599                 *addr = nextCfiAddr;
1600                 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1601                 *symbol = NULL;
1602                 return true;
1603         }
1604         return false;
1605 }
1606
1607 template <>
1608 typename arm::P::uint_t Parser<arm>::realAddr(typename arm::P::uint_t addr)
1609 {
1610         return addr & (-2);
1611 }
1612
1613 template <typename A>
1614 typename A::P::uint_t Parser<A>::realAddr(typename A::P::uint_t addr)
1615 {
1616         return addr;
1617 }
1618
//
// STACK_ALLOC_IF_SMALL(type, name, actualCount, maxCount)
//
// Declares three variables in the enclosing scope:
//    type*     name            buffer with room for actualCount elements
//    uint32_t  name_count      length of the on-stack array (1 when malloc is used)
//    type      name_buffer[]   the on-stack, run-time sized array itself
// When actualCount is greater than maxCount the storage is obtained with
// malloc() and the on-stack array shrinks to a single element; otherwise
// 'name' points at the on-stack array.
//
// The macro never frees the malloc()ed storage.  If malloc() returns NULL,
// 'name' silently falls back to the one-element on-stack array.
//
// Every argument is parenthesized in the expansion so that an expression
// argument (for example "a | b") is compared and multiplied as a whole.
//
#define STACK_ALLOC_IF_SMALL(_type, _name, _actual_count, _maxCount) \
	_type*  _name = NULL;   \
	uint32_t _name##_count = 1; \
	if ( (_actual_count) > (_maxCount) ) \
		_name = (_type*)malloc(sizeof(_type) * (_actual_count)); \
	else \
		_name##_count = (_actual_count); \
	_type  _name##_buffer[_name##_count]; \
	if ( _name == NULL ) \
		_name = _name##_buffer;
1629
1630
1631 template <typename A>
1632 ld::relocatable::File* Parser<A>::parse(const ParserOptions& opts)
1633 {
1634         // create file object
1635         _file = new File<A>(_path, _modTime, _fileContent, _ordinal);
1636
1637         // respond to -t option
1638         if ( opts.logAllFiles )
1639                 printf("%s\n", _path);
1640
1641         _armUsesZeroCostExceptions = opts.armUsesZeroCostExceptions;
1642
1643         // parse start of mach-o file
1644         if ( ! parseLoadCommands() )
1645                 return _file;
1646
1647         // make array of
1648         uint32_t sortedSectionIndexes[_machOSectionsCount];
1649         this->makeSortedSectionsArray(sortedSectionIndexes);
1650
1651         // make symbol table sorted by address
1652         this->prescanSymbolTable();
1653         uint32_t sortedSymbolIndexes[_symbolsInSections];
1654         this->makeSortedSymbolsArray(sortedSymbolIndexes, sortedSectionIndexes);
1655
1656         // allocate Section<A> object for each mach-o section
1657         makeSections();
1658
1659         // if it exists, do special early parsing of __compact_unwind section
1660         uint32_t countOfCUs = 0;
1661         if ( _compactUnwindSection != NULL )
1662                 countOfCUs = _compactUnwindSection->count();
1663         // stack allocate (if not too large) cuInfoBuffer
1664         STACK_ALLOC_IF_SMALL(typename CUSection<A>::Info, cuInfoArray, countOfCUs, 1024);
1665         if ( countOfCUs != 0 )
1666                 _compactUnwindSection->parse(*this, countOfCUs, cuInfoArray);
1667
1668         // create lists of address that already have compact unwind and thus don't need the dwarf parsed
1669         unsigned cuLsdaCount = 0;
1670         pint_t cuStarts[countOfCUs];
1671         for (uint32_t i=0; i < countOfCUs; ++i) {
1672                 if ( CUSection<A>::encodingMeansUseDwarf(cuInfoArray[i].compactUnwindInfo) )
1673                         cuStarts[i] = -1;
1674                 else
1675                         cuStarts[i] = cuInfoArray[i].functionStartAddress;
1676                 if ( cuInfoArray[i].lsdaAddress != 0 )
1677                         ++cuLsdaCount;
1678         }
1679
1680
1681         // if it exists, do special early parsing of __eh_frame section
1682         // stack allocate (if not too large) array of CFI_Atom_Info
1683         uint32_t countOfCFIs = 0;
1684         if ( _EHFrameSection != NULL )
1685                 countOfCFIs = _EHFrameSection->cfiCount(*this);
1686         STACK_ALLOC_IF_SMALL(typename CFISection<A>::CFI_Atom_Info, cfiArray, countOfCFIs, 1024);
1687
1688         // stack allocate (if not too large) a copy of __eh_frame to apply relocations to
1689         uint32_t sectSize = 4;
1690         if ( (countOfCFIs != 0) && _EHFrameSection->needsRelocating() )
1691                 sectSize = _EHFrameSection->machoSection()->size()+4;
1692         STACK_ALLOC_IF_SMALL(uint8_t, ehBuffer, sectSize, 50*1024);
1693         uint32_t cfiStartsCount = 0;
1694         if ( countOfCFIs != 0 ) {
1695                 _EHFrameSection->cfiParse(*this, ehBuffer, cfiArray, countOfCFIs, cuStarts, countOfCUs);
1696                 // count functions and lsdas
1697                 for(uint32_t i=0; i < countOfCFIs; ++i) {
1698                         if ( cfiArray[i].isCIE )
1699                                 continue;
1700                         //fprintf(stderr, "cfiArray[i].func = 0x%08llX, cfiArray[i].lsda = 0x%08llX, encoding=0x%08X\n",
1701                         //                      (uint64_t)cfiArray[i].u.fdeInfo.function.targetAddress,
1702                         //                      (uint64_t)cfiArray[i].u.fdeInfo.lsda.targetAddress,
1703                         //                      cfiArray[i].u.fdeInfo.compactUnwindInfo);
1704                         if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1705                                 ++cfiStartsCount;
1706                         if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1707                                 ++cfiStartsCount;
1708                 }
1709         }
1710         CFI_CU_InfoArrays cfis(cfiArray, countOfCFIs, cuInfoArray, countOfCUs);
1711
1712         // create sorted array of function starts and lsda starts
1713         pint_t cfiStartsArray[cfiStartsCount+cuLsdaCount];
1714         uint32_t countOfFDEs = 0;
1715         uint32_t cfiStartsArrayCount = 0;
1716         if ( countOfCFIs != 0 ) {
1717                 for(uint32_t i=0; i < countOfCFIs; ++i) {
1718                         if ( cfiArray[i].isCIE )
1719                                 continue;
1720                         if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1721                                 cfiStartsArray[cfiStartsArrayCount++] = realAddr(cfiArray[i].u.fdeInfo.function.targetAddress);
1722                         if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1723                                 cfiStartsArray[cfiStartsArrayCount++] = cfiArray[i].u.fdeInfo.lsda.targetAddress;
1724                         ++countOfFDEs;
1725                 }
1726         }
1727         if ( cuLsdaCount != 0 ) {
1728                 // merge in an lsda info from compact unwind
1729                 for (uint32_t i=0; i < countOfCUs; ++i) {
1730                         if ( cuInfoArray[i].lsdaAddress == 0 )
1731                                 continue;
1732                         // append to cfiStartsArray if not already in that list
1733                         bool found = false;
1734                         for(uint32_t j=0; j < cfiStartsArrayCount; ++j) {
1735                                 if ( cfiStartsArray[j] == cuInfoArray[i].lsdaAddress )
1736                                         found = true;
1737                         }
1738                         if ( ! found ) {
1739                                 cfiStartsArray[cfiStartsArrayCount++] = cuInfoArray[i].lsdaAddress;
1740                         }
1741                 }
1742         }
1743         if ( cfiStartsArrayCount != 0 ) {
1744                 ::qsort(cfiStartsArray, cfiStartsArrayCount, sizeof(pint_t), pointerSorter);
1745         #ifndef NDEBUG
1746                 // scan for FDEs claming the same function
1747                 for(uint32_t i=1; i < cfiStartsArrayCount; ++i) {
1748                         assert( cfiStartsArray[i] != cfiStartsArray[i-1] );
1749                 }
1750         #endif
1751         }
1752
1753         Section<A>** sections = _file->_sectionsArray;
1754         uint32_t        sectionsCount = _file->_sectionsArrayCount;
1755
1756         // figure out how many atoms will be allocated and allocate
1757         LabelAndCFIBreakIterator breakIterator(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1758                                                                                         cfiStartsArrayCount, _overlappingSymbols);
1759         uint32_t computedAtomCount = 0;
1760         for (uint32_t i=0; i < sectionsCount; ++i ) {
1761                 breakIterator.beginSection();
1762                 uint32_t count = sections[i]->computeAtomCount(*this, breakIterator, cfis);
1763                 //const macho_section<P>* sect = sections[i]->machoSection();
1764                 //fprintf(stderr, "computed count=%u for section %s size=%llu\n", count, sect->sectname(), (sect != NULL) ? sect->size() : 0);
1765                 computedAtomCount += count;
1766         }
1767         //fprintf(stderr, "allocating %d atoms * sizeof(Atom<A>)=%ld, sizeof(ld::Atom)=%ld\n", computedAtomCount, sizeof(Atom<A>), sizeof(ld::Atom));
1768         _file->_atomsArray = new uint8_t[computedAtomCount*sizeof(Atom<A>)];
1769         _file->_atomsArrayCount = 0;
1770
1771         // have each section append atoms to _atomsArray
1772         LabelAndCFIBreakIterator breakIterator2(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1773                                                                                                 cfiStartsArrayCount, _overlappingSymbols);
1774         for (uint32_t i=0; i < sectionsCount; ++i ) {
1775                 uint8_t* atoms = _file->_atomsArray + _file->_atomsArrayCount*sizeof(Atom<A>);
1776                 breakIterator2.beginSection();
1777                 uint32_t count = sections[i]->appendAtoms(*this, atoms, breakIterator2, cfis);
1778                 //fprintf(stderr, "append count=%u for section %s/%s\n", count, sections[i]->machoSection()->segname(), sections[i]->machoSection()->sectname());
1779                 _file->_atomsArrayCount += count;
1780         }
1781         assert( _file->_atomsArrayCount == computedAtomCount && "more atoms allocated than expected");
1782
1783
1784         // have each section add all fix-ups for its atoms
1785         _allFixups.reserve(computedAtomCount*5);
1786         for (uint32_t i=0; i < sectionsCount; ++i )
1787                 sections[i]->makeFixups(*this, cfis);
1788
1789         // assign fixups start offset for each atom
1790         uint8_t* p = _file->_atomsArray;
1791         uint32_t fixupOffset = 0;
1792         for(int i=_file->_atomsArrayCount; i > 0; --i) {
1793                 Atom<A>* atom = (Atom<A>*)p;
1794                 atom->_fixupsStartIndex = fixupOffset;
1795                 fixupOffset += atom->_fixupsCount;
1796                 atom->_fixupsCount = 0;
1797                 p += sizeof(Atom<A>);
1798         }
1799         assert(fixupOffset == _allFixups.size());
1800         _file->_fixups.reserve(fixupOffset);
1801
1802         // copy each fixup for each atom
1803         for(typename std::vector<FixupInAtom>::iterator it=_allFixups.begin(); it != _allFixups.end(); ++it) {
1804                 uint32_t slot = it->atom->_fixupsStartIndex + it->atom->_fixupsCount;
1805                 _file->_fixups[slot] = it->fixup;
1806                 it->atom->_fixupsCount++;
1807         }
1808
1809         // done with temp vector
1810         _allFixups.clear();
1811
1812         // add unwind info
1813         _file->_unwindInfos.reserve(countOfFDEs+countOfCUs);
1814         for(uint32_t i=0; i < countOfCFIs; ++i) {
1815                 if ( cfiArray[i].isCIE )
1816                         continue;
1817                 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS ) {
1818                         ld::Atom::UnwindInfo info;
1819                         info.startOffset = 0;
1820                         info.unwindInfo = cfiArray[i].u.fdeInfo.compactUnwindInfo;
1821                         _file->_unwindInfos.push_back(info);
1822                         Atom<A>* func = findAtomByAddress(cfiArray[i].u.fdeInfo.function.targetAddress);
1823                         func->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1824                         //fprintf(stderr, "cu from dwarf =0x%08X, atom=%s\n", info.unwindInfo, func->name());
1825                 }
1826         }
1827         // apply compact infos in __LD,__compact_unwind section to each function
1828         // if function also has dwarf unwind, CU will override it
1829         Atom<A>* lastFunc = NULL;
1830         uint32_t lastEnd = 0;
1831         for(uint32_t i=0; i < countOfCUs; ++i) {
1832                 typename CUSection<A>::Info* info = &cuInfoArray[i];
1833                 assert(info->function != NULL);
1834                 ld::Atom::UnwindInfo ui;
1835                 ui.startOffset = info->functionStartAddress - info->function->objectAddress();
1836                 ui.unwindInfo = info->compactUnwindInfo;
1837                 _file->_unwindInfos.push_back(ui);
1838                 // don't override with converted cu with "use dwarf" cu, if forcing dwarf conversion
1839                 if ( !_forceDwarfConversion || !CUSection<A>::encodingMeansUseDwarf(info->compactUnwindInfo) ) {
1840                         //fprintf(stderr, "cu=0x%08X, atom=%s\n", ui.unwindInfo, info->function->name());
1841                         // if previous is for same function, extend range
1842                         if ( info->function == lastFunc ) {
1843                                 if ( lastEnd != ui.startOffset ) {
1844                                         if ( lastEnd < ui.startOffset )
1845                                                 warning("__LD,__compact_unwind entries for %s have a gap at offset 0x%0X", info->function->name(), lastEnd);
1846                                         else
1847                                                 warning("__LD,__compact_unwind entries for %s overlap at offset 0x%0X", info->function->name(), lastEnd);
1848                                 }
1849                                 lastFunc->extendUnwindInfoRange();
1850                         }
1851                         else
1852                                 info->function->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1853                         lastFunc = info->function;
1854                         lastEnd = ui.startOffset + info->rangeLength;
1855                 }
1856         }
1857
1858         // process indirect symbols which become AliasAtoms
1859         _file->_aliasAtomsArray = NULL;
1860         _file->_aliasAtomsArrayCount = 0;
1861         if ( _indirectSymbolCount != 0 ) {
1862                 _file->_aliasAtomsArrayCount = _indirectSymbolCount;
1863                 _file->_aliasAtomsArray = new uint8_t[_file->_aliasAtomsArrayCount*sizeof(AliasAtom)];
1864                 this->appendAliasAtoms(_file->_aliasAtomsArray);
1865         }
1866
1867
1868         // parse dwarf debug info to get line info
1869         this->parseDebugInfo();
1870
1871         return _file;
1872 }
1873
1874
1875 template <> uint8_t Parser<x86>::loadCommandSizeMask()          { return 0x03; }
1876 template <> uint8_t Parser<x86_64>::loadCommandSizeMask()       { return 0x07; }
1877 template <> uint8_t Parser<arm>::loadCommandSizeMask()          { return 0x03; }
1878 template <> uint8_t Parser<arm64>::loadCommandSizeMask()        { return 0x07; }
1879
1880 template <typename A>
1881 bool Parser<A>::parseLoadCommands()
1882 {
1883         const macho_header<P>* header = (const macho_header<P>*)_fileContent;
1884
1885         // set File attributes
1886         _file->_canScatterAtoms = (header->flags() & MH_SUBSECTIONS_VIA_SYMBOLS);
1887         _file->_cpuSubType = header->cpusubtype();
1888
1889         const macho_segment_command<P>* segment = NULL;
1890         const uint8_t* const endOfFile = _fileContent + _fileLength;
1891         const uint32_t cmd_count = header->ncmds();
1892         // <rdar://problem/5394172> an empty .o file with zero load commands will crash linker
1893         if ( cmd_count == 0 )
1894                 return false;
1895         const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1896         const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1897         const macho_load_command<P>* cmd = cmds;
1898         for (uint32_t i = 0; i < cmd_count; ++i) {
1899                 uint32_t size = cmd->cmdsize();
1900                 if ( (size & this->loadCommandSizeMask()) != 0 )
1901                         throwf("load command #%d has a unaligned size", i);
1902                 const uint8_t* endOfCmd = ((uint8_t*)cmd)+cmd->cmdsize();
1903                 if ( endOfCmd > (uint8_t*)cmdsEnd )
1904                         throwf("load command #%d extends beyond the end of the load commands", i);
1905                 if ( endOfCmd > endOfFile )
1906                         throwf("load command #%d extends beyond the end of the file", i);
1907                 switch (cmd->cmd()) {
1908                     case LC_SYMTAB:
1909                                 {
1910                                         const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
1911                                         _symbolCount = symtab->nsyms();
1912                                         _symbols = (const macho_nlist<P>*)(_fileContent + symtab->symoff());
1913                                         _strings = (char*)_fileContent + symtab->stroff();
1914                                         _stringsSize = symtab->strsize();
1915                                         if ( (symtab->symoff() + _symbolCount*sizeof(macho_nlist<P>)) > _fileLength )
1916                                                 throw "mach-o symbol table extends beyond end of file";
1917                                         if ( (_strings + _stringsSize) > (char*)endOfFile )
1918                                                 throw "mach-o string pool extends beyond end of file";
1919                                         if ( _indirectTable == NULL ) {
1920                                                 if ( _undefinedEndIndex == 0 ) {
1921                                                         _undefinedStartIndex = 0;
1922                                                         _undefinedEndIndex = symtab->nsyms();
1923                                                 }
1924                                         }
1925                                 }
1926                                 break;
1927                         case LC_DYSYMTAB:
1928                                 {
1929                                         const macho_dysymtab_command<P>* dsymtab = (macho_dysymtab_command<P>*)cmd;
1930                                         _indirectTable = (uint32_t*)(_fileContent + dsymtab->indirectsymoff());
1931                                         _indirectTableCount = dsymtab->nindirectsyms();
1932                                         if ( &_indirectTable[_indirectTableCount] > (uint32_t*)endOfFile )
1933                                                 throw "indirect symbol table extends beyond end of file";
1934                                         _undefinedStartIndex = dsymtab->iundefsym();
1935                                         _undefinedEndIndex = _undefinedStartIndex + dsymtab->nundefsym();
1936                                 }
1937                                 break;
1938                     case LC_UUID:
1939                                 _hasUUID = true;
1940                                 break;
1941                         case LC_DATA_IN_CODE:
1942                                 {
1943                                         const macho_linkedit_data_command<P>* dc = (macho_linkedit_data_command<P>*)cmd;
1944                                         _dataInCodeStart = (macho_data_in_code_entry<P>*)(_fileContent + dc->dataoff());
1945                                         _dataInCodeEnd = (macho_data_in_code_entry<P>*)(_fileContent + dc->dataoff() + dc->datasize());
1946                                         if ( _dataInCodeEnd > (macho_data_in_code_entry<P>*)endOfFile )
1947                                                 throw "LC_DATA_IN_CODE table extends beyond end of file";
1948                                 }
1949                                 break;
1950                         case LC_LINKER_OPTION:
1951                                 {
1952                                         const macho_linker_option_command<P>* loc = (macho_linker_option_command<P>*)cmd;
1953                                         const char* buffer = loc->buffer();
1954                                         _file->_linkerOptions.resize(_file->_linkerOptions.size() + 1);
1955                                         std::vector<const char*>& vec = _file->_linkerOptions.back();
1956                                         for (uint32_t j=0; j < loc->count(); ++j) {
1957                                                 vec.push_back(buffer);
1958                                                 buffer += strlen(buffer) + 1;
1959                                         }
1960                                         if ( buffer > ((char*)cmd + loc->cmdsize()) )
1961                                                 throw "malformed LC_LINKER_OPTION";
1962                                 }
1963                                 break;
1964                         case LC_LINKER_OPTIMIZATION_HINTS:
1965                                 {
1966                                         const macho_linkedit_data_command<P>* loh = (macho_linkedit_data_command<P>*)cmd;
1967                                         _lohStart = _fileContent + loh->dataoff();
1968                                         _lohEnd = _fileContent + loh->dataoff() + loh->datasize();
1969                                         if ( _lohEnd > endOfFile )
1970                                                 throw "LC_LINKER_OPTIMIZATION_HINTS table extends beyond end of file";
1971                                 }
1972                                 break;
1973                         default:
1974                                 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1975                                         if ( segment != NULL )
1976                                                 throw "more than one LC_SEGMENT found in object file";
1977                                         segment = (macho_segment_command<P>*)cmd;
1978                                 }
1979                                 break;
1980                 }
1981                 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1982                 if ( cmd > cmdsEnd )
1983                         throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1984         }
1985
1986         // record range of sections
1987         if ( segment == NULL )
1988                 throw "missing LC_SEGMENT";
1989         _sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1990         _machOSectionsCount = segment->nsects();
1991
1992         return true;
1993 }
1994
1995
1996 template <typename A>
1997 void Parser<A>::prescanSymbolTable()
1998 {
1999         _tentativeDefinitionCount = 0;
2000         _absoluteSymbolCount = 0;
2001         _symbolsInSections = 0;
2002         _hasDataInCodeLabels = false;
2003         for (uint32_t i=0; i < this->_symbolCount; ++i) {
2004                 const macho_nlist<P>& sym =     symbolFromIndex(i);
2005                 // ignore stabs
2006                 if ( (sym.n_type() & N_STAB) != 0 )
2007                         continue;
2008
2009                 // look at undefines
2010                 const char* symbolName = this->nameFromSymbol(sym);
2011                 if ( (sym.n_type() & N_TYPE) == N_UNDF ) {
2012                         if ( sym.n_value() != 0 ) {
2013                                 // count tentative definitions
2014                                 ++_tentativeDefinitionCount;
2015                         }
2016                         else if ( strncmp(symbolName, "___dtrace_", 10) == 0 ) {
2017                                 // any undefined starting with __dtrace_*$ that is not ___dtrace_probe$* or ___dtrace_isenabled$*
2018                                 // is extra provider info
2019                                 if ( (strncmp(&symbolName[10], "probe$", 6) != 0) && (strncmp(&symbolName[10], "isenabled$", 10) != 0) ) {
2020                                         _dtraceProviderInfo.push_back(symbolName);
2021                                 }
2022                         }
2023                         continue;
2024                 }
2025                 else if ( ((sym.n_type() & N_TYPE) == N_INDR) && ((sym.n_type() & N_EXT) != 0) ) {
2026                         _indirectSymbolCount++;
2027                         continue;
2028                 }
2029
2030                 // count absolute symbols
2031                 if ( (sym.n_type() & N_TYPE) == N_ABS ) {
2032                         const char* absName = this->nameFromSymbol(sym);
2033                         // ignore .objc_class_name_* symbols
2034                         if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
2035                                 _AppleObjc = true;
2036                                 continue;
2037                         }
2038                         // ignore .objc_class_name_* symbols
2039                         if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
2040                                 continue;
2041                         // ignore empty *.eh symbols
2042                         if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
2043                                 continue;
2044                         ++_absoluteSymbolCount;
2045                 }
2046
2047                 // only look at definitions
2048                 if ( (sym.n_type() & N_TYPE) != N_SECT )
2049                         continue;
2050
2051                 // 'L' labels do not denote atom breaks
2052                 if ( symbolName[0] == 'L' ) {
2053                         // <rdar://problem/9218847> Formalize data in code with L$start$ labels
2054                         if ( strncmp(symbolName, "L$start$", 8) == 0 )
2055                                 _hasDataInCodeLabels = true;
2056                         continue;
2057                 }
2058                 // how many def syms in each section
2059                 if ( sym.n_sect() > _machOSectionsCount )
2060                         throw "bad n_sect in symbol table";
2061
2062                 _symbolsInSections++;
2063         }
2064 }
2065
2066 template <typename A>
2067 void Parser<A>::appendAliasAtoms(uint8_t* p)
2068 {
2069         for (uint32_t i=0; i < this->_symbolCount; ++i) {
2070                 const macho_nlist<P>& sym =     symbolFromIndex(i);
2071                 // ignore stabs
2072                 if ( (sym.n_type() & N_STAB) != 0 )
2073                         continue;
2074
2075                 // only look at N_INDR symbols
2076                 if ( (sym.n_type() & N_TYPE) != N_INDR )
2077                         continue;
2078
2079                 // skip non-external aliases
2080                 if ( (sym.n_type() & N_EXT) == 0 )
2081                         continue;
2082
2083                 const char* symbolName = this->nameFromSymbol(sym);
2084                 const char* aliasOfName = &_strings[sym.n_value()];
2085                 bool isHiddenVisibility = (sym.n_type() & N_PEXT);
2086                 AliasAtom* allocatedSpace = (AliasAtom*)p;
2087                 new (allocatedSpace) AliasAtom(symbolName, isHiddenVisibility, _file, aliasOfName);
2088                 p += sizeof(AliasAtom);
2089         }
2090 }
2091
2092
2093
2094 template <typename A>
2095 int Parser<A>::sectionIndexSorter(void* extra, const void* l, const void* r)
2096 {
2097         Parser<A>* parser = (Parser<A>*)extra;
2098         const uint32_t* left = (uint32_t*)l;
2099         const uint32_t* right = (uint32_t*)r;
2100         const macho_section<P>* leftSect =      parser->machOSectionFromSectionIndex(*left);
2101         const macho_section<P>* rightSect = parser->machOSectionFromSectionIndex(*right);
2102
2103         // can't just return difference because 64-bit diff does not fit in 32-bit return type
2104         int64_t result = leftSect->addr() - rightSect->addr();
2105         if ( result == 0 ) {
2106                 // two sections with same start address
2107                 // one with zero size goes first
2108                 bool leftEmpty = ( leftSect->size() == 0 );
2109                 bool rightEmpty = ( rightSect->size() == 0 );
2110                 if ( leftEmpty != rightEmpty ) {
2111                         return ( rightEmpty ? 1 : -1 );
2112                 }
2113                 if ( !leftEmpty && !rightEmpty )
2114                         throwf("overlapping sections");
2115                 // both empty, so chose file order
2116                 return ( rightSect - leftSect );
2117         }
2118         else if ( result < 0 )
2119                 return -1;
2120         else
2121                 return 1;
2122 }
2123
2124 template <typename A>
2125 void Parser<A>::makeSortedSectionsArray(uint32_t array[])
2126 {
2127         const bool log = false;
2128
2129         if ( log ) {
2130                 fprintf(stderr, "unsorted sections:\n");
2131                 for(unsigned int i=0; i < _machOSectionsCount; ++i )
2132                         fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[i].addr(), _sectionsStart[i].segname(), _sectionsStart[i].sectname());
2133         }
2134
2135         // sort by symbol table address
2136         for (uint32_t i=0; i < _machOSectionsCount; ++i)
2137                 array[i] = i;
2138         ::qsort_r(array, _machOSectionsCount, sizeof(uint32_t), this, &sectionIndexSorter);
2139
2140         if ( log ) {
2141                 fprintf(stderr, "sorted sections:\n");
2142                 for(unsigned int i=0; i < _machOSectionsCount; ++i )
2143                         fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[array[i]].addr(), _sectionsStart[array[i]].segname(), _sectionsStart[array[i]].sectname());
2144         }
2145 }
2146
2147
2148
2149 template <typename A>
2150 int Parser<A>::symbolIndexSorter(void* extra, const void* l, const void* r)
2151 {
2152         ParserAndSectionsArray* extraInfo = (ParserAndSectionsArray*)extra;
2153         Parser<A>* parser = extraInfo->parser;
2154         const uint32_t* sortedSectionsArray = extraInfo->sortedSectionsArray;
2155         const uint32_t* left = (uint32_t*)l;
2156         const uint32_t* right = (uint32_t*)r;
2157         const macho_nlist<P>& leftSym = parser->symbolFromIndex(*left);
2158         const macho_nlist<P>& rightSym = parser->symbolFromIndex(*right);
2159         // can't just return difference because 64-bit diff does not fit in 32-bit return type
2160         int64_t result = leftSym.n_value() - rightSym.n_value();
2161         if ( result == 0 ) {
2162                 // two symbols with same address
2163                 // if in different sections, sort earlier section first
2164                 if ( leftSym.n_sect() != rightSym.n_sect() ) {
2165                         for (uint32_t i=0; i < parser->machOSectionCount(); ++i) {
2166                                 if ( sortedSectionsArray[i]+1 == leftSym.n_sect() )
2167                                         return -1;
2168                                 if ( sortedSectionsArray[i]+1 == rightSym.n_sect() )
2169                                         return 1;
2170                         }
2171                 }
2172                 // two symbols in same section, means one is an alias
2173                 // if one is ltmp*, make it an alias (sort first)
2174                 const char* leftName  = parser->nameFromSymbol(leftSym);
2175                 const char* rightName = parser->nameFromSymbol(rightSym);
2176                 bool leftIsTmp  = strncmp(leftName,  "ltmp", 4);
2177                 bool rightIsTmp = strncmp(rightName, "ltmp", 4);
2178                 if ( leftIsTmp != rightIsTmp ) {
2179                         return (rightIsTmp ? -1 : 1);
2180                 }
2181
2182                 // if only one is global, make the other an alias (sort first)
2183                 if ( (leftSym.n_type() & N_EXT) != (rightSym.n_type() & N_EXT) ) {
2184                         if ( (rightSym.n_type() & N_EXT) != 0 )
2185                                 return -1;
2186                         else
2187                                 return 1;
2188                 }
2189                 // if both are global, sort alphabetically. earlier one will be the alias
2190                 return ( strcmp(rightName, leftName) );
2191         }
2192         else if ( result < 0 )
2193                 return -1;
2194         else
2195                 return 1;
2196 }
2197
2198
2199 template <typename A>
2200 void Parser<A>::makeSortedSymbolsArray(uint32_t array[], const uint32_t sectionArray[])
2201 {
2202         const bool log = false;
2203
2204         uint32_t* p = array;
2205         for (uint32_t i=0; i < this->_symbolCount; ++i) {
2206                 const macho_nlist<P>& sym =     symbolFromIndex(i);
2207                 // ignore stabs
2208                 if ( (sym.n_type() & N_STAB) != 0 )
2209                         continue;
2210
2211                 // only look at definitions
2212                 if ( (sym.n_type() & N_TYPE) != N_SECT )
2213                         continue;
2214
2215                 // 'L' labels do not denote atom breaks
2216                 const char* symbolName = this->nameFromSymbol(sym);
2217                 if ( symbolName[0] == 'L' )
2218                         continue;
2219
2220                 // how many def syms in each section
2221                 if ( sym.n_sect() > _machOSectionsCount )
2222                         throw "bad n_sect in symbol table";
2223
2224                 // append to array
2225                 *p++ = i;
2226         }
2227         assert(p == &array[_symbolsInSections] && "second pass over symbol table yield a different number of symbols");
2228
2229         // sort by symbol table address
2230         ParserAndSectionsArray extra = { this, sectionArray };
2231         ::qsort_r(array, _symbolsInSections, sizeof(uint32_t), &extra, &symbolIndexSorter);
2232
2233
2234         // look for two symbols at same address
2235         _overlappingSymbols = false;
2236         for (unsigned int i=1; i < _symbolsInSections; ++i) {
2237                 if ( symbolFromIndex(array[i-1]).n_value() == symbolFromIndex(array[i]).n_value() ) {
2238                         //fprintf(stderr, "overlapping symbols at 0x%08llX\n", symbolFromIndex(array[i-1]).n_value());
2239                         _overlappingSymbols = true;
2240                         break;
2241                 }
2242         }
2243
2244         if ( log ) {
2245                 fprintf(stderr, "sorted symbols:\n");
2246                 for(unsigned int i=0; i < _symbolsInSections; ++i )
2247                         fprintf(stderr, "0x%09llX symIndex=%d sectNum=%2d, %s\n", symbolFromIndex(array[i]).n_value(), array[i], symbolFromIndex(array[i]).n_sect(), nameFromSymbol(symbolFromIndex(array[i])) );
2248         }
2249 }
2250
2251 template <typename A>
2252 void Parser<A>::makeSections()
2253 {
2254         // classify each section by type
2255         // compute how many Section objects will be needed and total size for all
2256         unsigned int totalSectionsSize = 0;
2257         uint8_t machOSectsStorage[sizeof(MachOSectionAndSectionClass<P>)*(_machOSectionsCount+2)]; // also room for tentative-defs and absolute symbols
2258         // allocate raw storage for all section objects on stack
2259         MachOSectionAndSectionClass<P>* machOSects = (MachOSectionAndSectionClass<P>*)machOSectsStorage;
2260         unsigned int count = 0;
2261         for (uint32_t i=0; i < _machOSectionsCount; ++i) {
2262                 const macho_section<P>* sect = &_sectionsStart[i];
2263                 if ( (sect->flags() & S_ATTR_DEBUG) != 0 ) {
2264                         if ( strcmp(sect->segname(), "__DWARF") == 0 ) {
2265                                 // note that .o file has dwarf
2266                                 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoDwarf;
2267                                 // save off interesting dwarf sections
2268                                 if ( strcmp(sect->sectname(), "__debug_info") == 0 )
2269                                         _file->_dwarfDebugInfoSect = sect;
2270                                 else if ( strcmp(sect->sectname(), "__debug_abbrev") == 0 )
2271                                         _file->_dwarfDebugAbbrevSect = sect;
2272                                 else if ( strcmp(sect->sectname(), "__debug_line") == 0 )
2273                                         _file->_dwarfDebugLineSect = sect;
2274                                 else if ( strcmp(sect->sectname(), "__debug_str") == 0 )
2275                                         _file->_dwarfDebugStringSect = sect;
2276                                 // linker does not propagate dwarf sections to output file
2277                                 continue;
2278                         }
2279                         else if ( strcmp(sect->segname(), "__LD") == 0 ) {
2280                                 if ( strncmp(sect->sectname(), "__compact_unwind", 16) == 0 ) {
2281                                         machOSects[count].sect = sect;
2282                                         totalSectionsSize += sizeof(CUSection<A>);
2283                                         machOSects[count++].type = sectionTypeCompactUnwind;
2284                                         continue;
2285                                 }
2286                         }
2287                 }
2288                 // ignore empty __OBJC sections
2289                 if ( (sect->size() == 0) && (strcmp(sect->segname(), "__OBJC") == 0) )
2290                         continue;
2291                 // objc image info section is really attributes and not content
2292                 if ( ((strcmp(sect->sectname(), "__image_info") == 0) && (strcmp(sect->segname(), "__OBJC") == 0))
2293                         || ((strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0) && (strcmp(sect->segname(), "__DATA") == 0)) ) {
2294                         //      struct objc_image_info  {
2295                         //              uint32_t        version;        // initially 0
2296                         //              uint32_t        flags;
2297                         //      };
2298                         // #define OBJC_IMAGE_SUPPORTS_GC   2
2299                         // #define OBJC_IMAGE_GC_ONLY       4
2300                         // #define OBJC_IMAGE_IS_SIMULATED  32
2301                         //
2302                         const uint32_t* contents = (uint32_t*)(_file->fileContent()+sect->offset());
2303                         if ( (sect->size() >= 8) && (contents[0] == 0) ) {
2304                                 uint32_t flags = E::get32(contents[1]);
2305                                 if ( (flags & 4) == 4 )
2306                                         _file->_objConstraint = ld::File::objcConstraintGC;
2307                                 else if ( (flags & 2) == 2 )
2308                                         _file->_objConstraint = ld::File::objcConstraintRetainReleaseOrGC;
2309                                 else if ( (flags & 32) == 32 )
2310                                         _file->_objConstraint = ld::File::objcConstraintRetainReleaseForSimulator;
2311                                 else
2312                                         _file->_objConstraint = ld::File::objcConstraintRetainRelease;
2313                                 _file->_swiftVersion = ((flags >> 8) & 0xFF);
2314                                 if ( sect->size() > 8 ) {
2315                                         warning("section %s/%s has unexpectedly large size %llu in %s",
2316                                                         sect->segname(), Section<A>::makeSectionName(sect), sect->size(), _file->path());
2317                                 }
2318                         }
2319                         else {
2320                                 warning("can't parse %s/%s section in %s", sect->segname(), Section<A>::makeSectionName(sect), _file->path());
2321                         }
2322                         continue;
2323                 }
2324                 machOSects[count].sect = sect;
2325                 switch ( sect->flags() & SECTION_TYPE ) {
2326                         case S_SYMBOL_STUBS:
2327                                 if ( _stubsSectionNum == 0 ) {
2328                                         _stubsSectionNum = i+1;
2329                                         _stubsMachOSection = sect;
2330                                 }
2331                                 else
2332                                         assert(1 && "multiple S_SYMBOL_STUBS sections");
2333                         case S_LAZY_SYMBOL_POINTERS:
2334                                 break;
2335                         case S_4BYTE_LITERALS:
2336                                 totalSectionsSize += sizeof(Literal4Section<A>);
2337                                 machOSects[count++].type = sectionTypeLiteral4;
2338                                 break;
2339                         case S_8BYTE_LITERALS:
2340                                 totalSectionsSize += sizeof(Literal8Section<A>);
2341                                 machOSects[count++].type = sectionTypeLiteral8;
2342                                 break;
2343                         case S_16BYTE_LITERALS:
2344                                 totalSectionsSize += sizeof(Literal16Section<A>);
2345                                 machOSects[count++].type = sectionTypeLiteral16;
2346                                 break;
2347                         case S_NON_LAZY_SYMBOL_POINTERS:
2348                                 totalSectionsSize += sizeof(NonLazyPointerSection<A>);
2349                                 machOSects[count++].type = sectionTypeNonLazy;
2350                                 break;
2351                         case S_LITERAL_POINTERS:
2352                                 if ( (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__cls_refs") == 0) ) {
2353                                         totalSectionsSize += sizeof(Objc1ClassReferences<A>);
2354                                         machOSects[count++].type = sectionTypeObjC1ClassRefs;
2355                                 }
2356                                 else {
2357                                         totalSectionsSize += sizeof(PointerToCStringSection<A>);
2358                                         machOSects[count++].type = sectionTypeCStringPointer;
2359                                 }
2360                                 break;
2361                         case S_CSTRING_LITERALS:
2362                                 totalSectionsSize += sizeof(CStringSection<A>);
2363                                 machOSects[count++].type = sectionTypeCString;
2364                                 break;
2365                         case S_MOD_INIT_FUNC_POINTERS:
2366                         case S_MOD_TERM_FUNC_POINTERS:
2367                         case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
2368                         case S_INTERPOSING:
2369                         case S_ZEROFILL:
2370                         case S_REGULAR:
2371                         case S_COALESCED:
2372                         case S_THREAD_LOCAL_REGULAR:
2373                         case S_THREAD_LOCAL_ZEROFILL:
2374                                 if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__eh_frame") == 0) ) {
2375                                         totalSectionsSize += sizeof(CFISection<A>);
2376                                         machOSects[count++].type = sectionTypeCFI;
2377                                 }
2378                                 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__cfstring") == 0) ) {
2379                                         totalSectionsSize += sizeof(CFStringSection<A>);
2380                                         machOSects[count++].type = sectionTypeCFString;
2381                                 }
2382                                 else if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__ustring") == 0) ) {
2383                                         totalSectionsSize += sizeof(UTF16StringSection<A>);
2384                                         machOSects[count++].type = sectionTypeUTF16Strings;
2385                                 }
2386                                 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strncmp(sect->sectname(), "__objc_classrefs", 16) == 0) ) {
2387                                         totalSectionsSize += sizeof(ObjC2ClassRefsSection<A>);
2388                                         machOSects[count++].type = sectionTypeObjC2ClassRefs;
2389                                 }
2390                                 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__objc_catlist") == 0) ) {
2391                                         totalSectionsSize += sizeof(ObjC2CategoryListSection<A>);
2392                                         machOSects[count++].type = typeObjC2CategoryList;
2393                                 }
2394                                 else if ( _AppleObjc && (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__class") == 0) ) {
2395                                         totalSectionsSize += sizeof(ObjC1ClassSection<A>);
2396                                         machOSects[count++].type = sectionTypeObjC1Classes;
2397                                 }
2398                                 else {
2399                                         totalSectionsSize += sizeof(SymboledSection<A>);
2400                                         machOSects[count++].type = sectionTypeSymboled;
2401                                 }
2402                                 break;
2403                         case S_THREAD_LOCAL_VARIABLES:
2404                                 totalSectionsSize += sizeof(TLVDefsSection<A>);
2405                                 machOSects[count++].type = sectionTypeTLVDefs;
2406                                 break;
2407                         case S_THREAD_LOCAL_VARIABLE_POINTERS:
2408                         default:
2409                                 throwf("unknown section type %d", sect->flags() & SECTION_TYPE);
2410                 }
2411         }
2412
2413         // sort by address (mach-o object files don't aways have sections sorted)
2414         ::qsort(machOSects, count, sizeof(MachOSectionAndSectionClass<P>), MachOSectionAndSectionClass<P>::sorter);
2415
2416         // we will synthesize a dummy Section<A> object for tentative definitions
2417         if ( _tentativeDefinitionCount > 0 ) {
2418                 totalSectionsSize += sizeof(TentativeDefinitionSection<A>);
2419                 machOSects[count++].type = sectionTypeTentativeDefinitions;
2420         }
2421
2422         // we will synthesize a dummy Section<A> object for Absolute symbols
2423         if ( _absoluteSymbolCount > 0 ) {
2424                 totalSectionsSize += sizeof(AbsoluteSymbolSection<A>);
2425                 machOSects[count++].type = sectionTypeAbsoluteSymbols;
2426         }
2427
2428         // allocate one block for all Section objects as well as pointers to each
2429         uint8_t* space = new uint8_t[totalSectionsSize+count*sizeof(Section<A>*)];
2430         _file->_sectionsArray = (Section<A>**)space;
2431         _file->_sectionsArrayCount = count;
2432         Section<A>** objects = _file->_sectionsArray;
2433         space += count*sizeof(Section<A>*);
2434         for (uint32_t i=0; i < count; ++i) {
2435                 switch ( machOSects[i].type ) {
2436                         case sectionTypeIgnore:
2437                                 break;
2438                         case sectionTypeLiteral4:
2439                                 *objects++ = new (space) Literal4Section<A>(*this, *_file, machOSects[i].sect);
2440                                 space += sizeof(Literal4Section<A>);
2441                                 break;
2442                         case sectionTypeLiteral8:
2443                                 *objects++ = new (space) Literal8Section<A>(*this, *_file, machOSects[i].sect);
2444                                 space += sizeof(Literal8Section<A>);
2445                                 break;
2446                         case sectionTypeLiteral16:
2447                                 *objects++ = new (space) Literal16Section<A>(*this, *_file, machOSects[i].sect);
2448                                 space += sizeof(Literal16Section<A>);
2449                                 break;
2450                         case sectionTypeNonLazy:
2451                                 *objects++ = new (space) NonLazyPointerSection<A>(*this, *_file, machOSects[i].sect);
2452                                 space += sizeof(NonLazyPointerSection<A>);
2453                                 break;
2454                         case sectionTypeCFI:
2455                                 _EHFrameSection = new (space) CFISection<A>(*this, *_file, machOSects[i].sect);
2456                                 *objects++ = _EHFrameSection;
2457                                 space += sizeof(CFISection<A>);
2458                                 break;
2459                         case sectionTypeCString:
2460                                 *objects++ = new (space) CStringSection<A>(*this, *_file, machOSects[i].sect);
2461                                 space += sizeof(CStringSection<A>);
2462                                 break;
2463                         case sectionTypeCStringPointer:
2464                                 *objects++ = new (space) PointerToCStringSection<A>(*this, *_file, machOSects[i].sect);
2465                                 space += sizeof(PointerToCStringSection<A>);
2466                                 break;
2467                         case sectionTypeObjC1ClassRefs:
2468                                 *objects++ = new (space) Objc1ClassReferences<A>(*this, *_file, machOSects[i].sect);
2469                                 space += sizeof(Objc1ClassReferences<A>);
2470                                 break;
2471                         case sectionTypeUTF16Strings:
2472                                 *objects++ = new (space) UTF16StringSection<A>(*this, *_file, machOSects[i].sect);
2473                                 space += sizeof(UTF16StringSection<A>);
2474                                 break;
2475                         case sectionTypeCFString:
2476                                 *objects++ = new (space) CFStringSection<A>(*this, *_file, machOSects[i].sect);
2477                                 space += sizeof(CFStringSection<A>);
2478                                 break;
2479                         case sectionTypeObjC2ClassRefs:
2480                                 *objects++ = new (space) ObjC2ClassRefsSection<A>(*this, *_file, machOSects[i].sect);
2481                                 space += sizeof(ObjC2ClassRefsSection<A>);
2482                                 break;
2483                         case typeObjC2CategoryList:
2484                                 *objects++ = new (space) ObjC2CategoryListSection<A>(*this, *_file, machOSects[i].sect);
2485                                 space += sizeof(ObjC2CategoryListSection<A>);
2486                                 break;
2487                         case sectionTypeObjC1Classes:
2488                                 *objects++ = new (space) ObjC1ClassSection<A>(*this, *_file, machOSects[i].sect);
2489                                 space += sizeof(ObjC1ClassSection<A>);
2490                                 break;
2491                         case sectionTypeSymboled:
2492                                 *objects++ = new (space) SymboledSection<A>(*this, *_file, machOSects[i].sect);
2493                                 space += sizeof(SymboledSection<A>);
2494                                 break;
2495                         case sectionTypeTLVDefs:
2496                                 *objects++ = new (space) TLVDefsSection<A>(*this, *_file, machOSects[i].sect);
2497                                 space += sizeof(TLVDefsSection<A>);
2498                                 break;
2499                         case sectionTypeCompactUnwind:
2500                                 _compactUnwindSection = new (space) CUSection<A>(*this, *_file, machOSects[i].sect);
2501                                 *objects++ = _compactUnwindSection;
2502                                 space += sizeof(CUSection<A>);
2503                                 break;
2504                         case sectionTypeTentativeDefinitions:
2505                                 *objects++ = new (space) TentativeDefinitionSection<A>(*this, *_file);
2506                                 space += sizeof(TentativeDefinitionSection<A>);
2507                                 break;
2508                         case sectionTypeAbsoluteSymbols:
2509                                 _absoluteSection = new (space) AbsoluteSymbolSection<A>(*this, *_file);
2510                                 *objects++ = _absoluteSection;
2511                                 space += sizeof(AbsoluteSymbolSection<A>);
2512                                 break;
2513                         default:
2514                                 throw "internal error uknown SectionType";
2515                 }
2516         }
2517 }
2518
2519
2520 template <typename A>
2521 Section<A>* Parser<A>::sectionForAddress(typename A::P::uint_t addr)
2522 {
2523         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2524                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2525                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2526                 if ( sect != NULL ) {
2527                         if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
2528                                 return _file->_sectionsArray[i];
2529                         }
2530                 }
2531         }
2532         // not strictly in any section
2533         // may be in a zero length section
2534         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2535                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2536                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2537                 if ( sect != NULL ) {
2538                         if ( (sect->addr() == addr) && (sect->size() == 0) ) {
2539                                 return _file->_sectionsArray[i];
2540                         }
2541                 }
2542         }
2543
2544         throwf("sectionForAddress(0x%llX) address not in any section", (uint64_t)addr);
2545 }
2546
2547 template <typename A>
2548 Section<A>* Parser<A>::sectionForNum(unsigned int num)
2549 {
2550         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2551                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2552                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2553                 if ( sect != NULL ) {
2554                         if ( num == (unsigned int)((sect - _sectionsStart)+1) )
2555                                 return _file->_sectionsArray[i];
2556                 }
2557         }
2558         throwf("sectionForNum(%u) section number not for any section", num);
2559 }
2560
2561 template <typename A>
2562 Atom<A>* Parser<A>::findAtomByAddress(pint_t addr)
2563 {
2564         Section<A>* section = this->sectionForAddress(addr);
2565         return section->findAtomByAddress(addr);
2566 }
2567
2568 template <typename A>
2569 Atom<A>* Parser<A>::findAtomByAddressOrNullIfStub(pint_t addr)
2570 {
2571         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) )
2572                 return NULL;
2573         return findAtomByAddress(addr);
2574 }
2575
2576 template <typename A>
2577 Atom<A>* Parser<A>::findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom)
2578 {
2579         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2580                 // target is a stub, remove indirection
2581                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2582                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2583                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2584                 // can't be to external weak symbol
2585                 assert( (this->combineFromSymbol(sym) != ld::Atom::combineByName) || (this->scopeFromSymbol(sym) != ld::Atom::scopeGlobal) );
2586                 *offsetInAtom = 0;
2587                 return this->findAtomByName(this->nameFromSymbol(sym));
2588         }
2589         Atom<A>* target = this->findAtomByAddress(addr);
2590         *offsetInAtom = addr - target->_objAddress;
2591         return target;
2592 }
2593
2594 template <typename A>
2595 Atom<A>* Parser<A>::findAtomByName(const char* name)
2596 {
2597         uint8_t* p = _file->_atomsArray;
2598         for(int i=_file->_atomsArrayCount; i > 0; --i) {
2599                 Atom<A>* atom = (Atom<A>*)p;
2600                 if ( strcmp(name, atom->name()) == 0 )
2601                         return atom;
2602                 p += sizeof(Atom<A>);
2603         }
2604         return NULL;
2605 }
2606
2607 template <typename A>
2608 void Parser<A>::findTargetFromAddress(pint_t addr, TargetDesc& target)
2609 {
2610         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2611                 // target is a stub, remove indirection
2612                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2613                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2614                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2615                 target.atom = NULL;
2616                 target.name = this->nameFromSymbol(sym);
2617                 target.weakImport = this->weakImportFromSymbol(sym);
2618                 target.addend = 0;
2619                 return;
2620         }
2621         Section<A>* section = this->sectionForAddress(addr);
2622         target.atom = section->findAtomByAddress(addr);
2623         target.addend = addr - target.atom->_objAddress;
2624         target.weakImport = false;
2625         target.name = NULL;
2626 }
2627
2628 template <typename A>
2629 void Parser<A>::findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target)
2630 {
2631         findTargetFromAddress(baseAddr, target);
2632         target.addend = addr - target.atom->_objAddress;
2633 }
2634
2635 template <typename A>
2636 void Parser<A>::findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum, TargetDesc& target)
2637 {
2638         if ( sectNum == R_ABS ) {
2639                 // target is absolute symbol that corresponds to addr
2640                 if ( _absoluteSection != NULL ) {
2641                         target.atom = _absoluteSection->findAbsAtomForValue(addr);
2642                         if ( target.atom != NULL ) {
2643                                 target.name = NULL;
2644                                 target.weakImport = false;
2645                                 target.addend = 0;
2646                                 return;
2647                         }
2648                 }
2649                 throwf("R_ABS reloc but no absolute symbol at target address");
2650         }
2651
2652         if ( hasStubsSection() && (stubsSectionNum() == sectNum) ) {
2653                 // target is a stub, remove indirection
2654                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2655                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2656                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2657                 // use direct reference when stub is to a static function
2658                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (this->nameFromSymbol(sym)[0] == 'L')) ) {
2659                         this->findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
2660                 }
2661                 else {
2662                         target.atom = NULL;
2663                         target.name = this->nameFromSymbol(sym);
2664                         target.weakImport = this->weakImportFromSymbol(sym);
2665                         target.addend = 0;
2666                 }
2667                 return;
2668         }
2669         Section<A>* section = this->sectionForNum(sectNum);
2670         target.atom = section->findAtomByAddress(addr);
2671         if ( target.atom == NULL ) {
2672                 typedef typename A::P::sint_t sint_t;
2673                 sint_t a = (sint_t)addr;
2674                 sint_t sectStart = (sint_t)(section->machoSection()->addr());
2675                 sint_t sectEnd  = sectStart + section->machoSection()->size();
2676                 if ( a < sectStart ) {
2677                         // target address is before start of section, so must be negative addend
2678                         target.atom = section->findAtomByAddress(sectStart);
2679                         target.addend = a - sectStart;
2680                         target.weakImport = false;
2681                         target.name = NULL;
2682                         return;
2683                 }
2684                 else if ( a >= sectEnd ) {
2685                         target.atom = section->findAtomByAddress(sectEnd-1);
2686                         target.addend = a - sectEnd;
2687                         target.weakImport = false;
2688                         target.name = NULL;
2689                         return;
2690                 }
2691         }
2692         assert(target.atom != NULL);
2693         target.addend = addr - target.atom->_objAddress;
2694         target.weakImport = false;
2695         target.name = NULL;
2696 }
2697
2698 template <typename A>
2699 void Parser<A>::addDtraceExtraInfos(const SourceLocation& src, const char* providerName)
2700 {
2701         // for every ___dtrace_stability$* and ___dtrace_typedefs$* undefine with
2702         // a matching provider name, add a by-name kDtraceTypeReference at probe site
2703         const char* dollar = strchr(providerName, '$');
2704         if ( dollar != NULL ) {
2705                 int providerNameLen = dollar-providerName+1;
2706                 for ( std::vector<const char*>::iterator it = _dtraceProviderInfo.begin(); it != _dtraceProviderInfo.end(); ++it) {
2707                         const char* typeDollar = strchr(*it, '$');
2708                         if ( typeDollar != NULL ) {
2709                                 if ( strncmp(typeDollar+1, providerName, providerNameLen) == 0 ) {
2710                                         addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindDtraceExtra,false, *it);
2711                                 }
2712                         }
2713                 }
2714         }
2715 }
2716
2717 template <typename A>
2718 const char* Parser<A>::scanSymbolTableForAddress(uint64_t addr)
2719 {
2720         uint64_t closestSymAddr = 0;
2721         const char* closestSymName = NULL;
2722         for (uint32_t i=0; i < this->_symbolCount; ++i) {
2723                 const macho_nlist<P>& sym =     symbolFromIndex(i);
2724                 // ignore stabs
2725                 if ( (sym.n_type() & N_STAB) != 0 )
2726                         continue;
2727
2728                 // only look at definitions
2729                 if ( (sym.n_type() & N_TYPE) != N_SECT )
2730                         continue;
2731
2732                 // return with exact match
2733                 if ( sym.n_value() == addr ) {
2734                         const char* name = nameFromSymbol(sym);
2735                         if ( strncmp(name, "ltmp", 4) != 0 )
2736                                 return name;
2737                         // treat 'ltmp*' labels as close match
2738                         closestSymAddr = sym.n_value();
2739                         closestSymName = name;
2740                 }
2741
2742                 // record closest seen so far
2743                 if ( (sym.n_value() < addr) && ((sym.n_value() > closestSymAddr) || (closestSymName == NULL)) )
2744                         closestSymName = nameFromSymbol(sym);
2745         }
2746
2747         return (closestSymName != NULL) ? closestSymName : "unknown";
2748 }
2749
2750
2751 template <typename A>
2752 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind setKind, const TargetDesc& target)
2753 {
2754         // some fixup pairs can be combined
2755         ld::Fixup::Cluster cl = ld::Fixup::k1of3;
2756         ld::Fixup::Kind firstKind = ld::Fixup::kindSetTargetAddress;
2757         bool combined = false;
2758         if ( target.addend == 0 ) {
2759                 cl = ld::Fixup::k1of1;
2760                 combined = true;
2761                 switch ( setKind ) {
2762                         case ld::Fixup::kindStoreLittleEndian32:
2763                                 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian32;
2764                                 break;
2765                         case ld::Fixup::kindStoreLittleEndian64:
2766                                 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian64;
2767                                 break;
2768                         case ld::Fixup::kindStoreBigEndian32:
2769                                 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian32;
2770                                 break;
2771                         case ld::Fixup::kindStoreBigEndian64:
2772                                 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian64;
2773                                 break;
2774                         case ld::Fixup::kindStoreX86BranchPCRel32:
2775                                 firstKind = ld::Fixup::kindStoreTargetAddressX86BranchPCRel32;
2776                                 break;
2777                         case ld::Fixup::kindStoreX86PCRel32:
2778                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32;
2779                                 break;
2780                         case ld::Fixup::kindStoreX86PCRel32GOTLoad:
2781                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32GOTLoad;
2782                                 break;
2783                         case ld::Fixup::kindStoreX86PCRel32TLVLoad:
2784                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32TLVLoad;
2785                                 break;
2786                         case ld::Fixup::kindStoreX86Abs32TLVLoad:
2787                                 firstKind = ld::Fixup::kindStoreTargetAddressX86Abs32TLVLoad;
2788                                 break;
2789                         case ld::Fixup::kindStoreARMBranch24:
2790                                 firstKind = ld::Fixup::kindStoreTargetAddressARMBranch24;
2791                                 break;
2792                         case ld::Fixup::kindStoreThumbBranch22:
2793                                 firstKind = ld::Fixup::kindStoreTargetAddressThumbBranch22;
2794                                 break;
2795 #if SUPPORT_ARCH_arm64
2796                         case ld::Fixup::kindStoreARM64Branch26:
2797                                 firstKind = ld::Fixup::kindStoreTargetAddressARM64Branch26;
2798                                 break;
2799                         case ld::Fixup::kindStoreARM64Page21:
2800                                 firstKind = ld::Fixup::kindStoreTargetAddressARM64Page21;
2801                                 break;
2802                         case ld::Fixup::kindStoreARM64PageOff12:
2803                                 firstKind = ld::Fixup::kindStoreTargetAddressARM64PageOff12;
2804                                 break;
2805                         case ld::Fixup::kindStoreARM64GOTLoadPage21:
2806                                 firstKind = ld::Fixup::kindStoreTargetAddressARM64GOTLoadPage21;
2807                                 break;
2808                         case ld::Fixup::kindStoreARM64GOTLoadPageOff12:
2809                                 firstKind = ld::Fixup::kindStoreTargetAddressARM64GOTLoadPageOff12;
2810                                 break;
2811                         case ld::Fixup::kindStoreARM64TLVPLoadPage21:
2812                                 firstKind = ld::Fixup::kindStoreTargetAddressARM64TLVPLoadPage21;
2813                                 break;
2814                         case ld::Fixup::kindStoreARM64TLVPLoadPageOff12:
2815                                 firstKind = ld::Fixup::kindStoreTargetAddressARM64TLVPLoadPageOff12;
2816                                 break;
2817 #endif
2818                         default:
2819                                 combined = false;
2820                                 cl = ld::Fixup::k1of2;
2821                                 break;
2822                 }
2823         }
2824
2825         if ( target.atom != NULL ) {
2826                 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2827                         addFixup(src, cl, firstKind, target.atom);
2828                 }
2829                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2830                         addFixup(src, cl, firstKind, ld::Fixup::bindingByContentBound, target.atom);
2831                 }
2832                 else if ( (src.atom->section().type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
2833                         // backing string in CFStrings should always be direct
2834                         addFixup(src, cl, firstKind, target.atom);
2835                 }
2836                 else if ( (src.atom == target.atom) && (target.atom->combine() == ld::Atom::combineByName) ) {
2837                         // reference to self should always be direct
2838                         addFixup(src, cl, firstKind, target.atom);
2839                 }
2840                 else {
2841                         // change direct fixup to by-name fixup
2842                         addFixup(src, cl, firstKind, false, target.atom->name());
2843                 }
2844         }
2845         else {
2846                 addFixup(src, cl, firstKind, target.weakImport, target.name);
2847         }
2848         if ( target.addend == 0 ) {
2849                 if ( ! combined )
2850                         addFixup(src, ld::Fixup::k2of2, setKind);
2851         }
2852         else {
2853                 addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, target.addend);
2854                 addFixup(src, ld::Fixup::k3of3, setKind);
2855         }
2856 }
2857
2858 template <typename A>
2859 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase)
2860 {
2861         ld::Fixup::Cluster cl = (target.addend == 0) ? ld::Fixup::k1of4 : ld::Fixup::k1of5;
2862         if ( target.atom != NULL ) {
2863                 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2864                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.atom);
2865                 }
2866                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2867                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
2868                 }
2869                 else {
2870                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
2871                 }
2872         }
2873         else {
2874                 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.weakImport, target.name);
2875         }
2876         if ( target.addend == 0 ) {
2877                 assert(picBase.atom != NULL);
2878                 addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2879                 addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, picBase.addend);
2880                 addFixup(src, ld::Fixup::k4of4, kind);
2881         }
2882         else {
2883                 addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
2884                 addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2885                 addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, picBase.addend);
2886                 addFixup(src, ld::Fixup::k5of5, kind);
2887         }
2888 }
2889
2890
2891
2892 template <typename A>
2893 uint32_t TentativeDefinitionSection<A>::computeAtomCount(class Parser<A>& parser,
2894                                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
2895                                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
2896 {
2897         return parser.tentativeDefinitionCount();
2898 }
2899
2900 template <typename A>
2901 uint32_t TentativeDefinitionSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2902                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
2903                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
2904 {
2905         this->_beginAtoms = (Atom<A>*)p;
2906         uint32_t count = 0;
2907         for (uint32_t i=parser.undefinedStartIndex(); i < parser.undefinedEndIndex(); ++i) {
2908                 const macho_nlist<P>& sym =     parser.symbolFromIndex(i);
2909                 if ( ((sym.n_type() & N_TYPE) == N_UNDF) && (sym.n_value() != 0) ) {
2910                         uint64_t size = sym.n_value();
2911                         uint8_t alignP2 = GET_COMM_ALIGN(sym.n_desc());
2912                         if ( alignP2 == 0 ) {
2913                                 // common symbols align to their size
2914                                 // that is, a 4-byte common aligns to 4-bytes
2915                                 // if this size is not a power of two,
2916                                 // then round up to the next power of two
2917                                 alignP2 = 63 - (uint8_t)__builtin_clzll(size);
2918                                 if ( size != (1ULL << alignP2) )
2919                                         ++alignP2;
2920                         }
2921                         // limit alignment of extremely large commons to 2^15 bytes (8-page)
2922                         if ( alignP2 > 15 )
2923                                 alignP2 = 15;
2924                         Atom<A>* allocatedSpace = (Atom<A>*)p;
2925                         new (allocatedSpace) Atom<A>(*this, parser.nameFromSymbol(sym), (pint_t)ULLONG_MAX, size,
2926                                                                                 ld::Atom::definitionTentative,  ld::Atom::combineByName,
2927                                                                                 parser.scopeFromSymbol(sym), ld::Atom::typeZeroFill, ld::Atom::symbolTableIn,
2928                                                                                 parser.dontDeadStripFromSymbol(sym), false, false, ld::Atom::Alignment(alignP2) );
2929                         p += sizeof(Atom<A>);
2930                         ++count;
2931                 }
2932         }
2933         this->_endAtoms = (Atom<A>*)p;
2934         return count;
2935 }
2936
2937
2938 template <typename A>
2939 uint32_t AbsoluteSymbolSection<A>::computeAtomCount(class Parser<A>& parser,
2940                                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
2941                                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
2942 {
2943         return parser.absoluteSymbolCount();
2944 }
2945
2946 template <typename A>
2947 uint32_t AbsoluteSymbolSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2948                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
2949                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
2950 {
2951         this->_beginAtoms = (Atom<A>*)p;
2952         uint32_t count = 0;
2953         for (uint32_t i=0; i < parser.symbolCount(); ++i) {
2954                 const macho_nlist<P>& sym =     parser.symbolFromIndex(i);
2955                 if ( (sym.n_type() & N_TYPE) != N_ABS )
2956                         continue;
2957                 const char* absName = parser.nameFromSymbol(sym);
2958                 // ignore .objc_class_name_* symbols
2959                 if ( strncmp(absName, ".objc_class_name_", 17) == 0 )
2960                         continue;
2961                 // ignore .objc_class_name_* symbols
2962                 if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
2963                         continue;
2964                 // ignore empty *.eh symbols
2965                 if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
2966                         continue;
2967
2968                 Atom<A>* allocatedSpace = (Atom<A>*)p;
2969                 new (allocatedSpace) Atom<A>(*this, parser, sym, 0);
2970                 p += sizeof(Atom<A>);
2971                 ++count;
2972         }
2973         this->_endAtoms = (Atom<A>*)p;
2974         return count;
2975 }
2976
2977 template <typename A>
2978 Atom<A>* AbsoluteSymbolSection<A>::findAbsAtomForValue(typename A::P::uint_t value)
2979 {
2980         Atom<A>* end = this->_endAtoms;
2981         for(Atom<A>* p = this->_beginAtoms; p < end; ++p) {
2982                 if ( p->_objAddress == value )
2983                         return p;
2984         }
2985         return NULL;
2986 }
2987
2988
2989 template <typename A>
2990 uint32_t Parser<A>::indirectSymbol(uint32_t indirectIndex)
2991 {
2992         if ( indirectIndex >= _indirectTableCount )
2993                 throw "indirect symbol index out of range";
2994         return E::get32(_indirectTable[indirectIndex]);
2995 }
2996
2997 template <typename A>
2998 const macho_nlist<typename A::P>& Parser<A>::symbolFromIndex(uint32_t index)
2999 {
3000         if ( index > _symbolCount )
3001                 throw "symbol index out of range";
3002         return _symbols[index];
3003 }
3004
3005 template <typename A>
3006 const macho_section<typename A::P>*     Parser<A>::machOSectionFromSectionIndex(uint32_t index)
3007 {
3008         if ( index >= _machOSectionsCount )
3009                 throw "section index out of range";
3010         return &_sectionsStart[index];
3011 }
3012
3013 template <typename A>
3014 uint32_t Parser<A>::symbolIndexFromIndirectSectionAddress(pint_t addr, const macho_section<P>* sect)
3015 {
3016         uint32_t elementSize = 0;
3017         switch ( sect->flags() & SECTION_TYPE ) {
3018                 case S_SYMBOL_STUBS:
3019                         elementSize = sect->reserved2();
3020                         break;
3021                 case S_LAZY_SYMBOL_POINTERS:
3022                 case S_NON_LAZY_SYMBOL_POINTERS:
3023                         elementSize = sizeof(pint_t);
3024                         break;
3025                 default:
3026                         throw "section does not use inirect symbol table";
3027         }
3028         uint32_t indexInSection = (addr - sect->addr()) / elementSize;
3029         uint32_t indexIntoIndirectTable = sect->reserved1() + indexInSection;
3030         return this->indirectSymbol(indexIntoIndirectTable);
3031 }
3032
3033
3034
3035 template <typename A>
3036 const char* Parser<A>::nameFromSymbol(const macho_nlist<P>& sym)
3037 {
3038         return &_strings[sym.n_strx()];
3039 }
3040
3041 template <typename A>
3042 ld::Atom::Scope Parser<A>::scopeFromSymbol(const macho_nlist<P>& sym)
3043 {
3044         if ( (sym.n_type() & N_EXT) == 0 )
3045                 return ld::Atom::scopeTranslationUnit;
3046         else if ( (sym.n_type() & N_PEXT) != 0 )
3047                 return ld::Atom::scopeLinkageUnit;
3048         else if ( this->nameFromSymbol(sym)[0] == 'l' ) // since all 'l' symbols will be remove, don't make them global
3049                 return ld::Atom::scopeLinkageUnit;
3050         else
3051                 return ld::Atom::scopeGlobal;
3052 }
3053
3054 template <typename A>
3055 ld::Atom::Definition Parser<A>::definitionFromSymbol(const macho_nlist<P>& sym)
3056 {
3057         switch ( sym.n_type() & N_TYPE ) {
3058                 case N_ABS:
3059                         return ld::Atom::definitionAbsolute;
3060                 case N_SECT:
3061                         return ld::Atom::definitionRegular;
3062                 case N_UNDF:
3063                         if ( sym.n_value() != 0 )
3064                                 return ld::Atom::definitionTentative;
3065         }
3066         throw "definitionFromSymbol() bad symbol";
3067 }
3068
3069 template <typename A>
3070 ld::Atom::Combine Parser<A>::combineFromSymbol(const macho_nlist<P>& sym)
3071 {
3072         if ( sym.n_desc() & N_WEAK_DEF )
3073                 return ld::Atom::combineByName;
3074         else
3075                 return ld::Atom::combineNever;
3076 }
3077
3078
3079 template <typename A>
3080 ld::Atom::SymbolTableInclusion Parser<A>::inclusionFromSymbol(const macho_nlist<P>& sym)
3081 {
3082         const char* symbolName = nameFromSymbol(sym);
3083         // labels beginning with 'l' (lowercase ell) are automatically removed in final linked images <rdar://problem/4571042>
3084         // labels beginning with 'L' should have been stripped by the assembler, so are stripped now
3085         if ( sym.n_desc() & REFERENCED_DYNAMICALLY )
3086                 return ld::Atom::symbolTableInAndNeverStrip;
3087         else if ( symbolName[0] == 'l' )
3088                 return ld::Atom::symbolTableNotInFinalLinkedImages;
3089         else if ( symbolName[0] == 'L' )
3090                 return ld::Atom::symbolTableNotIn;
3091         else
3092                 return ld::Atom::symbolTableIn;
3093 }
3094
3095 template <typename A>
3096 bool Parser<A>::dontDeadStripFromSymbol(const macho_nlist<P>& sym)
3097 {
3098         return ( (sym.n_desc() & (N_NO_DEAD_STRIP|REFERENCED_DYNAMICALLY)) != 0 );
3099 }
3100
3101 template <typename A>
3102 bool Parser<A>::isThumbFromSymbol(const macho_nlist<P>& sym)
3103 {
3104         return ( sym.n_desc() & N_ARM_THUMB_DEF );
3105 }
3106
3107 template <typename A>
3108 bool Parser<A>::weakImportFromSymbol(const macho_nlist<P>& sym)
3109 {
3110         return ( ((sym.n_type() & N_TYPE) == N_UNDF) && ((sym.n_desc() & N_WEAK_REF) != 0) );
3111 }
3112
3113 template <typename A>
3114 bool Parser<A>::resolverFromSymbol(const macho_nlist<P>& sym)
3115 {
3116         return ( sym.n_desc() & N_SYMBOL_RESOLVER );
3117 }
3118
3119 template <typename A>
3120 bool Parser<A>::altEntryFromSymbol(const macho_nlist<P>& sym)
3121 {
3122         return ( sym.n_desc() & N_ALT_ENTRY );
3123 }
3124
3125
/* Advance *OFFSET past one LEB128 value (signed or unsigned): every byte
   with the continuation bit (0x80) set, plus the byte that ends the value.
   Never moves past END; a value truncated by END leaves *OFFSET == END.  */
static void
skip_leb128 (const uint8_t ** offset, const uint8_t * end)
{
  const uint8_t * cursor = *offset;

  while (cursor != end)
    {
      const uint8_t byte = *cursor++;
      /* A byte without the continuation bit is the last one of the value.  */
      if (byte < 0x80)
        break;
    }
  *offset = cursor;
}
3135
/* Decode one ULEB128 starting at *OFFSET into a 64-bit word and advance
   *OFFSET past it.  Returns (uint64_t)-1 if END is reached before the
   value terminates, or if the value does not fit in 64 bits; in the
   overflow case the remaining bytes of the value are still consumed.  */
static uint64_t
read_uleb128 (const uint8_t ** offset, const uint8_t * end)
{
  uint64_t value = 0;
  int shift = 0;

  for (;;)
    {
      if (*offset == end)
        return (uint64_t) -1;

      const uint8_t byte = *(*offset)++;
      const uint64_t payload = byte & 0x7f;

      /* Accept the 7 payload bits only if shifting them into place loses
         nothing; otherwise poison the result and keep consuming.  */
      if (shift < 64 && ((payload << shift) >> shift) == payload)
        {
          value |= payload << shift;
          shift += 7;
        }
      else
        value = (uint64_t) -1;

      if (byte < 0x80)
        return value;
    }
}
3159
3160
3161 /* Skip over a DWARF attribute of form FORM.  */
3162 template <typename A>
3163 bool Parser<A>::skip_form(const uint8_t ** offset, const uint8_t * end, uint64_t form,
3164                                                         uint8_t addr_size, bool dwarf64)
3165 {
3166   int64_t sz=0;
3167
3168   switch (form)
3169     {
3170     case DW_FORM_addr:
3171       sz = addr_size;
3172       break;
3173
3174     case DW_FORM_block2:
3175       if (end - *offset < 2)
3176         return false;
3177       sz = 2 + A::P::E::get16(*(uint16_t*)offset);
3178       break;
3179
3180     case DW_FORM_block4:
3181       if (end - *offset < 4)
3182         return false;
3183       sz = 2 + A::P::E::get32(*(uint32_t*)offset);
3184       break;
3185
3186     case DW_FORM_data2:
3187     case DW_FORM_ref2:
3188       sz = 2;
3189       break;
3190
3191     case DW_FORM_data4:
3192     case DW_FORM_ref4:
3193       sz = 4;
3194       break;
3195
3196     case DW_FORM_data8:
3197     case DW_FORM_ref8:
3198       sz = 8;
3199       break;
3200
3201     case DW_FORM_string:
3202       while (*offset != end && **offset)
3203         ++*offset;
3204     case DW_FORM_data1:
3205     case DW_FORM_flag:
3206     case DW_FORM_ref1:
3207       sz = 1;
3208       break;
3209
3210     case DW_FORM_block:
3211       sz = read_uleb128 (offset, end);
3212       break;
3213
3214     case DW_FORM_block1:
3215       if (*offset == end)
3216         return false;
3217       sz = 1 + **offset;
3218       break;
3219
3220     case DW_FORM_sdata:
3221     case DW_FORM_udata:
3222     case DW_FORM_ref_udata:
3223       skip_leb128 (offset, end);
3224       return true;
3225
3226     case DW_FORM_strp:
3227     case DW_FORM_ref_addr:
3228       sz = 4;
3229       break;
3230
3231         case DW_FORM_sec_offset:
3232           sz = sizeof(typename A::P::uint_t);
3233       break;
3234
3235         case DW_FORM_exprloc:
3236       sz = read_uleb128 (offset, end);
3237       break;
3238
3239     case DW_FORM_flag_present:
3240           sz = 0;
3241           break;
3242
3243     case DW_FORM_ref_sig8:
3244           sz = 8;
3245           break;
3246
3247     default:
3248       return false;
3249     }
3250   if (end - *offset < sz)
3251     return false;
3252   *offset += sz;
3253   return true;
3254 }
3255
3256
3257 template <typename A>
3258 const char* Parser<A>::getDwarfString(uint64_t form, const uint8_t*& di)
3259 {
3260         uint32_t offset;
3261         const char* dwarfStrings;
3262         const char* result = NULL;
3263         switch (form) {
3264                 case DW_FORM_string:
3265                         result = (const char*)di;
3266                         di += strlen(result) + 1;
3267                         break;
3268                 case DW_FORM_strp:
3269                         offset = E::get32(*((uint32_t*)di));
3270                         dwarfStrings = (char*)_file->fileContent() + _file->_dwarfDebugStringSect->offset();
3271                         if ( offset < _file->_dwarfDebugStringSect->size() )
3272                                 result = &dwarfStrings[offset];
3273                         else
3274                                 warning("dwarf DW_FORM_strp (offset=0x%08X) is too big in %s", offset, this->_path);
3275                         di += 4;
3276                         break;
3277                 default:
3278                         warning("unknown dwarf string encoding (form=%lld) in %s", form, this->_path);
3279                         break;
3280         }
3281         return result;
3282 }
3283
3284 template <typename A>
3285 uint64_t Parser<A>::getDwarfOffset(uint64_t form, const uint8_t*& di, bool dwarf64)
3286 {
3287         if ( form == DW_FORM_sec_offset )
3288                 form = (dwarf64 ? DW_FORM_data8 : DW_FORM_data4);
3289         uint64_t result = -1;
3290         switch (form) {
3291                 case DW_FORM_data4:
3292                         result = A::P::E::get32(*(uint32_t*)di);
3293                         di += 4;
3294                         break;
3295                 case DW_FORM_data8:
3296                         result = A::P::E::get64(*(uint64_t*)di);
3297                         di += 8;
3298                         break;
3299                 default:
3300                         warning("unknown dwarf DW_FORM_ for DW_AT_stmt_list in %s", this->_path);
3301         }
3302         return result;
3303 }
3304
3305
// Temporary pairing of an atom with one line-info record.
// parseDebugInfo() collects these while walking the dwarf line table and
// later copies each 'info' into the file's line-info array.
template <typename A>
struct AtomAndLineInfo {
	Atom<A>*			atom;	// atom the record belongs to
	ld::Atom::LineInfo	info;	// atom offset, file name and line number
};
3311
3312
3313 // <rdar://problem/5591394> Add support to ld64 for N_FUN stabs when used for symbolic constants
3314 // Returns whether a stabStr belonging to an N_FUN stab represents a
3315 // symbolic constant rather than a function
3316 template <typename A>
3317 bool Parser<A>::isConstFunStabs(const char *stabStr)
3318 {
3319         const char* colon;
3320         // N_FUN can be used for both constants and for functions. In case it's a constant,
3321         // the format of the stabs string is "symname:c=<value>;"
3322         // ':' cannot appear in the symbol name, except if it's an Objective-C method
3323         // (in which case the symbol name starts with + or -, and then it's definitely
3324         //  not a constant)
3325         return (stabStr != NULL) && (stabStr[0] != '+') && (stabStr[0] != '-')
3326                         && ((colon = strchr(stabStr, ':')) != NULL)
3327                         && (colon[1] == 'c') && (colon[2] == '=');
3328 }
3329
3330
3331 template <typename A>
3332 void Parser<A>::parseDebugInfo()
3333 {
3334         // check for dwarf __debug_info section
3335         if ( _file->_dwarfDebugInfoSect == NULL ) {
3336                 // if no DWARF debug info, look for stabs
3337                 this->parseStabs();
3338                 return;
3339         }
3340         if ( _file->_dwarfDebugInfoSect->size() == 0 )
3341                 return;
3342
3343         uint64_t stmtList;
3344         const char* tuDir;
3345         const char* tuName;
3346         if ( !read_comp_unit(&tuName, &tuDir, &stmtList) ) {
3347                 // if can't parse dwarf, warn and give up
3348                 _file->_dwarfTranslationUnitPath = NULL;
3349                 warning("can't parse dwarf compilation unit info in %s", _path);
3350                 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoNone;
3351                 return;
3352         }
3353         if ( (tuName != NULL) && (tuName[0] == '/') ) {
3354                 _file->_dwarfTranslationUnitPath = tuName;
3355         }
3356         else if ( (tuDir != NULL) && (tuName != NULL) ) {
3357                 asprintf((char**)&(_file->_dwarfTranslationUnitPath), "%s/%s", tuDir, tuName);
3358         }
3359         else if ( tuDir == NULL ) {
3360                 _file->_dwarfTranslationUnitPath = tuName;
3361         }
3362         else {
3363                 _file->_dwarfTranslationUnitPath = NULL;
3364         }
3365
3366         // add line number info to atoms from dwarf
3367         std::vector<AtomAndLineInfo<A> > entries;
3368         entries.reserve(64);
3369         if ( _file->_debugInfoKind == ld::relocatable::File::kDebugInfoDwarf ) {
3370                 // file with just data will have no __debug_line info
3371                 if ( (_file->_dwarfDebugLineSect != NULL) && (_file->_dwarfDebugLineSect->size() != 0) ) {
3372                         // validate stmt_list
3373                         if ( (stmtList != (uint64_t)-1) && (stmtList < _file->_dwarfDebugLineSect->size()) ) {
3374                                 const uint8_t* debug_line = (uint8_t*)_file->fileContent() + _file->_dwarfDebugLineSect->offset();
3375                                 struct line_reader_data* lines = line_open(&debug_line[stmtList],
3376                                                                                                                 _file->_dwarfDebugLineSect->size() - stmtList, E::little_endian);
3377                                 struct line_info result;
3378                                 Atom<A>* curAtom = NULL;
3379                                 uint32_t curAtomOffset = 0;
3380                                 uint32_t curAtomAddress = 0;
3381                                 uint32_t curAtomSize = 0;
3382                                 std::map<uint32_t,const char*>  dwarfIndexToFile;
3383                                 if ( lines != NULL ) {
3384                                         while ( line_next(lines, &result, line_stop_pc) ) {
3385                                                 //fprintf(stderr, "curAtom=%p, result.pc=0x%llX, result.line=%llu, result.end_of_sequence=%d,"
3386                                                 //                                " curAtomAddress=0x%X, curAtomSize=0x%X\n",
3387                                                 //              curAtom, result.pc, result.line, result.end_of_sequence, curAtomAddress, curAtomSize);
3388                                                 // work around weird debug line table compiler generates if no functions in __text section
3389                                                 if ( (curAtom == NULL) && (result.pc == 0) && result.end_of_sequence && (result.file == 1))
3390                                                         continue;
3391                                                 // for performance, see if in next pc is in current atom
3392                                                 if ( (curAtom != NULL) && (curAtomAddress <= result.pc) && (result.pc < (curAtomAddress+curAtomSize)) ) {
3393                                                         curAtomOffset = result.pc - curAtomAddress;
3394                                                 }
3395                                                 // or pc at end of current atom
3396                                                 else if ( result.end_of_sequence && (curAtom != NULL) && (result.pc == (curAtomAddress+curAtomSize)) ) {
3397                                                         curAtomOffset = result.pc - curAtomAddress;
3398                                                 }
3399                                                 // or only one function that is a one line function
3400                                                 else if ( result.end_of_sequence && (curAtom == NULL) && (this->findAtomByAddress(0) != NULL) && (result.pc == this->findAtomByAddress(0)->size()) ) {
3401                                                         curAtom                 = this->findAtomByAddress(0);
3402                                                         curAtomOffset   = result.pc - curAtom->objectAddress();
3403                                                         curAtomAddress  = curAtom->objectAddress();
3404                                                         curAtomSize             = curAtom->size();
3405                                                 }
3406                                                 else {
3407                                                         // do slow look up of atom by address
3408                                                         try {
3409                                                                 curAtom = this->findAtomByAddress(result.pc);
3410                                                         }
3411                                                         catch (...) {
3412                                                                 // in case of bug in debug info, don't abort link, just limp on
3413                                                                 curAtom = NULL;
3414                                                         }
3415                                                         if ( curAtom == NULL )
3416                                                                 break; // file has line info but no functions
3417                                                         if ( result.end_of_sequence && (curAtomAddress+curAtomSize < result.pc) ) {
3418                                                                 // a one line function can be returned by line_next() as one entry with pc at end of blob
3419                                                                 // look for alt atom starting at end of previous atom
3420                                                                 uint32_t previousEnd = curAtomAddress+curAtomSize;
3421                                                                 Atom<A>* alt = this->findAtomByAddressOrNullIfStub(previousEnd);
3422                                                                 if ( alt == NULL )
3423                                                                         continue; // ignore spurious debug info for stubs
3424                                                                 if ( result.pc <= alt->objectAddress() + alt->size() ) {
3425                                                                         curAtom                 = alt;
3426                                                                         curAtomOffset   = result.pc - alt->objectAddress();
3427                                                                         curAtomAddress  = alt->objectAddress();
3428                                                                         curAtomSize             = alt->size();
3429                                                                 }
3430                                                                 else {
3431                                                                         curAtomOffset   = result.pc - curAtom->objectAddress();
3432                                                                         curAtomAddress  = curAtom->objectAddress();
3433                                                                         curAtomSize             = curAtom->size();
3434                                                                 }
3435                                                         }
3436                                                         else {
3437                                                                 curAtomOffset   = result.pc - curAtom->objectAddress();
3438                                                                 curAtomAddress  = curAtom->objectAddress();
3439                                                                 curAtomSize             = curAtom->size();
3440                                                         }
3441                                                 }
3442                                                 const char* filename;
3443                                                 std::map<uint32_t,const char*>::iterator pos = dwarfIndexToFile.find(result.file);
3444                                                 if ( pos == dwarfIndexToFile.end() ) {
3445                                                         filename = line_file(lines, result.file);
3446                                                         dwarfIndexToFile[result.file] = filename;
3447                                                 }
3448                                                 else {
3449                                                         filename = pos->second;
3450                                                 }
3451                                                 // only record for ~8000 line info records per function
3452                                                 if ( curAtom->roomForMoreLineInfoCount() ) {
3453                                                         AtomAndLineInfo<A> entry;
3454                                                         entry.atom = curAtom;
3455                                                         entry.info.atomOffset = curAtomOffset;
3456                                                         entry.info.fileName = filename;
3457                                                         entry.info.lineNumber = result.line;
3458                                                         //fprintf(stderr, "addr=0x%08llX, line=%lld, file=%s, atom=%s, atom.size=0x%X, end=%d\n",
3459                                                         //              result.pc, result.line, filename, curAtom->name(), curAtomSize, result.end_of_sequence);
3460                                                         entries.push_back(entry);
3461                                                         curAtom->incrementLineInfoCount();
3462                                                 }
3463                                                 if ( result.end_of_sequence ) {
3464                                                         curAtom = NULL;
3465                                                 }
3466                                         }
3467                                         line_free(lines);
3468                                 }
3469                         }
3470                 }
3471         }
3472
3473         // assign line info start offset for each atom
3474         uint8_t* p = _file->_atomsArray;
3475         uint32_t liOffset = 0;
3476         for(int i=_file->_atomsArrayCount; i > 0; --i) {
3477                 Atom<A>* atom = (Atom<A>*)p;
3478                 atom->_lineInfoStartIndex = liOffset;
3479                 liOffset += atom->_lineInfoCount;
3480                 atom->_lineInfoCount = 0;
3481                 p += sizeof(Atom<A>);
3482         }
3483         assert(liOffset == entries.size());
3484         _file->_lineInfos.reserve(liOffset);
3485
3486         // copy each line info for each atom
3487         for (typename std::vector<AtomAndLineInfo<A> >::iterator it = entries.begin(); it != entries.end(); ++it) {
3488                 uint32_t slot = it->atom->_lineInfoStartIndex + it->atom->_lineInfoCount;
3489                 _file->_lineInfos[slot] = it->info;
3490                 it->atom->_lineInfoCount++;
3491         }
3492
3493         // done with temp vector
3494         entries.clear();
3495 }
3496
3497 template <typename A>
3498 void Parser<A>::parseStabs()
3499 {
3500         // scan symbol table for stabs entries
3501         Atom<A>* currentAtom = NULL;
3502         pint_t currentAtomAddress = 0;
3503         enum { start, inBeginEnd, inFun } state = start;
3504         for (uint32_t symbolIndex = 0; symbolIndex < _symbolCount; ++symbolIndex ) {
3505                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
3506                 bool useStab = true;
3507                 uint8_t type = sym.n_type();
3508                 const char* symString = (sym.n_strx() != 0) ? this->nameFromSymbol(sym) : NULL;
3509                 if ( (type & N_STAB) != 0 ) {
3510                         _file->_debugInfoKind =  (_hasUUID ? ld::relocatable::File::kDebugInfoStabsUUID : ld::relocatable::File::kDebugInfoStabs);
3511                         ld::relocatable::File::Stab stab;
3512                         stab.atom       = NULL;
3513                         stab.type       = type;
3514                         stab.other      = sym.n_sect();
3515                         stab.desc       = sym.n_desc();
3516                         stab.value      = sym.n_value();
3517                         stab.string = NULL;
3518                         switch (state) {
3519                                 case start:
3520                                         switch (type) {
3521                                                 case N_BNSYM:
3522                                                         // beginning of function block
3523                                                         state = inBeginEnd;
3524                                                         // fall into case to lookup atom by addresss
3525                                                 case N_LCSYM:
3526                                                 case N_STSYM:
3527                                                         currentAtomAddress = sym.n_value();
3528                                                         currentAtom = this->findAtomByAddress(currentAtomAddress);
3529                                                         if ( currentAtom != NULL ) {
3530                                                                 stab.atom = currentAtom;
3531                                                                 stab.string = symString;
3532                                                         }
3533                                                         else {
3534                                                                 fprintf(stderr, "can't find atom for stabs BNSYM at %08llX in %s",
3535                                                                         (uint64_t)sym.n_value(), _path);
3536                                                         }
3537                                                         break;
3538                                                 case N_SO:
3539                                                 case N_OSO:
3540                                                 case N_OPT:
3541                                                 case N_LSYM:
3542                                                 case N_RSYM:
3543                                                 case N_PSYM:
3544                                                 case N_AST:
3545                                                         // not associated with an atom, just copy
3546                                                         stab.string = symString;
3547                                                         break;
3548                                                 case N_GSYM:
3549                                                 {
3550                                                         // n_value field is NOT atom address ;-(
3551                                                         // need to find atom by name match
3552                                                         const char* colon = strchr(symString, ':');
3553                                                         if ( colon != NULL ) {
3554                                                                 // build underscore leading name
3555                                                                 int nameLen = colon - symString;
3556                                                                 char symName[nameLen+2];
3557                                                                 strlcpy(&symName[1], symString, nameLen+1);
3558                                                                 symName[0] = '_';
3559                                                                 symName[nameLen+1] = '\0';
3560                                                                 currentAtom = this->findAtomByName(symName);
3561                                                                 if ( currentAtom != NULL ) {
3562                                                                         stab.atom = currentAtom;
3563                                                                         stab.string = symString;
3564                                                                 }
3565                                                         }
3566                                                         else {
3567                                                                 // might be a debug-note without trailing :G()
3568                                                                 currentAtom = this->findAtomByName(symString);
3569                                                                 if ( currentAtom != NULL ) {
3570                                                                         stab.atom = currentAtom;
3571                                                                         stab.string = symString;
3572                                                                 }
3573                                                         }
3574                                                         if ( stab.atom == NULL ) {
3575                                                                 // ld_classic added bogus GSYM stabs for old style dtrace probes
3576                                                                 if ( (strncmp(symString, "__dtrace_probe$", 15) != 0) )
3577                                                                         warning("can't find atom for N_GSYM stabs %s in %s", symString, _path);
3578                                                                 useStab = false;
3579                                                         }
3580                                                         break;
3581                                                 }
3582                                                 case N_FUN:
3583                                                         if ( isConstFunStabs(symString) ) {
3584                                                                 // constant not associated with a function
3585                                                                 stab.string = symString;
3586                                                         }
3587                                                         else {
3588                                                                 // old style stabs without BNSYM
3589                                                                 state = inFun;
3590                                                                 currentAtomAddress = sym.n_value();
3591                                                                 currentAtom = this->findAtomByAddress(currentAtomAddress);
3592                                                                 if ( currentAtom != NULL ) {
3593                                                                         stab.atom = currentAtom;
3594                                                                         stab.string = symString;
3595                                                                 }
3596                                                                 else {
3597                                                                         warning("can't find atom for stabs FUN at %08llX in %s",
3598                                                                                 (uint64_t)currentAtomAddress, _path);
3599                                                                 }
3600                                                         }
3601                                                         break;
3602                                                 case N_SOL:
3603                                                 case N_SLINE:
3604                                                         stab.string = symString;
3605                                                         // old stabs
3606                                                         break;
3607                                                 case N_BINCL:
3608                                                 case N_EINCL:
3609                                                 case N_EXCL:
3610                                                         stab.string = symString;
3611                                                         // -gfull built .o file
3612                                                         break;
3613                                                 default:
3614                                                         warning("unknown stabs type 0x%X in %s", type, _path);
3615                                         }
3616                                         break;
3617                                 case inBeginEnd:
3618                                         stab.atom = currentAtom;
3619                                         switch (type) {
3620                                                 case N_ENSYM:
3621                                                         state = start;
3622                                                         currentAtom = NULL;
3623                                                         break;
3624                                                 case N_LCSYM:
3625                                                 case N_STSYM:
3626                                                 {
3627                                                         Atom<A>* nestedAtom = this->findAtomByAddress(sym.n_value());
3628                                                         if ( nestedAtom != NULL ) {
3629                                                                 stab.atom = nestedAtom;
3630                                                                 stab.string = symString;
3631                                                         }
3632                                                         else {
3633                                                                 warning("can't find atom for stabs 0x%X at %08llX in %s",
3634                                                                         type, (uint64_t)sym.n_value(), _path);
3635                                                         }
3636                                                         break;
3637                                                 }
3638                                                 case N_LBRAC:
3639                                                 case N_RBRAC:
3640                                                 case N_SLINE:
3641                                                         // adjust value to be offset in atom
3642                                                         stab.value -= currentAtomAddress;
3643                                                 default:
3644                                                         stab.string = symString;
3645                                                         break;
3646                                         }
3647                                         break;
3648                                 case inFun:
3649                                         switch (type) {
3650                                                 case N_FUN:
3651                                                         if ( isConstFunStabs(symString) ) {
3652                                                                 stab.atom = currentAtom;
3653                                                                 stab.string = symString;
3654                                                         }
3655                                                         else {
3656                                                                 if ( sym.n_sect() != 0 ) {
3657                                                                         // found another start stab, must be really old stabs...
3658                                                                         currentAtomAddress = sym.n_value();
3659                                                                         currentAtom = this->findAtomByAddress(currentAtomAddress);
3660                                                                         if ( currentAtom != NULL ) {
3661                                                                                 stab.atom = currentAtom;
3662                                                                                 stab.string = symString;
3663                                                                         }
3664                                                                         else {
3665                                                                                 warning("can't find atom for stabs FUN at %08llX in %s",
3666                                                                                         (uint64_t)currentAtomAddress, _path);
3667                                                                         }
3668                                                                 }
3669                                                                 else {
3670                                                                         // found ending stab, switch back to start state
3671                                                                         stab.string = symString;
3672                                                                         stab.atom = currentAtom;
3673                                                                         state = start;
3674                                                                         currentAtom = NULL;
3675                                                                 }
3676                                                         }
3677                                                         break;
3678                                                 case N_LBRAC:
3679                                                 case N_RBRAC:
3680                                                 case N_SLINE:
3681                                                         // adjust value to be offset in atom
3682                                                         stab.value -= currentAtomAddress;
3683                                                         stab.atom = currentAtom;
3684                                                         break;
3685                                                 case N_SO:
3686                                                         stab.string = symString;
3687                                                         state = start;
3688                                                         break;
3689                                                 default:
3690                                                         stab.atom = currentAtom;
3691                                                         stab.string = symString;
3692                                                         break;
3693                                         }
3694                                         break;
3695                         }
3696                         // add to list of stabs for this .o file
3697                         if ( useStab )
3698                                 _file->_stabs.push_back(stab);
3699                 }
3700         }
3701 }
3702
3703
3704
3705 // Look at the compilation unit DIE and determine
3706 // its NAME, compilation directory (in COMP_DIR) and its
3707 // line number information offset (in STMT_LIST).  NAME and COMP_DIR
3708 // may be NULL (especially COMP_DIR) if they are not in the .o file;
3709 // STMT_LIST will be (uint64_t) -1.
3710 //
3711 // At present this assumes that there's only one compilation unit DIE.
3712 //
3713 template <typename A>
3714 bool Parser<A>::read_comp_unit(const char ** name, const char ** comp_dir,
3715                                                         uint64_t *stmt_list)
3716 {
3717         const uint8_t * debug_info;
3718         const uint8_t * debug_abbrev;
3719         const uint8_t * di;
3720         const uint8_t * da;
3721         const uint8_t * end;
3722         const uint8_t * enda;
3723         uint64_t sz;
3724         uint16_t vers;
3725         uint64_t abbrev_base;
3726         uint64_t abbrev;
3727         uint8_t address_size;
3728         bool dwarf64;
3729
3730         *name = NULL;
3731         *comp_dir = NULL;
3732         *stmt_list = (uint64_t) -1;
3733
3734         if ( (_file->_dwarfDebugInfoSect == NULL) || (_file->_dwarfDebugAbbrevSect == NULL) )
3735                 return false;
3736
3737         debug_info = (uint8_t*)_file->fileContent() + _file->_dwarfDebugInfoSect->offset();
3738         debug_abbrev = (uint8_t*)_file->fileContent() + _file->_dwarfDebugAbbrevSect->offset();
3739         di = debug_info;
3740
3741         if (_file->_dwarfDebugInfoSect->size() < 12)
3742                 /* Too small to be a real debug_info section.  */
3743                 return false;
3744         sz = A::P::E::get32(*(uint32_t*)di);
3745         di += 4;
3746         dwarf64 = sz == 0xffffffff;
3747         if (dwarf64)
3748                 sz = A::P::E::get64(*(uint64_t*)di), di += 8;
3749         else if (sz > 0xffffff00)
3750                 /* Unknown dwarf format.  */
3751                 return false;
3752
3753         /* Verify claimed size.  */
3754         if (sz + (di - debug_info) > _file->_dwarfDebugInfoSect->size() || sz <= (dwarf64 ? 23 : 11))
3755                 return false;
3756
3757         vers = A::P::E::get16(*(uint16_t*)di);
3758         if (vers < 2 || vers > 4)
3759         /* DWARF version wrong for this code.
3760            Chances are we could continue anyway, but we don't know for sure.  */
3761                 return false;
3762         di += 2;
3763
3764         /* Find the debug_abbrev section.  */
3765         abbrev_base = dwarf64 ? A::P::E::get64(*(uint64_t*)di) : A::P::E::get32(*(uint32_t*)di);
3766         di += dwarf64 ? 8 : 4;
3767
3768         if (abbrev_base > _file->_dwarfDebugAbbrevSect->size())
3769                 return false;
3770         da = debug_abbrev + abbrev_base;
3771         enda = debug_abbrev + _file->_dwarfDebugAbbrevSect->size();
3772
3773         address_size = *di++;
3774
3775         /* Find the abbrev number we're looking for.  */
3776         end = di + sz;
3777         abbrev = read_uleb128 (&di, end);
3778         if (abbrev == (uint64_t) -1)
3779                 return false;
3780
3781         /* Skip through the debug_abbrev section looking for that abbrev.  */
3782         for (;;)
3783         {
3784                 uint64_t this_abbrev = read_uleb128 (&da, enda);
3785                 uint64_t attr;
3786
3787                 if (this_abbrev == abbrev)
3788                         /* This is almost always taken.  */
3789                         break;
3790                 skip_leb128 (&da, enda); /* Skip the tag.  */
3791                 if (da == enda)
3792                         return false;
3793                 da++;  /* Skip the DW_CHILDREN_* value.  */
3794
3795                 do {
3796                         attr = read_uleb128 (&da, enda);
3797                         skip_leb128 (&da, enda);
3798                 } while (attr != 0 && attr != (uint64_t) -1);
3799                 if (attr != 0)
3800                         return false;
3801         }
3802
3803         /* Check that the abbrev is one for a DW_TAG_compile_unit.  */
3804         if (read_uleb128 (&da, enda) != DW_TAG_compile_unit)
3805         return false;
3806         if (da == enda)
3807         return false;
3808         da++;  /* Skip the DW_CHILDREN_* value.  */
3809
3810         /* Now, go through the DIE looking for DW_AT_name,
3811          DW_AT_comp_dir, and DW_AT_stmt_list.  */
3812         for (;;)
3813         {
3814                 uint64_t attr = read_uleb128 (&da, enda);
3815                 uint64_t form = read_uleb128 (&da, enda);
3816
3817                 if (attr == (uint64_t) -1)
3818                         return false;
3819                 else if (attr == 0)
3820                         return true;
3821                 if (form == DW_FORM_indirect)
3822                         form = read_uleb128 (&di, end);
3823
3824                 switch (attr) {
3825                         case DW_AT_name:
3826                                 *name = getDwarfString(form, di);
3827                                 break;
3828                         case DW_AT_comp_dir:
3829                                 *comp_dir = getDwarfString(form, di);
3830                                 break;
3831                         case DW_AT_stmt_list:
3832                                 *stmt_list = getDwarfOffset(form, di, dwarf64);
3833                                 break;
3834                         default:
3835                                 if (! skip_form (&di, end, form, address_size, dwarf64))
3836                                         return false;
3837                 }
3838         }
3839 }
3840
3841
3842
3843 template <typename A>
3844 File<A>::~File()
3845 {
3846         free(_sectionsArray);
3847         free(_atomsArray);
3848 }
3849
3850 template <typename A>
3851 const char* File<A>::translationUnitSource() const
3852 {
3853         return _dwarfTranslationUnitPath;
3854 }
3855
3856
3857
3858 template <typename A>
3859 bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
3860 {
3861         handler.doFile(*this);
3862         uint8_t* p = _atomsArray;
3863         for(int i=_atomsArrayCount; i > 0; --i) {
3864                 handler.doAtom(*((Atom<A>*)p));
3865                 p += sizeof(Atom<A>);
3866         }
3867         p = _aliasAtomsArray;
3868         for(int i=_aliasAtomsArrayCount; i > 0; --i) {
3869                 handler.doAtom(*((AliasAtom*)p));
3870                 p += sizeof(AliasAtom);
3871         }
3872
3873         return (_atomsArrayCount != 0) || (_aliasAtomsArrayCount != 0);
3874 }
3875
3876 template <typename A>
3877 const char* Section<A>::makeSegmentName(const macho_section<typename A::P>* sect)
3878 {
3879         // mach-o section record only has room for 16-byte seg/sect names
3880         // so a 16-byte name has no trailing zero
3881         const char* name = sect->segname();
3882         if ( strlen(name) < 16 )
3883                 return name;
3884         char* tmp = new char[17];
3885         strlcpy(tmp, name, 17);
3886         return tmp;
3887 }
3888
3889 template <typename A>
3890 const char* Section<A>::makeSectionName(const macho_section<typename A::P>* sect)
3891 {
3892         const char* name = sect->sectname();
3893         if ( strlen(name) < 16 )
3894                 return name;
3895
3896         // special case common long section names so we don't have to malloc
3897         if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3898                 return "__objc_classrefs";
3899         if ( strncmp(sect->sectname(), "__objc_classlist", 16) == 0 )
3900                 return "__objc_classlist";
3901         if ( strncmp(sect->sectname(), "__objc_nlclslist", 16) == 0 )
3902                 return "__objc_nlclslist";
3903         if ( strncmp(sect->sectname(), "__objc_nlcatlist", 16) == 0 )
3904                 return "__objc_nlcatlist";
3905         if ( strncmp(sect->sectname(), "__objc_protolist", 16) == 0 )
3906                 return "__objc_protolist";
3907         if ( strncmp(sect->sectname(), "__objc_protorefs", 16) == 0 )
3908                 return "__objc_protorefs";
3909         if ( strncmp(sect->sectname(), "__objc_superrefs", 16) == 0 )
3910                 return "__objc_superrefs";
3911         if ( strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0 )
3912                 return "__objc_imageinfo";
3913         if ( strncmp(sect->sectname(), "__objc_stringobj", 16) == 0 )
3914                 return "__objc_stringobj";
3915         if ( strncmp(sect->sectname(), "__gcc_except_tab", 16) == 0 )
3916                 return "__gcc_except_tab";
3917
3918         char* tmp = new char[17];
3919         strlcpy(tmp, name, 17);
3920         return tmp;
3921 }
3922
3923 template <typename A>
3924 bool Section<A>::readable(const macho_section<typename A::P>* sect)
3925 {
3926         return true;
3927 }
3928
3929 template <typename A>
3930 bool Section<A>::writable(const macho_section<typename A::P>* sect)
3931 {
3932         // mach-o .o files do not contain segment permissions
3933         // we just know TEXT is special
3934         return ( strcmp(sect->segname(), "__TEXT") != 0 );
3935 }
3936
3937 template <typename A>
3938 bool Section<A>::exectuable(const macho_section<typename A::P>* sect)
3939 {
3940         // mach-o .o files do not contain segment permissions
3941         // we just know TEXT is special
3942         return ( strcmp(sect->segname(), "__TEXT") == 0 );
3943 }
3944
3945
3946 template <typename A>
3947 ld::Section::Type Section<A>::sectionType(const macho_section<typename A::P>* sect)
3948 {
3949         switch ( sect->flags() & SECTION_TYPE ) {
3950                 case S_ZEROFILL:
3951                         return ld::Section::typeZeroFill;
3952                 case S_CSTRING_LITERALS:
3953                         if ( (strcmp(sect->sectname(), "__cstring") == 0) && (strcmp(sect->segname(), "__TEXT") == 0) )
3954                                 return ld::Section::typeCString;
3955                         else
3956                                 return ld::Section::typeNonStdCString;
3957                 case S_4BYTE_LITERALS:
3958                         return ld::Section::typeLiteral4;
3959                 case S_8BYTE_LITERALS:
3960                         return ld::Section::typeLiteral8;
3961                 case S_LITERAL_POINTERS:
3962                         return ld::Section::typeCStringPointer;
3963                 case S_NON_LAZY_SYMBOL_POINTERS:
3964                         return ld::Section::typeNonLazyPointer;
3965                 case S_LAZY_SYMBOL_POINTERS:
3966                         return ld::Section::typeLazyPointer;
3967                 case S_SYMBOL_STUBS:
3968                         return ld::Section::typeStub;
3969                 case S_MOD_INIT_FUNC_POINTERS:
3970                         return ld::Section::typeInitializerPointers;
3971                 case S_MOD_TERM_FUNC_POINTERS:
3972                         return ld::Section::typeTerminatorPointers;
3973                 case S_INTERPOSING:
3974                         return ld::Section::typeUnclassified;
3975                 case S_16BYTE_LITERALS:
3976                         return ld::Section::typeLiteral16;
3977                 case S_REGULAR:
3978                 case S_COALESCED:
3979                         if ( sect->flags() & S_ATTR_PURE_INSTRUCTIONS ) {
3980                                 return ld::Section::typeCode;
3981                         }
3982                         else if ( strcmp(sect->segname(), "__TEXT") == 0 ) {
3983                                 if ( strcmp(sect->sectname(), "__eh_frame") == 0 )
3984                                         return ld::Section::typeCFI;
3985                                 else if ( strcmp(sect->sectname(), "__ustring") == 0 )
3986                                         return ld::Section::typeUTF16Strings;
3987                                 else if ( strcmp(sect->sectname(), "__textcoal_nt") == 0 )
3988                                         return ld::Section::typeCode;
3989                                 else if ( strcmp(sect->sectname(), "__StaticInit") == 0 )
3990                                         return ld::Section::typeCode;
3991                                 else if ( strcmp(sect->sectname(), "__constructor") == 0 )
3992                                         return ld::Section::typeInitializerPointers;
3993                         }
3994                         else if ( strcmp(sect->segname(), "__DATA") == 0 ) {
3995                                 if ( strcmp(sect->sectname(), "__cfstring") == 0 )
3996                                         return ld::Section::typeCFString;
3997                                 else if ( strcmp(sect->sectname(), "__dyld") == 0 )
3998                                         return ld::Section::typeDyldInfo;
3999                                 else if ( strcmp(sect->sectname(), "__program_vars") == 0 )
4000                                         return ld::Section::typeDyldInfo;
4001                                 else if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
4002                                         return ld::Section::typeObjCClassRefs;
4003                                 else if ( strcmp(sect->sectname(), "__objc_catlist") == 0 )
4004                                         return ld::Section::typeObjC2CategoryList;
4005                         }
4006                         else if ( strcmp(sect->segname(), "__OBJC") == 0 ) {
4007                                 if ( strcmp(sect->sectname(), "__class") == 0 )
4008                                         return ld::Section::typeObjC1Classes;
4009                         }
4010                         break;
4011                 case S_THREAD_LOCAL_REGULAR:
4012                         return ld::Section::typeTLVInitialValues;
4013                 case S_THREAD_LOCAL_ZEROFILL:
4014                         return ld::Section::typeTLVZeroFill;
4015                 case S_THREAD_LOCAL_VARIABLES:
4016                         return ld::Section::typeTLVDefs;
4017                 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
4018                         return ld::Section::typeTLVInitializerPointers;
4019         }
4020         return ld::Section::typeUnclassified;
4021 }
4022
4023
4024 template <typename A>
4025 Atom<A>* Section<A>::findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end)
4026 {
4027         // do a binary search of atom array
4028         uint32_t atomCount = end - start;
4029         Atom<A>* base = start;
4030         for (uint32_t n = atomCount; n > 0; n /= 2) {
4031                 Atom<A>* pivot = &base[n/2];
4032                 pint_t atomStartAddr = pivot->_objAddress;
4033                 pint_t atomEndAddr = atomStartAddr + pivot->_size;
4034                 if ( atomStartAddr <= addr ) {
4035                         // address in normal atom
4036                         if (addr < atomEndAddr)
4037                                 return pivot;
4038                         // address in "end" label (but not in alias)
4039                         if ( (pivot->_size == 0) && (addr == atomEndAddr) && !pivot->isAlias() )
4040                                 return pivot;
4041                 }
4042                 if ( addr >= atomEndAddr ) {
4043                         // key > pivot
4044                         // move base to atom after pivot
4045                         base = &pivot[1];
4046                         --n;
4047                 }
4048                 else {
4049                         // key < pivot
4050                         // keep same base
4051                 }
4052         }
4053         return NULL;
4054 }
4055
4056 template <typename A>
4057 ld::Atom::Alignment Section<A>::alignmentForAddress(pint_t addr)
4058 {
4059         const uint32_t sectionAlignment = this->_machOSection->align();
4060         uint32_t modulus = (addr % (1 << sectionAlignment));
4061         if ( modulus > 0xFFFF )
4062                 warning("alignment for symbol at address 0x%08llX in %s exceeds 2^16", (uint64_t)addr, this->file().path());
4063         return ld::Atom::Alignment(sectionAlignment, modulus);
4064 }
4065
4066 template <typename A>
4067 uint32_t Section<A>::sectionNum(class Parser<A>& parser) const
4068 {
4069         if ( _machOSection == NULL )
4070                 return 0;
4071         else
4072                 return 1 + (this->_machOSection - parser.firstMachOSection());
4073 }
4074
4075 // arm does not have zero cost exceptions
4076 template <>
4077 uint32_t CFISection<arm>::cfiCount(Parser<arm>& parser)
4078 {
4079         if ( parser.armUsesZeroCostExceptions() ) {
4080                 // create ObjectAddressSpace object for use by libunwind
4081                 OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
4082                 return libunwind::CFI_Parser<OAS>::getCFICount(oas,
4083                                                                                         this->_machOSection->addr(), this->_machOSection->size());
4084         }
4085         return 0;
4086 }
4087
4088 template <typename A>
4089 uint32_t CFISection<A>::cfiCount(Parser<A>& parser)
4090 {
4091         // create ObjectAddressSpace object for use by libunwind
4092         OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
4093         return libunwind::CFI_Parser<OAS>::getCFICount(oas,
4094                                                                                 this->_machOSection->addr(), this->_machOSection->size());
4095 }
4096
4097 template <typename A>
4098 void CFISection<A>::warnFunc(void* ref, uint64_t funcAddr, const char* msg)
4099 {
4100         Parser<A>* parser = (Parser<A>*)ref;
4101         if ( ! parser->warnUnwindConversionProblems() )
4102                 return;
4103         if ( funcAddr != CFI_INVALID_ADDRESS ) {
4104                 // atoms are not constructed yet, so scan symbol table for labels
4105                 const char* name = parser->scanSymbolTableForAddress(funcAddr);
4106                 warning("could not create compact unwind for %s: %s", name, msg);
4107         }
4108         else {
4109                 warning("could not create compact unwind: %s", msg);
4110         }
4111 }
4112
4113 template <>
4114 bool CFISection<x86_64>::needsRelocating()
4115 {
4116         return true;
4117 }
4118
4119 template <>
4120 bool CFISection<arm64>::needsRelocating()
4121 {
4122         return true;
4123 }
4124
4125 template <typename A>
4126 bool CFISection<A>::needsRelocating()
4127 {
4128         return false;
4129 }
4130
4131 template <>
4132 void CFISection<x86_64>::cfiParse(class Parser<x86_64>& parser, uint8_t* buffer,
4133                                                                         libunwind::CFI_Atom_Info<CFISection<x86_64>::OAS>::CFI_Atom_Info cfiArray[],
4134                                                                         uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
4135 {
4136         // copy __eh_frame data to buffer
4137         memcpy(buffer, file().fileContent() + this->_machOSection->offset(), this->_machOSection->size());
4138
4139         // and apply relocations
4140         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + this->_machOSection->reloff());
4141         const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4142         for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4143                 uint64_t value = 0;
4144                 switch ( reloc->r_type() ) {
4145                         case X86_64_RELOC_SUBTRACTOR:
4146                                 value =  0 - parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4147                                 ++reloc;
4148                                 if ( reloc->r_extern() )
4149                                         value += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4150                                 break;
4151                         case X86_64_RELOC_UNSIGNED:
4152                                 value = parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4153                                 break;
4154                         case X86_64_RELOC_GOT:
4155                                 // this is used for the reference to the personality function in CIEs
4156                                 // store the symbol number of the personality function for later use as a Fixup
4157                                 value = reloc->r_symbolnum();
4158                                 break;
4159                         default:
4160                                 fprintf(stderr, "CFISection::cfiParse() unexpected relocation type at r_address=0x%08X\n", reloc->r_address());
4161                                 break;
4162                 }
4163                 uint64_t*       p64;
4164                 uint32_t*       p32;
4165                 switch ( reloc->r_length() ) {
4166                         case 3:
4167                                 p64 = (uint64_t*)&buffer[reloc->r_address()];
4168                                 E::set64(*p64, value + E::get64(*p64));
4169                                 break;
4170                         case 2:
4171                                 p32 = (uint32_t*)&buffer[reloc->r_address()];
4172                                 E::set32(*p32, value + E::get32(*p32));
4173                                 break;
4174                         default:
4175                                 fprintf(stderr, "CFISection::cfiParse() unexpected relocation size at r_address=0x%08X\n", reloc->r_address());
4176                                 break;
4177                 }
4178         }
4179
4180         // create ObjectAddressSpace object for use by libunwind
4181         OAS oas(*this, buffer);
4182
4183         // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
4184         const char* msg;
4185         msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86_64>::parseCFIs(
4186                                                         oas, this->_machOSection->addr(), this->_machOSection->size(),
4187                                                         cuStarts, cuCount, parser.keepDwarfUnwind(), parser.forceDwarfConversion(), parser.neverConvertDwarf(),
4188                                                         cfiArray, count, (void*)&parser, warnFunc);
4189         if ( msg != NULL )
4190                 throwf("malformed __eh_frame section: %s", msg);
4191 }
4192
4193 template <>
4194 void CFISection<x86>::cfiParse(class Parser<x86>& parser, uint8_t* buffer,
4195                                                                         libunwind::CFI_Atom_Info<CFISection<x86>::OAS>::CFI_Atom_Info cfiArray[],
4196                                                                         uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
4197 {
4198         // create ObjectAddressSpace object for use by libunwind
4199         OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
4200
4201         // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
4202         const char* msg;
4203         msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86>::parseCFIs(
4204                                                         oas, this->_machOSection->addr(), this->_machOSection->size(),
4205                                                         cuStarts, cuCount, parser.keepDwarfUnwind(), parser.forceDwarfConversion(), parser.neverConvertDwarf(),
4206                                                         cfiArray, count, (void*)&parser, warnFunc);
4207         if ( msg != NULL )
4208                 throwf("malformed __eh_frame section: %s", msg);
4209 }
4210
4211
4212
4213
4214 template <>
4215 void CFISection<arm>::cfiParse(class Parser<arm>& parser, uint8_t* buffer,
4216                                                                         libunwind::CFI_Atom_Info<CFISection<arm>::OAS>::CFI_Atom_Info cfiArray[],
4217                                                                         uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
4218 {
4219         if ( !parser.armUsesZeroCostExceptions() ) {
4220                 // most arm do not use zero cost exceptions
4221                 assert(count == 0);
4222                 return;
4223         }
4224         // create ObjectAddressSpace object for use by libunwind
4225         OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
4226
4227         // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
4228         const char* msg;
4229         msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_arm>::parseCFIs(
4230                                                         oas, this->_machOSection->addr(), this->_machOSection->size(),
4231                                                         cuStarts, cuCount, parser.keepDwarfUnwind(), parser.forceDwarfConversion(), parser.neverConvertDwarf(),
4232                                                         cfiArray, count, (void*)&parser, warnFunc);
4233         if ( msg != NULL )
4234                 throwf("malformed __eh_frame section: %s", msg);
4235 }
4236
4237
4238
4239
4240 template <>
4241 void CFISection<arm64>::cfiParse(class Parser<arm64>& parser, uint8_t* buffer,
4242                                                                         libunwind::CFI_Atom_Info<CFISection<arm64>::OAS>::CFI_Atom_Info cfiArray[],
4243                                                                         uint32_t& count, const pint_t cuStarts[], uint32_t cuCount)
4244 {
4245         // copy __eh_frame data to buffer
4246         memcpy(buffer, file().fileContent() + this->_machOSection->offset(), this->_machOSection->size());
4247
4248         // and apply relocations
4249         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + this->_machOSection->reloff());
4250         const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4251         for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4252                 uint64_t* p64 = (uint64_t*)&buffer[reloc->r_address()];
4253                 uint32_t* p32 = (uint32_t*)&buffer[reloc->r_address()];
4254                 uint32_t addend32 = E::get32(*p32);
4255                 uint64_t addend64 = E::get64(*p64);
4256                 uint64_t value = 0;
4257                 switch ( reloc->r_type() ) {
4258                         case ARM64_RELOC_SUBTRACTOR:
4259                                 value =  0 - parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4260                                 ++reloc;
4261                                 if ( reloc->r_extern() )
4262                                         value += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4263                                 break;
4264                         case ARM64_RELOC_UNSIGNED:
4265                                 value = parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4266                                 break;
4267                         case ARM64_RELOC_POINTER_TO_GOT:
4268                                 // this is used for the reference to the personality function in CIEs
4269                                 // store the symbol number of the personality function for later use as a Fixup
4270                                 value = reloc->r_symbolnum();
4271                                 addend32 = 0;
4272                                 addend64 = 0;
4273                                 break;
4274                         default:
4275                                 fprintf(stderr, "CFISection::cfiParse() unexpected relocation type at r_address=0x%08X\n", reloc->r_address());
4276                                 break;
4277                 }
4278                 switch ( reloc->r_length() ) {
4279                         case 3:
4280                                 E::set64(*p64, value + addend64);
4281                                 break;
4282                         case 2:
4283                                 E::set32(*p32, value + addend32);
4284                                 break;
4285                         default:
4286                                 fprintf(stderr, "CFISection::cfiParse() unexpected relocation size at r_address=0x%08X\n", reloc->r_address());
4287                                 break;
4288                 }
4289         }
4290
4291
4292         // create ObjectAddressSpace object for use by libunwind
4293         OAS oas(*this, buffer);
4294
4295         // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
4296         const char* msg;
4297         msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_arm64>::parseCFIs(
4298                                                         oas, this->_machOSection->addr(), this->_machOSection->size(),
4299                                                         cuStarts, cuCount, parser.keepDwarfUnwind(), parser.forceDwarfConversion(), parser.neverConvertDwarf(),
4300                                                         cfiArray, count, (void*)&parser, warnFunc);
4301         if ( msg != NULL )
4302                 throwf("malformed __eh_frame section: %s", msg);
4303 }
4304
4305
4306 template <typename A>
4307 uint32_t CFISection<A>::computeAtomCount(class Parser<A>& parser,
4308                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4309                                                                                         const struct Parser<A>::CFI_CU_InfoArrays& cfis)
4310 {
4311         return cfis.cfiCount;
4312 }
4313
4314
4315
4316 template <typename A>
4317 uint32_t CFISection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4318                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4319                                                                         const struct Parser<A>::CFI_CU_InfoArrays& cfis)
4320 {
4321         this->_beginAtoms = (Atom<A>*)p;
4322         // walk CFI_Atom_Info array and create atom for each entry
4323         const CFI_Atom_Info* start = &cfis.cfiArray[0];
4324         const CFI_Atom_Info* end   = &cfis.cfiArray[cfis.cfiCount];
4325         for(const CFI_Atom_Info* a=start; a < end; ++a) {
4326                 Atom<A>* space = (Atom<A>*)p;
4327                 new (space) Atom<A>(*this, (a->isCIE ? "CIE" : "FDE"), a->address, a->size,
4328                                                                                 ld::Atom::definitionRegular, ld::Atom::combineNever, ld::Atom::scopeTranslationUnit,
4329                                                                                 ld::Atom::typeCFI, ld::Atom::symbolTableNotInFinalLinkedImages,
4330                                                                                 false, false, false, ld::Atom::Alignment(0));
4331                 p += sizeof(Atom<A>);
4332         }
4333         this->_endAtoms = (Atom<A>*)p;
4334         return cfis.cfiCount;
4335 }
4336
4337
4338 template <> bool CFISection<x86_64>::bigEndian() { return false; }
4339 template <> bool CFISection<x86>::bigEndian() { return false; }
4340 template <> bool CFISection<arm>::bigEndian() { return false; }
4341 template <> bool CFISection<arm64>::bigEndian() { return false; }
4342
4343
4344 template <>
4345 void CFISection<x86_64>::addCiePersonalityFixups(class Parser<x86_64>& parser, const CFI_Atom_Info* cieInfo)
4346 {
4347         uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
4348         if ( personalityEncoding == 0x9B ) {
4349                 // compiler always produces X86_64_RELOC_GOT with addend of 4 to personality function
4350                 // CFISection<x86_64>::cfiParse() set targetAddress to be symbolIndex + 4 + addressInCIE
4351                 uint32_t symbolIndex = cieInfo->u.cieInfo.personality.targetAddress - 4
4352                                                                         - cieInfo->address - cieInfo->u.cieInfo.personality.offsetInCFI;
4353                 const macho_nlist<P>& sym = parser.symbolFromIndex(symbolIndex);
4354                 const char* personalityName = parser.nameFromSymbol(sym);
4355
4356                 Atom<x86_64>* cieAtom = this->findAtomByAddress(cieInfo->address);
4357                 Parser<x86_64>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
4358                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, personalityName);
4359                 parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, 4);
4360                 parser.addFixup(src, ld::Fixup::k3of3, ld::Fixup::kindStoreX86PCRel32GOT);
4361         }
4362         else if ( personalityEncoding != 0 ) {
4363                 throwf("unsupported address encoding (%02X) of personality function in CIE",
4364                                 personalityEncoding);
4365         }
4366 }
4367
4368 template <>
4369 void CFISection<x86>::addCiePersonalityFixups(class Parser<x86>& parser, const CFI_Atom_Info* cieInfo)
4370 {
4371         uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
4372         if ( (personalityEncoding == 0x9B) || (personalityEncoding == 0x90) ) {
4373                 uint32_t offsetInCFI = cieInfo->u.cieInfo.personality.offsetInCFI;
4374                 uint32_t nlpAddr = cieInfo->u.cieInfo.personality.targetAddress;
4375                 Atom<x86>* cieAtom = this->findAtomByAddress(cieInfo->address);
4376                 Atom<x86>* nlpAtom = parser.findAtomByAddress(nlpAddr);
4377                 assert(nlpAtom->contentType() == ld::Atom::typeNonLazyPointer);
4378                 Parser<x86>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
4379
4380                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, nlpAtom);
4381                 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
4382                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, offsetInCFI);
4383                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
4384         }
4385         else if ( personalityEncoding != 0 ) {
4386                 throwf("unsupported address encoding (%02X) of personality function in CIE", personalityEncoding);
4387         }
4388 }
4389
4390 #if SUPPORT_ARCH_arm64
4391 template <>
4392 void CFISection<arm64>::addCiePersonalityFixups(class Parser<arm64>& parser, const CFI_Atom_Info* cieInfo)
4393 {
4394         uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
4395         if ( personalityEncoding == 0x9B ) {
4396                 // compiler always produces ARM64_RELOC_GOT r_pcrel=1 to personality function
4397                 // CFISection<arm64>::cfiParse() set targetAddress to be symbolIndex + addressInCIE
4398                 uint32_t symbolIndex = cieInfo->u.cieInfo.personality.targetAddress
4399                                                                         - cieInfo->address - cieInfo->u.cieInfo.personality.offsetInCFI;
4400                 const macho_nlist<P>& sym = parser.symbolFromIndex(symbolIndex);
4401                 const char* personalityName = parser.nameFromSymbol(sym);
4402
4403                 Atom<arm64>* cieAtom = this->findAtomByAddress(cieInfo->address);
4404                 Parser<arm64>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
4405                 parser.addFixup(src, ld::Fixup::k1of2, ld::Fixup::kindSetTargetAddress, false, personalityName);
4406                 parser.addFixup(src, ld::Fixup::k2of2, ld::Fixup::kindStoreARM64PCRelToGOT);
4407         }
4408         else if ( personalityEncoding != 0 ) {
4409                 throwf("unsupported address encoding (%02X) of personality function in CIE",
4410                                 personalityEncoding);
4411         }
4412 }
4413 #endif
4414
4415 template <>
4416 void CFISection<arm>::addCiePersonalityFixups(class Parser<arm>& parser, const CFI_Atom_Info* cieInfo)
4417 {
4418         uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
4419         if ( (personalityEncoding == 0x9B) || (personalityEncoding == 0x90) ) {
4420                 uint32_t offsetInCFI = cieInfo->u.cieInfo.personality.offsetInCFI;
4421                 uint32_t nlpAddr = cieInfo->u.cieInfo.personality.targetAddress;
4422                 Atom<arm>* cieAtom = this->findAtomByAddress(cieInfo->address);
4423                 Atom<arm>* nlpAtom = parser.findAtomByAddress(nlpAddr);
4424                 assert(nlpAtom->contentType() == ld::Atom::typeNonLazyPointer);
4425                 Parser<arm>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
4426
4427                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, nlpAtom);
4428                 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
4429                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, offsetInCFI);
4430                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
4431         }
4432         else if ( personalityEncoding != 0 ) {
4433                 throwf("unsupported address encoding (%02X) of personality function in CIE", personalityEncoding);
4434         }
4435 }
4436
4437
4438
4439 template <typename A>
4440 void CFISection<A>::addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo)
4441 {
4442         assert(0 && "addCiePersonalityFixups() not implemented for arch");
4443 }
4444
4445 template <typename A>
4446 void CFISection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cfis)
4447 {
4448         ld::Fixup::Kind store32 = bigEndian() ? ld::Fixup::kindStoreBigEndian32 : ld::Fixup::kindStoreLittleEndian32;
4449         ld::Fixup::Kind store64 = bigEndian() ? ld::Fixup::kindStoreBigEndian64 : ld::Fixup::kindStoreLittleEndian64;
4450
4451         // add all references for FDEs, including implicit group references
4452         const CFI_Atom_Info* end = &cfis.cfiArray[cfis.cfiCount];
4453         for(const CFI_Atom_Info* p = &cfis.cfiArray[0]; p < end; ++p) {
4454                 if ( p->isCIE ) {
4455                         // add reference to personality function if used
4456                         if ( p->u.cieInfo.personality.targetAddress != CFI_INVALID_ADDRESS ) {
4457                                 this->addCiePersonalityFixups(parser, p);
4458                         }
4459                 }
4460                 else {
4461                         // find FDE Atom
4462                         Atom<A>* fdeAtom = this->findAtomByAddress(p->address);
4463                         // find function Atom
4464                         Atom<A>* functionAtom = parser.findAtomByAddress(p->u.fdeInfo.function.targetAddress);
4465                         // find CIE Atom
4466                         Atom<A>* cieAtom = this->findAtomByAddress(p->u.fdeInfo.cie.targetAddress);
4467                         // find LSDA Atom
4468                         Atom<A>* lsdaAtom = NULL;
4469                         if ( p->u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS ) {
4470                                 lsdaAtom = parser.findAtomByAddress(p->u.fdeInfo.lsda.targetAddress);
4471                         }
4472                         // add reference from FDE to CIE (always 32-bit pc-rel)
4473                         typename Parser<A>::SourceLocation fdeToCieSrc(fdeAtom, p->u.fdeInfo.cie.offsetInCFI);
4474                         parser.addFixup(fdeToCieSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, fdeAtom);
4475                         parser.addFixup(fdeToCieSrc, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, p->u.fdeInfo.cie.offsetInCFI);
4476                         parser.addFixup(fdeToCieSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
4477                         parser.addFixup(fdeToCieSrc, ld::Fixup::k4of4, store32, cieAtom);
4478
4479                         // add reference from FDE to function
4480                         typename Parser<A>::SourceLocation fdeToFuncSrc(fdeAtom, p->u.fdeInfo.function.offsetInCFI);
4481                         switch (p->u.fdeInfo.function.encodingOfTargetAddress) {
4482                                 case DW_EH_PE_pcrel|DW_EH_PE_ptr:
4483                                         if ( sizeof(typename A::P::uint_t) == 8 ) {
4484                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
4485                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4486                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
4487                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store64);
4488                                                 break;
4489                                         }
4490                                         // else fall into 32-bit case
4491                                 case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
4492                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
4493                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4494                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
4495                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store32);
4496                                         break;
4497                                 default:
4498                                         throw "unsupported encoding in FDE of pointer to function";
4499                         }
4500
4501                         // add reference from FDE to LSDA
4502                         typename Parser<A>::SourceLocation fdeToLsdaSrc(fdeAtom,  p->u.fdeInfo.lsda.offsetInCFI);
4503                         if ( lsdaAtom != NULL ) {
4504                                 switch (p->u.fdeInfo.lsda.encodingOfTargetAddress) {
4505                                         case DW_EH_PE_pcrel|DW_EH_PE_ptr:
4506                                                 if ( sizeof(typename A::P::uint_t) == 8 ) {
4507                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
4508                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4509                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
4510                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store64);
4511                                                         break;
4512                                                 }
4513                                                 // else fall into 32-bit case
4514                                         case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
4515                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
4516                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4517                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
4518                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store32);
4519                                         break;
4520                                         default:
4521                                                 throw "unsupported encoding in FDE of pointer to LSDA";
4522                                 }
4523                         }
4524
4525                         // FDE is in group lead by function atom
4526                         typename Parser<A>::SourceLocation fdeSrc(functionAtom,0);
4527                         parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateFDE, fdeAtom);
4528
4529                         // LSDA is in group lead by function atom
4530                         if ( lsdaAtom != NULL ) {
4531                                 parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, lsdaAtom);
4532                         }
4533                 }
4534         }
4535 }
4536
4537
4538
4539
4540 template <typename A>
4541 const void*      CFISection<A>::OAS::mappedAddress(pint_t addr)
4542 {
4543         if ( (_ehFrameStartAddr <= addr) && (addr < _ehFrameEndAddr) )
4544                 return &_ehFrameContent[addr-_ehFrameStartAddr];
4545         else {
4546                 // requested bytes are not in __eh_frame section
4547                 // this can occur when examining the instruction bytes in the __text
4548                 File<A>& file = _ehFrameSection.file();
4549                 for (uint32_t i=0; i < file._sectionsArrayCount; ++i ) {
4550                         const macho_section<typename A::P>* sect = file._sectionsArray[i]->machoSection();
4551                         // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
4552                         if ( sect != NULL ) {
4553                                 if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
4554                                         return file.fileContent() + sect->offset() + addr - sect->addr();
4555                                 }
4556                         }
4557                 }
4558                 throwf("__eh_frame parsing problem.  Can't find target of reference to address 0x%08llX", (uint64_t)addr);
4559         }
4560 }
4561
4562
4563 template <typename A>
4564 uint64_t CFISection<A>::OAS::getULEB128(pint_t& logicalAddr, pint_t end)
4565 {
4566         uintptr_t size = (end - logicalAddr);
4567         libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4568         libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4569         uint64_t result = libunwind::LocalAddressSpace::getULEB128(laddr, laddr+size);
4570         logicalAddr += (laddr-sladdr);
4571         return result;
4572 }
4573
4574 template <typename A>
4575 int64_t CFISection<A>::OAS::getSLEB128(pint_t& logicalAddr, pint_t end)
4576 {
4577         uintptr_t size = (end - logicalAddr);
4578         libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4579         libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4580         int64_t result = libunwind::LocalAddressSpace::getSLEB128(laddr, laddr+size);
4581         logicalAddr += (laddr-sladdr);
4582         return result;
4583 }
4584
4585 template <typename A>
4586 typename A::P::uint_t CFISection<A>::OAS::getEncodedP(pint_t& addr, pint_t end, uint8_t encoding)
4587 {
4588         pint_t startAddr = addr;
4589         pint_t p = addr;
4590         pint_t result;
4591
4592         // first get value
4593         switch (encoding & 0x0F) {
4594                 case DW_EH_PE_ptr:
4595                         result = getP(addr);
4596                         p += sizeof(pint_t);
4597                         addr = (pint_t)p;
4598                         break;
4599                 case DW_EH_PE_uleb128:
4600                         result = getULEB128(addr, end);
4601                         break;
4602                 case DW_EH_PE_udata2:
4603                         result = get16(addr);
4604                         p += 2;
4605                         addr = (pint_t)p;
4606                         break;
4607                 case DW_EH_PE_udata4:
4608                         result = get32(addr);
4609                         p += 4;
4610                         addr = (pint_t)p;
4611                         break;
4612                 case DW_EH_PE_udata8:
4613                         result = get64(addr);
4614                         p += 8;
4615                         addr = (pint_t)p;
4616                         break;
4617                 case DW_EH_PE_sleb128:
4618                         result = getSLEB128(addr, end);
4619                         break;
4620                 case DW_EH_PE_sdata2:
4621                         result = (int16_t)get16(addr);
4622                         p += 2;
4623                         addr = (pint_t)p;
4624                         break;
4625                 case DW_EH_PE_sdata4:
4626                         result = (int32_t)get32(addr);
4627                         p += 4;
4628                         addr = (pint_t)p;
4629                         break;
4630                 case DW_EH_PE_sdata8:
4631                         result = get64(addr);
4632                         p += 8;
4633                         addr = (pint_t)p;
4634                         break;
4635                 default:
4636                         throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4637         }
4638
4639         // then add relative offset
4640         switch ( encoding & 0x70 ) {
4641                 case DW_EH_PE_absptr:
4642                         // do nothing
4643                         break;
4644                 case DW_EH_PE_pcrel:
4645                         result += startAddr;
4646                         break;
4647                 case DW_EH_PE_textrel:
4648                         throw "DW_EH_PE_textrel pointer encoding not supported";
4649                         break;
4650                 case DW_EH_PE_datarel:
4651                         throw "DW_EH_PE_datarel pointer encoding not supported";
4652                         break;
4653                 case DW_EH_PE_funcrel:
4654                         throw "DW_EH_PE_funcrel pointer encoding not supported";
4655                         break;
4656                 case DW_EH_PE_aligned:
4657                         throw "DW_EH_PE_aligned pointer encoding not supported";
4658                         break;
4659                 default:
4660                         throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4661                         break;
4662         }
4663
4664 //  Note: DW_EH_PE_indirect is only used in CIEs to refernce the personality pointer
4665 //  When parsing .o files that pointer contains zero, so we don't to return that.
4666 //  Instead we skip the dereference and return the address of the pointer.
4667 //      if ( encoding & DW_EH_PE_indirect )
4668 //              result = getP(result);
4669
4670         return result;
4671 }
4672
4673 template <>
4674 const char* CUSection<x86_64>::personalityName(class Parser<x86_64>& parser, const macho_relocation_info<x86_64::P>* reloc)
4675 {
4676         if ( reloc->r_extern() ) {
4677                 assert((reloc->r_type() == X86_64_RELOC_UNSIGNED) && "wrong reloc type on personality column in __compact_unwind section");
4678                 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4679                 return parser.nameFromSymbol(sym);
4680         }
4681         else {
4682                 const pint_t* content = (pint_t*)(this->file().fileContent() + this->_machOSection->offset() + reloc->r_address());
4683                 pint_t personalityAddr = *content;
4684                 assert((parser.sectionForAddress(personalityAddr)->type() == ld::Section::typeCode) && "personality column in __compact_unwind section is not pointer to function");
4685                 // atoms may not be constructed yet, so scan symbol table for labels
4686                 const char* name = parser.scanSymbolTableForAddress(personalityAddr);
4687                 return name;
4688         }
4689 }
4690
4691 template <>
4692 const char* CUSection<x86>::personalityName(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
4693 {
4694         if ( reloc->r_extern() ) {
4695                 assert((reloc->r_type() == GENERIC_RELOC_VANILLA) && "wrong reloc type on personality column in __compact_unwind section");
4696                 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4697                 return parser.nameFromSymbol(sym);
4698         }
4699         else {
4700                 // support __LD, __compact_unwind personality entries which are pointer to personality non-lazy pointer
4701                 const pint_t* content = (pint_t*)(this->file().fileContent() + this->_machOSection->offset() + reloc->r_address());
4702                 pint_t nlPointerAddr = *content;
4703                 Section<x86>* nlSection = parser.sectionForAddress(nlPointerAddr);
4704                 if ( nlSection->type() == ld::Section::typeCode ) {
4705                         // personality function is defined in this .o file, so this is a direct reference to it
4706                         // atoms may not be constructed yet, so scan symbol table for labels
4707                         const char* name = parser.scanSymbolTableForAddress(nlPointerAddr);
4708                         return name;
4709                 }
4710                 else {
4711                         uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(nlPointerAddr, nlSection->machoSection());
4712                         const macho_nlist<P>& nlSymbol = parser.symbolFromIndex(symIndex);
4713                         return parser.nameFromSymbol(nlSymbol);
4714                 }
4715         }
4716 }
4717
#if SUPPORT_ARCH_arm64
// arm64: returns the name of the personality function referenced by a relocation on the
// personality column of a __compact_unwind entry.
template <>
const char* CUSection<arm64>::personalityName(class Parser<arm64>& parser, const macho_relocation_info<arm64::P>* reloc)
{
        if ( !reloc->r_extern() ) {
                // local relocation: read the address stored in the column itself
                const pint_t* addrInColumn = (const pint_t*)(this->file().fileContent() + this->_machOSection->offset() + reloc->r_address());
                const pint_t targetAddr = *addrInColumn;
                Section<arm64>* targetSection = parser.sectionForAddress(targetAddr);
                assert((targetSection->type() == ld::Section::typeCode) && "personality column in __compact_unwind section is not pointer to function");
                // atoms may not be constructed yet, so scan symbol table for labels
                return parser.scanSymbolTableForAddress(targetAddr);
        }

        // external relocation: the referenced symbol supplies the name
        assert((reloc->r_type() == ARM64_RELOC_UNSIGNED) && "wrong reloc type on personality column in __compact_unwind section");
        const macho_nlist<P>& personalitySym = parser.symbolFromIndex(reloc->r_symbolnum());
        return parser.nameFromSymbol(personalitySym);
}
#endif
4738
#if SUPPORT_ARCH_arm_any
// arm: returns the name of the personality function referenced by a relocation on the
// personality column of a __compact_unwind entry.
// For a local relocation the column may hold either the address of the function itself
// (target is in a code section) or the address of a non-lazy pointer to it.
template <>
const char* CUSection<arm>::personalityName(class Parser<arm>& parser, const macho_relocation_info<arm::P>* reloc)
{
        if ( reloc->r_extern() ) {
                // external relocation: the referenced symbol supplies the name
                assert((reloc->r_type() == ARM_RELOC_VANILLA) && "wrong reloc type on personality column in __compact_unwind section");
                const macho_nlist<P>& personalitySym = parser.symbolFromIndex(reloc->r_symbolnum());
                return parser.nameFromSymbol(personalitySym);
        }

        // support __LD, __compact_unwind personality entries which are pointer to personality non-lazy pointer
        const pint_t* addrInColumn = (const pint_t*)(this->file().fileContent() + this->_machOSection->offset() + reloc->r_address());
        const pint_t targetAddr = *addrInColumn;
        Section<arm>* targetSection = parser.sectionForAddress(targetAddr);
        if ( targetSection->type() != ld::Section::typeCode ) {
                // target is not code: look up the symbol recorded for it in the indirect symbol table
                const uint32_t indirectSymIndex = parser.symbolIndexFromIndirectSectionAddress(targetAddr, targetSection->machoSection());
                const macho_nlist<P>& indirectSym = parser.symbolFromIndex(indirectSymIndex);
                return parser.nameFromSymbol(indirectSym);
        }

        // personality function is defined in this .o file, so this is a direct reference to it
        // atoms may not be constructed yet, so scan symbol table for labels
        return parser.scanSymbolTableForAddress(targetAddr);
}
#endif
4767
4768
4769 template <typename A>
4770 const char* CUSection<A>::personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
4771 {
4772         return NULL;
4773 }
4774
4775 template <>
4776 bool CUSection<x86>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
4777 {
4778         return ((enc & UNWIND_X86_MODE_MASK) == UNWIND_X86_MODE_DWARF);
4779 }
4780
4781 template <>
4782 bool CUSection<x86_64>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
4783 {
4784         return ((enc & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_DWARF);
4785 }
4786
#if SUPPORT_ARCH_arm_any
// arm: true when the mode bits of the compact unwind encoding equal UNWIND_ARM_MODE_DWARF.
template <>
bool CUSection<arm>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
{
        const compact_unwind_encoding_t mode = (enc & UNWIND_ARM_MODE_MASK);
        return ( mode == UNWIND_ARM_MODE_DWARF );
}
#endif
4794
#if SUPPORT_ARCH_arm64
// arm64: true when the mode bits of the compact unwind encoding equal UNWIND_ARM64_MODE_DWARF.
template <>
bool CUSection<arm64>::encodingMeansUseDwarf(compact_unwind_encoding_t enc)
{
        const compact_unwind_encoding_t mode = (enc & UNWIND_ARM64_MODE_MASK);
        return ( mode == UNWIND_ARM64_MODE_DWARF );
}
#endif
4802
4803 template <typename A>
4804 int CUSection<A>::infoSorter(const void* l, const void* r)
4805 {
4806         // sort references by symbol index, then address
4807         const Info* left = (Info*)l;
4808         const Info* right = (Info*)r;
4809         if ( left->functionSymbolIndex == right->functionSymbolIndex )
4810                 return (left->functionStartAddress - right->functionStartAddress);
4811         else
4812                 return (left->functionSymbolIndex - right->functionSymbolIndex);
4813 }
4814
4815 template <typename A>
4816 void CUSection<A>::parse(class Parser<A>& parser, uint32_t cnt, Info array[])
4817 {
4818         // walk section content and copy to Info array
4819         const macho_compact_unwind_entry<P>* const entries = (macho_compact_unwind_entry<P>*)(this->file().fileContent() + this->_machOSection->offset());
4820         for (uint32_t i=0; i < cnt; ++i) {
4821                 Info* info = &array[i];
4822                 const macho_compact_unwind_entry<P>* entry = &entries[i];
4823                 info->functionStartAddress      = entry->codeStart();
4824                 info->functionSymbolIndex   = 0xFFFFFFFF;
4825                 info->rangeLength                       = entry->codeLen();
4826                 info->compactUnwindInfo         = entry->compactUnwindInfo();
4827                 info->personality                       = NULL;
4828                 info->lsdaAddress                       = entry->lsda();
4829                 info->function                          = NULL;
4830                 info->lsda                                      = NULL;
4831                 if ( (info->compactUnwindInfo & UNWIND_PERSONALITY_MASK) != 0 )
4832                         warning("no bits should be set in UNWIND_PERSONALITY_MASK of compact unwind encoding in __LD,__compact_unwind section");
4833                 if ( info->lsdaAddress != 0 ) {
4834                         info->compactUnwindInfo |= UNWIND_HAS_LSDA;
4835                 }
4836         }
4837
4838         // scan relocs, extern relocs are needed for personality references (possibly for function/lsda refs??)
4839         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(this->file().fileContent() + this->_machOSection->reloff());
4840         const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4841         for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4842                 if ( reloc->r_extern() ) {
4843                         // only expect external relocs on some colummns
4844                         if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::personalityFieldOffset() ) {
4845                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4846                                 array[entryIndex].personality = this->personalityName(parser, reloc);
4847                         }
4848                         else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::lsdaFieldOffset() ) {
4849                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4850                                 const macho_nlist<P>& lsdaSym = parser.symbolFromIndex(reloc->r_symbolnum());
4851                                 if ( (lsdaSym.n_type() & N_TYPE) == N_SECT )
4852                                         array[entryIndex].lsdaAddress = lsdaSym.n_value();
4853                                 else
4854                                         warning("unexpected extern relocation to lsda in __compact_unwind section");
4855                         }
4856                         else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::codeStartFieldOffset() ) {
4857                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4858                                 array[entryIndex].functionSymbolIndex = reloc->r_symbolnum();
4859                                 array[entryIndex].functionStartAddress += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
4860                         }
4861                         else {
4862                                 warning("unexpected extern relocation in __compact_unwind section");
4863                         }
4864                 }
4865                 else {
4866                         if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::personalityFieldOffset() ) {
4867                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4868                                 array[entryIndex].personality = this->personalityName(parser, reloc);
4869                         }
4870                 }
4871         }
4872
4873         // sort array by function start address so unwind infos will be contiguous for a given function
4874         ::qsort(array, cnt, sizeof(Info), infoSorter);
4875 }
4876
4877 template <typename A>
4878 uint32_t CUSection<A>::count()
4879 {
4880         const macho_section<P>* machoSect =     this->machoSection();
4881         if ( (machoSect->size() % sizeof(macho_compact_unwind_entry<P>)) != 0 )
4882                 throw "malformed __LD,__compact_unwind section, bad length";
4883
4884         return machoSect->size() / sizeof(macho_compact_unwind_entry<P>);
4885 }
4886
4887 template <typename A>
4888 void CUSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cus)
4889 {
4890         Info* const arrayStart = cus.cuArray;
4891         Info* const arrayEnd = &cus.cuArray[cus.cuCount];
4892         for (Info* info=arrayStart; info < arrayEnd; ++info) {
4893                 // find function atom from address
4894                 info->function = parser.findAtomByAddress(info->functionStartAddress);
4895                 // find lsda atom from address
4896                 if ( info->lsdaAddress != 0 ) {
4897                         info->lsda = parser.findAtomByAddress(info->lsdaAddress);
4898                         // add lsda subordinate
4899                         typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4900                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, info->lsda);
4901                 }
4902                 if ( info->personality != NULL ) {
4903                         // add personality subordinate
4904                         typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4905                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinatePersonality, false, info->personality);
4906                 }
4907         }
4908
4909 }
4910
4911 template <typename A>
4912 SymboledSection<A>::SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
4913         : Section<A>(f, s), _type(ld::Atom::typeUnclassified)
4914 {
4915         switch ( s->flags() & SECTION_TYPE ) {
4916                 case S_ZEROFILL:
4917                         _type = ld::Atom::typeZeroFill;
4918                         break;
4919                 case S_MOD_INIT_FUNC_POINTERS:
4920                         _type = ld::Atom::typeInitializerPointers;
4921                         break;
4922                 case S_MOD_TERM_FUNC_POINTERS:
4923                         _type = ld::Atom::typeTerminatorPointers;
4924                         break;
4925                 case S_THREAD_LOCAL_VARIABLES:
4926                         _type = ld::Atom::typeTLV;
4927                         break;
4928                 case S_THREAD_LOCAL_ZEROFILL:
4929                         _type = ld::Atom::typeTLVZeroFill;
4930                         break;
4931                 case S_THREAD_LOCAL_REGULAR:
4932                         _type = ld::Atom::typeTLVInitialValue;
4933                         break;
4934                 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
4935                         _type = ld::Atom::typeTLVInitializerPointers;
4936                         break;
4937                 case S_REGULAR:
4938                         if ( strncmp(s->sectname(), "__gcc_except_tab", 16) == 0 )
4939                                 _type = ld::Atom::typeLSDA;
4940                         else if ( this->type() == ld::Section::typeInitializerPointers )
4941                                 _type = ld::Atom::typeInitializerPointers;
4942                         break;
4943         }
4944 }
4945
4946
4947 template <typename A>
4948 bool SymboledSection<A>::dontDeadStrip()
4949 {
4950         switch ( _type ) {
4951                 case ld::Atom::typeInitializerPointers:
4952                 case ld::Atom::typeTerminatorPointers:
4953                         return true;
4954                 default:
4955                         // model an object file without MH_SUBSECTIONS_VIA_SYMBOLS as one in which nothing can be dead stripped
4956                         if ( ! this->_file.canScatterAtoms() )
4957                                 return true;
4958                         // call inherited
4959                         return Section<A>::dontDeadStrip();
4960         }
4961         return false;
4962 }
4963
4964
4965 template <typename A>
4966 uint32_t SymboledSection<A>::computeAtomCount(class Parser<A>& parser,
4967                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
4968                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
4969 {
4970         const pint_t startAddr = this->_machOSection->addr();
4971         const pint_t endAddr = startAddr + this->_machOSection->size();
4972         const uint32_t sectNum = this->sectionNum(parser);
4973
4974         uint32_t count = 0;
4975         pint_t  addr;
4976         pint_t  size;
4977         const macho_nlist<P>* sym;
4978         while ( it.next(parser, *this, sectNum, startAddr, endAddr, &addr, &size, &sym) ) {
4979                 ++count;
4980         }
4981         //fprintf(stderr, "computeAtomCount(%s,%s) => %d\n", this->segmentName(), this->sectionName(), count);
4982         return count;
4983 }
4984
4985 template <typename A>
4986 uint32_t SymboledSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4987                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4988                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
4989 {
4990         this->_beginAtoms = (Atom<A>*)p;
4991
4992         //fprintf(stderr, "SymboledSection::appendAtoms() in section %s\n", this->_machOSection->sectname());
4993         const pint_t startAddr = this->_machOSection->addr();
4994         const pint_t endAddr = startAddr + this->_machOSection->size();
4995         const uint32_t sectNum = this->sectionNum(parser);
4996
4997         uint32_t count = 0;
4998         pint_t  addr;
4999         pint_t  size;
5000         const macho_nlist<P>* label;
5001         while ( it.next(parser, *this, sectNum, startAddr, endAddr, &addr, &size, &label) ) {
5002                 Atom<A>* allocatedSpace = (Atom<A>*)p;
5003                 // is break because of label or CFI?
5004                 if ( label != NULL ) {
5005                         // The size is computed based on the address of the next label (or the end of the section for the last label)
5006                         // If there are two labels at the same address, we want them one to be an alias of the other.
5007                         // If the label is at the end of a section, it is has zero size, but is not an alias
5008                         const bool isAlias = ( (size == 0) && (addr <  endAddr) );
5009                         new (allocatedSpace) Atom<A>(*this, parser, *label, size, isAlias);
5010                         if ( isAlias )
5011                                 this->_hasAliases = true;
5012                         if ( parser.altEntryFromSymbol(*label) )
5013                                 this->_altEntries.insert(allocatedSpace);
5014                 }
5015                 else {
5016                         ld::Atom::SymbolTableInclusion inclusion = ld::Atom::symbolTableNotIn;
5017                         ld::Atom::ContentType ctype = this->contentType();
5018                         if ( ctype == ld::Atom::typeLSDA )
5019                                 inclusion = ld::Atom::symbolTableInWithRandomAutoStripLabel;
5020                         new (allocatedSpace) Atom<A>(*this, "anon", addr, size, ld::Atom::definitionRegular, ld::Atom::combineNever,
5021                                                                                 ld::Atom::scopeTranslationUnit, ctype, inclusion,
5022                                                                                 this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
5023                 }
5024                 p += sizeof(Atom<A>);
5025                 ++count;
5026         }
5027
5028         this->_endAtoms = (Atom<A>*)p;
5029         return count;
5030 }
5031
5032
5033 template <>
5034 ld::Atom::SymbolTableInclusion ImplicitSizeSection<arm64>::symbolTableInclusion()
5035 {
5036         return ld::Atom::symbolTableInWithRandomAutoStripLabel;
5037 }
5038
5039 template <typename A>
5040 ld::Atom::SymbolTableInclusion ImplicitSizeSection<A>::symbolTableInclusion()
5041 {
5042         return ld::Atom::symbolTableNotIn;
5043 }
5044
5045
5046 template <typename A>
5047 uint32_t ImplicitSizeSection<A>::computeAtomCount(class Parser<A>& parser,
5048                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
5049                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
5050 {
5051         uint32_t count = 0;
5052         const macho_section<P>* sect = this->machoSection();
5053         const pint_t startAddr = sect->addr();
5054         const pint_t endAddr = startAddr + sect->size();
5055         for (pint_t addr = startAddr; addr < endAddr; addr += elementSizeAtAddress(addr) ) {
5056                 if ( useElementAt(parser, it, addr) )
5057                         ++count;
5058         }
5059         if ( it.fileHasOverlappingSymbols && (sect->size() != 0) && (this->combine(parser, startAddr) == ld::Atom::combineByNameAndContent) ) {
5060                 // if there are multiple labels in this section for the same address, then clone them into multi atoms
5061                 pint_t  prevSymbolAddr = (pint_t)(-1);
5062                 uint8_t prevSymbolSectNum = 0;
5063                 bool prevIgnore = false;
5064                 for(uint32_t i=0; i < it.sortedSymbolCount; ++i) {
5065                         const macho_nlist<P>& sym = parser.symbolFromIndex(it.sortedSymbolIndexes[i]);
5066                         const pint_t symbolAddr = sym.n_value();
5067                         const uint8_t symbolSectNum = sym.n_sect();
5068                         const bool ignore = this->ignoreLabel(parser.nameFromSymbol(sym));
5069                         if ( !ignore && !prevIgnore && (symbolAddr == prevSymbolAddr) && (prevSymbolSectNum == symbolSectNum) && (symbolSectNum == this->sectionNum(parser)) ) {
5070                                 ++count;
5071                         }
5072                         prevSymbolAddr = symbolAddr;
5073                         prevSymbolSectNum = symbolSectNum;
5074                         prevIgnore = ignore;
5075                 }
5076         }
5077         return count;
5078 }
5079
// Constructs this section's atoms in place in the buffer at 'p' and records the resulting
// range in _beginAtoms/_endAtoms.
//   parser - supplies symbol names and is passed through to the per-address helpers
//   p      - start of the storage in which Atom<A> objects are constructed back to back
//   it     - iterator that yields the next break (address, size, optional label) in this section
// Returns the number of atoms constructed.
// For each break: a labeled break yields one atom at the label (named, or unnamed when the
// label is ignored), then the remainder of the break is walked element by element and an
// unnamed atom is made for every element for which useElementAt() is true.
template <typename A>
uint32_t ImplicitSizeSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
                                                                                        struct Parser<A>::LabelAndCFIBreakIterator& it,
                                                                                        const struct Parser<A>::CFI_CU_InfoArrays&)
{
        this->_beginAtoms = (Atom<A>*)p;

        const macho_section<P>* sect = this->machoSection();
        const pint_t startAddr = sect->addr();
        const pint_t endAddr = startAddr + sect->size();
        const uint32_t sectNum = this->sectionNum(parser);
        //fprintf(stderr, "ImplicitSizeSection::appendAtoms() in section %s\n", sect->sectname());
        uint32_t count = 0;
        pint_t  foundAddr;
        pint_t  size;
        const macho_nlist<P>* foundLabel;
        Atom<A>* allocatedSpace;
        while ( it.next(parser, *this, sectNum, startAddr, endAddr, &foundAddr, &size, &foundLabel) ) {
                if ( foundLabel != NULL ) {
                        bool skip = false;
                        // the atom at the label covers exactly one element, whatever 'size' the iterator reported
                        pint_t labeledAtomSize = this->elementSizeAtAddress(foundAddr);
                        allocatedSpace = (Atom<A>*)p;
                        if ( this->ignoreLabel(parser.nameFromSymbol(*foundLabel)) ) {
                                if ( size == 0 ) {
                                        // <rdar://problem/10018737>
                                        // a size of zero means there is another label at same location
                                        // and we are supposed to ignore this label
                                        skip = true;
                                }
                                else {
                                        // ignored label with content: make an atom named by unlabeledAtomName() instead of by the label
                                        //fprintf(stderr, "  0x%08llX make annon, size=%lld\n", (uint64_t)foundAddr, (uint64_t)size);
                                        new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, foundAddr), foundAddr,
                                                                                        this->elementSizeAtAddress(foundAddr), this->definition(),
                                                                                        this->combine(parser, foundAddr), this->scopeAtAddress(parser, foundAddr),
                                                                                        this->contentType(), this->symbolTableInclusion(),
                                                                                        this->dontDeadStrip(), false, false, this->alignmentForAddress(foundAddr));
                                }
                        }
                        else {
                                // make named atom for label
                                //fprintf(stderr, "  0x%08llX make labeled\n", (uint64_t)foundAddr);
                                new (allocatedSpace) Atom<A>(*this, parser, *foundLabel, labeledAtomSize);
                        }
                        if ( !skip ) {
                                // an atom was constructed: consume its slot and step past its element so the
                                // loop below only covers what is left of this break
                                // NOTE(review): if size < labeledAtomSize (e.g. size == 0 for a non-ignored label)
                                // the subtraction presumably wraps, pint_t looking unsigned; foundAddr+size then
                                // lands at or before foundAddr so the loop below does nothing - confirm
                                ++count;
                                p += sizeof(Atom<A>);
                                foundAddr += labeledAtomSize;
                                size -= labeledAtomSize;
                        }
                }
                // some number of anonymous atoms
                for (pint_t addr = foundAddr; addr < (foundAddr+size); addr += elementSizeAtAddress(addr) ) {
                        // make anon atoms for area before label
                        if ( this->useElementAt(parser, it, addr) ) {
                                //fprintf(stderr, "  0x%08llX make annon, size=%lld\n", (uint64_t)addr, (uint64_t)elementSizeAtAddress(addr));
                                allocatedSpace = (Atom<A>*)p;
                                new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, addr), addr, this->elementSizeAtAddress(addr),
                                                                                        this->definition(), this->combine(parser, addr), this->scopeAtAddress(parser, addr),
                                                                                        this->contentType(), this->symbolTableInclusion(),
                                                                                        this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
                                ++count;
                                p += sizeof(Atom<A>);
                        }
                }
        }

        // p now points just past the last atom constructed
        this->_endAtoms = (Atom<A>*)p;

        return count;
}
5150
5151 template <typename A>
5152 bool Literal4Section<A>::ignoreLabel(const char* label) const
5153 {
5154         return (label[0] == 'L') || (label[0] == 'l');
5155 }
5156
5157 template <typename A>
5158 unsigned long Literal4Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5159 {
5160         const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
5161         return *literalContent;
5162 }
5163
5164 template <typename A>
5165 bool Literal4Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5166                                                                                                         const ld::IndirectBindingTable& ind) const
5167 {
5168         assert(this->type() == rhs.section().type());
5169         const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
5170
5171         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5172         assert(rhsAtom != NULL);
5173         if ( rhsAtom != NULL ) {
5174                 const uint32_t* rhsLiteralContent = (uint32_t*)rhsAtom->contentPointer();
5175                 return (*literalContent == *rhsLiteralContent);
5176         }
5177         return false;
5178 }
5179
5180
5181 template <typename A>
5182 bool Literal8Section<A>::ignoreLabel(const char* label) const
5183 {
5184         return (label[0] == 'L') || (label[0] == 'l');
5185 }
5186
5187 template <typename A>
5188 unsigned long Literal8Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5189 {
5190 #if __LP64__
5191         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
5192         return *literalContent;
5193 #else
5194         unsigned long hash = 5381;
5195         const uint8_t* byteContent = atom->contentPointer();
5196         for (int i=0; i < 8; ++i) {
5197                 hash = hash * 33 + byteContent[i];
5198         }
5199         return hash;
5200 #endif
5201 }
5202
5203 template <typename A>
5204 bool Literal8Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5205                                                                                                         const ld::IndirectBindingTable& ind) const
5206 {
5207         if ( rhs.section().type() != ld::Section::typeLiteral8 )
5208                 return false;
5209         assert(this->type() == rhs.section().type());
5210         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
5211
5212         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5213         assert(rhsAtom != NULL);
5214         if ( rhsAtom != NULL ) {
5215                 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
5216                 return (*literalContent == *rhsLiteralContent);
5217         }
5218         return false;
5219 }
5220
5221 template <typename A>
5222 bool Literal16Section<A>::ignoreLabel(const char* label) const
5223 {
5224         return (label[0] == 'L') || (label[0] == 'l');
5225 }
5226
5227 template <typename A>
5228 unsigned long Literal16Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5229 {
5230         unsigned long hash = 5381;
5231         const uint8_t* byteContent = atom->contentPointer();
5232         for (int i=0; i < 16; ++i) {
5233                 hash = hash * 33 + byteContent[i];
5234         }
5235         return hash;
5236 }
5237
5238 template <typename A>
5239 bool Literal16Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5240                                                                                                         const ld::IndirectBindingTable& ind) const
5241 {
5242         if ( rhs.section().type() != ld::Section::typeLiteral16 )
5243                 return false;
5244         assert(this->type() == rhs.section().type());
5245         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
5246
5247         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5248         assert(rhsAtom != NULL);
5249         if ( rhsAtom != NULL ) {
5250                 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
5251                 return ((literalContent[0] == rhsLiteralContent[0]) && (literalContent[1] == rhsLiteralContent[1]));
5252         }
5253         return false;
5254 }
5255
5256
5257
5258 template <typename A>
5259 typename A::P::uint_t CStringSection<A>::elementSizeAtAddress(pint_t addr)
5260 {
5261         const macho_section<P>* sect = this->machoSection();
5262         const char* stringContent = (char*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
5263         return strlen(stringContent) + 1;
5264 }
5265
5266 template <typename A>
5267 bool CStringSection<A>::useElementAt(Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
5268 {
5269         return true;
5270 }
5271
5272 template <typename A>
5273 bool CStringSection<A>::ignoreLabel(const char* label) const
5274 {
5275         return (label[0] == 'L') || (label[0] == 'l');
5276 }
5277
5278
5279 template <typename A>
5280 Atom<A>* CStringSection<A>::findAtomByAddress(pint_t addr)
5281 {
5282         Atom<A>* result = this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms);
5283         return result;
5284 }
5285
5286 template <typename A>
5287 unsigned long CStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5288 {
5289         unsigned long hash = 5381;
5290         const char* stringContent = (char*)atom->contentPointer();
5291         for (const char* s = stringContent; *s != '\0'; ++s) {
5292                 hash = hash * 33 + *s;
5293         }
5294         return hash;
5295 }
5296
5297
5298 template <typename A>
5299 bool CStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5300                                                                                                         const ld::IndirectBindingTable& ind) const
5301 {
5302         if ( rhs.section().type() != ld::Section::typeCString )
5303                 return false;
5304         assert(this->type() == rhs.section().type());
5305         assert(strcmp(this->sectionName(), rhs.section().sectionName())== 0);
5306         assert(strcmp(this->segmentName(), rhs.section().segmentName())== 0);
5307         const char* stringContent = (char*)atom->contentPointer();
5308
5309         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5310         assert(rhsAtom != NULL);
5311         if ( rhsAtom != NULL ) {
5312                 if ( atom->_size != rhsAtom->_size )
5313                         return false;
5314                 const char* rhsStringContent = (char*)rhsAtom->contentPointer();
5315                 return (strcmp(stringContent, rhsStringContent) == 0);
5316         }
5317         return false;
5318 }
5319
5320
// x86 specialization: non-lazy pointers are stored with a 32-bit little-endian fixup.
template <>
ld::Fixup::Kind NonLazyPointerSection<x86>::fixupKind()
{
        return ld::Fixup::kindStoreLittleEndian32;
}
5326
// arm specialization: non-lazy pointers are stored with a 32-bit little-endian fixup.
template <>
ld::Fixup::Kind NonLazyPointerSection<arm>::fixupKind()
{
        return ld::Fixup::kindStoreLittleEndian32;
}
5332
// arm64 specialization: non-lazy pointers are stored with a 64-bit little-endian fixup.
template <>
ld::Fixup::Kind NonLazyPointerSection<arm64>::fixupKind()
{
        return ld::Fixup::kindStoreLittleEndian64;
}
5338
5339
// x86_64 specialization: creates no fixups and asserts unconditionally,
// because this code path is not expected to be reached for x86_64 object files.
template <>
void NonLazyPointerSection<x86_64>::makeFixups(class Parser<x86_64>& parser, const struct Parser<x86_64>::CFI_CU_InfoArrays&)
{
        assert(0 && "x86_64 should not have non-lazy-pointer sections in .o files");
}
5345
5346 template <typename A>
5347 void NonLazyPointerSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
5348 {
5349         // add references for each NLP atom based on indirect symbol table
5350         const macho_section<P>* sect = this->machoSection();
5351         const pint_t endAddr = sect->addr() + sect->size();
5352         for( pint_t addr = sect->addr(); addr < endAddr; addr += sizeof(pint_t)) {
5353                 typename Parser<A>::SourceLocation      src;
5354                 typename Parser<A>::TargetDesc          target;
5355                 src.atom = this->findAtomByAddress(addr);
5356                 src.offsetInAtom = 0;
5357                 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
5358                 target.atom = NULL;
5359                 target.name = NULL;
5360                 target.weakImport = false;
5361                 target.addend = 0;
5362                 if ( symIndex == INDIRECT_SYMBOL_LOCAL ) {
5363                         // use direct reference for local symbols
5364                         const pint_t* nlpContent = (pint_t*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
5365                         pint_t targetAddr = P::getP(*nlpContent);
5366                         target.atom = parser.findAtomByAddress(targetAddr);
5367                         target.weakImport = false;
5368                         target.addend = (targetAddr - target.atom->objectAddress());
5369                         // <rdar://problem/8385011> if pointer to thumb function, mask of thumb bit (not an addend of +1)
5370                         if ( target.atom->isThumb() )
5371                                 target.addend &= (-2);
5372                         assert(src.atom->combine() == ld::Atom::combineNever);
5373                 }
5374                 else {
5375                         const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
5376                         // use direct reference for local symbols
5377                         if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
5378                                 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5379                                 assert(src.atom->combine() == ld::Atom::combineNever);
5380                         }
5381                         else {
5382                                 target.name = parser.nameFromSymbol(sym);
5383                                 target.weakImport = parser.weakImportFromSymbol(sym);
5384                                 assert(src.atom->combine() == ld::Atom::combineByNameAndReferences);
5385                         }
5386                 }
5387                 parser.addFixups(src, this->fixupKind(), target);
5388         }
5389 }
5390
5391 template <typename A>
5392 ld::Atom::Combine NonLazyPointerSection<A>::combine(Parser<A>& parser, pint_t addr)
5393 {
5394         const macho_section<P>* sect = this->machoSection();
5395         uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
5396         if ( symIndex == INDIRECT_SYMBOL_LOCAL)
5397                 return ld::Atom::combineNever;
5398
5399         // don't coalesce non-lazy-pointers to local symbols
5400         const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
5401         if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) )
5402                 return ld::Atom::combineNever;
5403
5404         return ld::Atom::combineByNameAndReferences;
5405 }
5406
5407 template <typename A>
5408 const char* NonLazyPointerSection<A>::targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind)
5409 {
5410         assert(atom->combine() == ld::Atom::combineByNameAndReferences);
5411         assert(atom->fixupCount() == 1);
5412         ld::Fixup::iterator fit = atom->fixupsBegin();
5413         const char* name = NULL;
5414         switch ( fit->binding ) {
5415                 case ld::Fixup::bindingByNameUnbound:
5416                         name = fit->u.name;
5417                         break;
5418                 case ld::Fixup::bindingByContentBound:
5419                         name = fit->u.target->name();
5420                         break;
5421                 case ld::Fixup::bindingsIndirectlyBound:
5422                         name = ind.indirectName(fit->u.bindingIndex);
5423                         break;
5424                 default:
5425                         assert(0);
5426         }
5427         assert(name != NULL);
5428         return name;
5429 }
5430
5431 template <typename A>
5432 unsigned long NonLazyPointerSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5433 {
5434         assert(atom->combine() == ld::Atom::combineByNameAndReferences);
5435         unsigned long hash = 9508;
5436         for (const char* s = this->targetName(atom, ind); *s != '\0'; ++s) {
5437                 hash = hash * 33 + *s;
5438         }
5439         return hash;
5440 }
5441
5442 template <typename A>
5443 bool NonLazyPointerSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5444                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5445 {
5446         if ( rhs.section().type() != ld::Section::typeNonLazyPointer )
5447                 return false;
5448         assert(this->type() == rhs.section().type());
5449         // there can be many non-lazy pointer in different section names
5450         // we only want to coalesce in same section name
5451         if ( *this != rhs.section() )
5452                 return false;
5453         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5454         assert(rhsAtom !=  NULL);
5455         const char* thisName = this->targetName(atom, indirectBindingTable);
5456         const char* rhsName = this->targetName(rhsAtom, indirectBindingTable);
5457         return (strcmp(thisName, rhsName) == 0);
5458 }
5459
5460 template <typename A>
5461 ld::Atom::Scope NonLazyPointerSection<A>::scopeAtAddress(Parser<A>& parser, pint_t addr)
5462 {
5463         const macho_section<P>* sect = this->machoSection();
5464         uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
5465         if ( symIndex == INDIRECT_SYMBOL_LOCAL)
5466                 return ld::Atom::scopeTranslationUnit;
5467         else
5468                 return ld::Atom::scopeLinkageUnit;
5469 }
5470
5471 template <typename A>
5472 const uint8_t* CFStringSection<A>::targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
5473                                                                                                         ContentType* ct, unsigned int* count)
5474 {
5475         *ct = contentUnknown;
5476         for (ld::Fixup::iterator fit=atom->fixupsBegin(), end=atom->fixupsEnd(); fit != end; ++fit) {
5477                 const ld::Atom* targetAtom = NULL;
5478                 switch ( fit->binding ) {
5479                         case ld::Fixup::bindingByNameUnbound:
5480                                 // ignore reference to ___CFConstantStringClassReference
5481                                 // we are just looking for reference to backing string data
5482                                 assert(fit->offsetInAtom == 0);
5483                                 assert(strcmp(fit->u.name, "___CFConstantStringClassReference") == 0);
5484                                 break;
5485                         case ld::Fixup::bindingDirectlyBound:
5486                         case ld::Fixup::bindingByContentBound:
5487                                 targetAtom = fit->u.target;
5488                                 break;
5489                         case ld::Fixup::bindingsIndirectlyBound:
5490                                 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5491                                 break;
5492                         default:
5493                                 assert(0 && "bad binding type");
5494                 }
5495                 assert(targetAtom != NULL);
5496                 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5497                 if ( targetAtom->section().type() == ld::Section::typeCString ) {
5498                         *ct = contentUTF8;
5499                         *count = targetAtom->size();
5500                 }
5501                 else if ( targetAtom->section().type() == ld::Section::typeUTF16Strings ) {
5502                         *ct = contentUTF16;
5503                         *count = (targetAtom->size()+1)/2; // round up incase of buggy compiler that has only one trailing zero byte
5504                 }
5505                 else {
5506                         *ct = contentUnknown;
5507                         *count = 0;
5508                         return NULL;
5509                 }
5510                 return target->contentPointer();
5511         }
5512         assert(0);
5513         return NULL;
5514 }
5515
5516 template <typename A>
5517 unsigned long CFStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5518 {
5519         // base hash of CFString on hash of cstring it wraps
5520         ContentType cType;
5521         unsigned long hash;
5522         unsigned int charCount;
5523         const uint8_t* content = this->targetContent(atom, ind, &cType, &charCount);
5524         switch ( cType ) {
5525                 case contentUTF8:
5526                         hash = 9408;
5527                         for (const char* s = (char*)content; *s != '\0'; ++s) {
5528                                 hash = hash * 33 + *s;
5529                         }
5530                         return hash;
5531                 case contentUTF16:
5532                         hash = 407955;
5533                         --charCount; // don't add last 0x0000 to hash because some buggy compilers only have trailing single byte
5534                         for (const uint16_t* s = (uint16_t*)content; charCount > 0; ++s, --charCount) {
5535                                 hash = hash * 1025 + *s;
5536                         }
5537                         return hash;
5538                 case contentUnknown:
5539                         // <rdar://problem/14134211> For malformed CFStrings, hash to address of atom so they have unique hashes
5540                         return ULONG_MAX - (unsigned long)(atom);
5541         }
5542         return 0;
5543 }
5544
5545
5546 template <typename A>
5547 bool CFStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5548                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5549 {
5550         if ( atom == &rhs )
5551                 return true;
5552         if ( rhs.section().type() != ld::Section::typeCFString)
5553                 return false;
5554         assert(this->type() == rhs.section().type());
5555         assert(strcmp(this->sectionName(), "__cfstring") == 0);
5556
5557         ContentType thisType;
5558         unsigned int charCount;
5559         const uint8_t* cstringContent = this->targetContent(atom, indirectBindingTable, &thisType, &charCount);
5560         ContentType rhsType;
5561         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5562         assert(rhsAtom !=  NULL);
5563         unsigned int rhsCharCount;
5564         const uint8_t* rhsStringContent = this->targetContent(rhsAtom, indirectBindingTable, &rhsType, &rhsCharCount);
5565
5566         if ( thisType != rhsType )
5567                 return false;
5568
5569         if ( thisType == contentUnknown )
5570                 return false;
5571
5572         if ( rhsType == contentUnknown )
5573                 return false;
5574
5575         // no need to compare content of pointers are already the same
5576         if ( cstringContent == rhsStringContent )
5577                 return true;
5578
5579         // no need to compare content if size is different
5580         if ( charCount != rhsCharCount )
5581                 return false;
5582
5583         switch ( thisType ) {
5584                 case contentUTF8:
5585                         return (strcmp((char*)cstringContent, (char*)rhsStringContent) == 0);
5586                 case contentUTF16:
5587                         {
5588                                 const uint16_t* cstringContent16 = (uint16_t*)cstringContent;
5589                                 const uint16_t* rhsStringContent16 = (uint16_t*)rhsStringContent;
5590                                 for (unsigned int i = 0; i < charCount; ++i) {
5591                                         if ( cstringContent16[i] != rhsStringContent16[i] )
5592                                                 return false;
5593                                 }
5594                                 return true;
5595                         }
5596                 case contentUnknown:
5597                         return false;
5598         }
5599         return false;
5600 }
5601
5602
5603 template <typename A>
5604 typename A::P::uint_t ObjC1ClassSection<A>::elementSizeAtAddress(pint_t addr)
5605 {
5606         // nominal size for each class is 48 bytes, but sometimes the compiler
5607         // over aligns and there is padding after class data
5608         const macho_section<P>* sct = this->machoSection();
5609         uint32_t align = 1 << sct->align();
5610         uint32_t size = ((12 * sizeof(pint_t)) + align-1) & (-align);
5611         return size;
5612 }
5613
5614 template <typename A>
5615 const char* ObjC1ClassSection<A>::unlabeledAtomName(Parser<A>& parser, pint_t addr)
5616 {
5617         // 8-bytes into class object is pointer to class name
5618         const macho_section<P>* sct = this->machoSection();
5619         uint32_t classObjcFileOffset = sct->offset() - sct->addr() + addr;
5620         const uint8_t* mappedFileContent = this->file().fileContent();
5621         pint_t nameAddr = P::getP(*((pint_t*)(mappedFileContent+classObjcFileOffset+2*sizeof(pint_t))));
5622
5623         // find section containing string address to get string bytes
5624         const macho_section<P>* const sections = parser.firstMachOSection();
5625         const uint32_t sectionCount = parser.machOSectionCount();
5626         for (uint32_t i=0; i < sectionCount; ++i) {
5627                 const macho_section<P>* aSect = &sections[i];
5628                 if ( (aSect->addr() <= nameAddr) && (nameAddr < (aSect->addr()+aSect->size())) ) {
5629                         assert((aSect->flags() & SECTION_TYPE) == S_CSTRING_LITERALS);
5630                         uint32_t nameFileOffset = aSect->offset() - aSect->addr() + nameAddr;
5631                         const char* name = (char*)mappedFileContent + nameFileOffset;
5632                         // spin through symbol table to find absolute symbol corresponding to this class
5633                         for (uint32_t s=0; s < parser.symbolCount(); ++s) {
5634                                 const macho_nlist<P>& sym =     parser.symbolFromIndex(s);
5635                                 if ( (sym.n_type() & N_TYPE) != N_ABS )
5636                                         continue;
5637                                 const char* absName = parser.nameFromSymbol(sym);
5638                                 if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
5639                                         if ( strcmp(&absName[17], name) == 0 )
5640                                                 return absName;
5641                                 }
5642                         }
5643                         assert(0 && "obj class name not found in symbol table");
5644                 }
5645         }
5646         assert(0 && "obj class name not found");
5647         return "unknown objc class";
5648 }
5649
5650
5651 template <typename A>
5652 const char* ObjC2ClassRefsSection<A>::targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5653 {
5654         assert(atom->fixupCount() == 1);
5655         ld::Fixup::iterator fit = atom->fixupsBegin();
5656         const char* className = NULL;
5657         switch ( fit->binding ) {
5658                 case ld::Fixup::bindingByNameUnbound:
5659                         className = fit->u.name;
5660                         break;
5661                 case ld::Fixup::bindingDirectlyBound:
5662                 case ld::Fixup::bindingByContentBound:
5663                         className = fit->u.target->name();
5664                         break;
5665                 case ld::Fixup::bindingsIndirectlyBound:
5666                         className = ind.indirectName(fit->u.bindingIndex);
5667                         break;
5668                 default:
5669                         assert(0 && "unsupported binding in objc2 class ref section");
5670         }
5671         assert(className != NULL);
5672         return className;
5673 }
5674
5675
5676 template <typename A>
5677 unsigned long ObjC2ClassRefsSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5678 {
5679         unsigned long hash = 978;
5680         for (const char* s = targetClassName(atom, ind); *s != '\0'; ++s) {
5681                 hash = hash * 33 + *s;
5682         }
5683         return hash;
5684 }
5685
5686 template <typename A>
5687 bool ObjC2ClassRefsSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5688                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5689 {
5690         assert(this->type() == rhs.section().type());
5691         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5692         assert(rhsAtom !=  NULL);
5693         const char* thisClassName = targetClassName(atom, indirectBindingTable);
5694         const char* rhsClassName = targetClassName(rhsAtom, indirectBindingTable);
5695         return (strcmp(thisClassName, rhsClassName) == 0);
5696 }
5697
5698
5699 template <typename A>
5700 const char* Objc1ClassReferences<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5701 {
5702         assert(atom->fixupCount() == 2);
5703         ld::Fixup::iterator fit = atom->fixupsBegin();
5704         if ( fit->kind == ld::Fixup::kindSetTargetAddress )
5705                 ++fit;
5706         const ld::Atom* targetAtom = NULL;
5707         switch ( fit->binding ) {
5708                 case ld::Fixup::bindingByContentBound:
5709                         targetAtom = fit->u.target;
5710                         break;
5711                 case ld::Fixup::bindingsIndirectlyBound:
5712                         targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5713                         if ( targetAtom == NULL ) {
5714                                 fprintf(stderr, "missing target named %s\n", ind.indirectName(fit->u.bindingIndex));
5715                         }
5716                         break;
5717                 default:
5718                         assert(0);
5719         }
5720         assert(targetAtom != NULL);
5721         const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5722         assert(target !=  NULL);
5723         return (char*)target->contentPointer();
5724 }
5725
5726
5727 template <typename A>
5728 const char* PointerToCStringSection<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5729 {
5730         assert(atom->fixupCount() == 1);
5731         ld::Fixup::iterator fit = atom->fixupsBegin();
5732         const ld::Atom* targetAtom = NULL;
5733         switch ( fit->binding ) {
5734                 case ld::Fixup::bindingByContentBound:
5735                         targetAtom = fit->u.target;
5736                         break;
5737                 case ld::Fixup::bindingsIndirectlyBound:
5738                         targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5739                         break;
5740                 case ld::Fixup::bindingDirectlyBound:
5741                         targetAtom = fit->u.target;
5742                         break;
5743                 default:
5744                         assert(0 && "unsupported reference to selector");
5745         }
5746         assert(targetAtom != NULL);
5747         const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5748         assert(target != NULL);
5749         assert(target->contentType() == ld::Atom::typeCString);
5750         return (char*)target->contentPointer();
5751 }
5752
5753 template <typename A>
5754 unsigned long PointerToCStringSection<A>::contentHash(const class Atom<A>* atom,
5755                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5756 {
5757         // make hash from section name and target cstring name
5758         unsigned long hash = 123;
5759         for (const char* s = this->sectionName(); *s != '\0'; ++s) {
5760                 hash = hash * 33 + *s;
5761         }
5762         for (const char* s = this->targetCString(atom, indirectBindingTable); *s != '\0'; ++s) {
5763                 hash = hash * 33 + *s;
5764         }
5765         return hash;
5766 }
5767
5768 template <typename A>
5769 bool PointerToCStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5770                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5771 {
5772         assert(this->type() == rhs.section().type());
5773         // there can be pointers-to-cstrings in different section names
5774         // we only want to coalesce in same section name
5775         if ( *this != rhs.section() )
5776                 return false;
5777
5778         // get string content for this
5779         const char* cstringContent = this->targetCString(atom, indirectBindingTable);
5780         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5781         assert(rhsAtom !=  NULL);
5782         const char* rhsCstringContent = this->targetCString(rhsAtom, indirectBindingTable);
5783
5784         assert(cstringContent != NULL);
5785         assert(rhsCstringContent != NULL);
5786         return (strcmp(cstringContent, rhsCstringContent) == 0);
5787 }
5788
5789
5790
5791 template <typename A>
5792 unsigned long UTF16StringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5793 {
5794         unsigned long hash = 5381;
5795         const uint16_t* stringContent = (uint16_t*)atom->contentPointer();
5796         // some buggy compilers end utf16 data with single byte, so don't use last word in hash computation
5797         unsigned int count = (atom->size()/2) - 1;
5798         for (const uint16_t* s = stringContent; count > 0; ++s, --count) {
5799                 hash = hash * 33 + *s;
5800         }
5801         return hash;
5802 }
5803
//
// Answers false when rhs is not in a utf16 string section.
// For an rhs that is in a utf16 string section there is no comparison
// implemented: the function asserts unconditionally and then answers false.
//
template <typename A>
bool UTF16StringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
                                                                                                        const ld::IndirectBindingTable& ind) const
{
        if ( rhs.section().type() != ld::Section::typeUTF16Strings )
                return false;
        // NOTE(review): reaching this point always asserts; presumably content
        // comparison of utf16 atoms was never needed here -- confirm against callers
        assert(0);
        return false;
}
5813
5814
5815
5816
5817
5818
5819
5820 template <>
5821 uint32_t Section<x86_64>::x86_64PcRelOffset(uint8_t r_type)
5822 {
5823         switch ( r_type ) {
5824                 case X86_64_RELOC_SIGNED:
5825                         return 4;
5826                 case X86_64_RELOC_SIGNED_1:
5827                         return 5;
5828                 case X86_64_RELOC_SIGNED_2:
5829                         return 6;
5830                 case X86_64_RELOC_SIGNED_4:
5831                         return 8;
5832         }
5833         return 0;
5834 }
5835
5836
5837 template <>
5838 bool Section<x86_64>::addRelocFixup(class Parser<x86_64>& parser, const macho_relocation_info<P>* reloc)
5839 {
5840         const macho_section<P>* sect = this->machoSection();
5841         uint64_t srcAddr = sect->addr() + reloc->r_address();
5842         Parser<x86_64>::SourceLocation  src;
5843         Parser<x86_64>::TargetDesc              target;
5844         Parser<x86_64>::TargetDesc              toTarget;
5845         src.atom = this->findAtomByAddress(srcAddr);
5846         src.offsetInAtom = srcAddr - src.atom->_objAddress;
5847         const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5848         uint64_t contentValue = 0;
5849         const macho_relocation_info<x86_64::P>* nextReloc = &reloc[1];
5850         bool result = false;
5851         bool useDirectBinding;
5852         switch ( reloc->r_length() ) {
5853                 case 0:
5854                         contentValue = *fixUpPtr;
5855                         break;
5856                 case 1:
5857                         contentValue = (int64_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5858                         break;
5859                 case 2:
5860                         contentValue = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5861                         break;
5862                 case 3:
5863                         contentValue = E::get64(*((uint64_t*)fixUpPtr));
5864                         break;
5865         }
5866         target.atom = NULL;
5867         target.name = NULL;
5868         target.weakImport = false;
5869         target.addend = 0;
5870         if ( reloc->r_extern() ) {
5871                 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5872                 // use direct reference for local symbols
5873                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5874                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5875                         target.addend += contentValue;
5876                 }
5877                 else {
5878                         target.name = parser.nameFromSymbol(sym);
5879                         target.weakImport = parser.weakImportFromSymbol(sym);
5880                         target.addend = contentValue;
5881                 }
5882                 // cfstrings should always use direct reference to backing store
5883                 if ( (this->type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
5884                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5885                         target.addend = contentValue;
5886                 }
5887         }
5888         else {
5889                 if ( reloc->r_pcrel()  )
5890                         contentValue += srcAddr + x86_64PcRelOffset(reloc->r_type());
5891                 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5892         }
5893         switch ( reloc->r_type() ) {
5894                 case X86_64_RELOC_UNSIGNED:
5895                         if ( reloc->r_pcrel() )
5896                                 throw "pcrel and X86_64_RELOC_UNSIGNED not supported";
5897                         switch ( reloc->r_length() ) {
5898                                 case 0:
5899                                 case 1:
5900                                         throw "length < 2 and X86_64_RELOC_UNSIGNED not supported";
5901                                 case 2:
5902                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5903                                         break;
5904                                 case 3:
5905                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian64, target);
5906                                         break;
5907                         }
5908                         break;
5909                 case X86_64_RELOC_SIGNED:
5910                 case X86_64_RELOC_SIGNED_1:
5911                 case X86_64_RELOC_SIGNED_2:
5912                 case X86_64_RELOC_SIGNED_4:
5913                         if ( ! reloc->r_pcrel() )
5914                                 throw "not pcrel and X86_64_RELOC_SIGNED* not supported";
5915                         if ( reloc->r_length() != 2 )
5916                                 throw "length != 2 and X86_64_RELOC_SIGNED* not supported";
5917                         switch ( reloc->r_type() ) {
5918                                 case X86_64_RELOC_SIGNED:
5919                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5920                                         break;
5921                                 case X86_64_RELOC_SIGNED_1:
5922                                         if ( reloc->r_extern() )
5923                                                 target.addend += 1;
5924                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_1, target);
5925                                         break;
5926                                 case X86_64_RELOC_SIGNED_2:
5927                                         if ( reloc->r_extern() )
5928                                                 target.addend += 2;
5929                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_2, target);
5930                                         break;
5931                                 case X86_64_RELOC_SIGNED_4:
5932                                         if ( reloc->r_extern() )
5933                                                 target.addend += 4;
5934                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_4, target);
5935                                         break;
5936                         }
5937                         break;
5938                 case X86_64_RELOC_BRANCH:
5939                         if ( ! reloc->r_pcrel() )
5940                                 throw "not pcrel and X86_64_RELOC_BRANCH not supported";
5941                         switch ( reloc->r_length() ) {
5942                                 case 2:
5943                                         if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5944                                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5945                                                 parser.addDtraceExtraInfos(src, &target.name[16]);
5946                                         }
5947                                         else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5948                                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5949                                                 parser.addDtraceExtraInfos(src, &target.name[20]);
5950                                         }
5951                                         else {
5952                                                 parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel32, target);
5953                                         }
5954                                         break;
5955                                 case 0:
5956                                         parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel8, target);
5957                                         break;
5958                                 default:
5959                                         throwf("length=%d and X86_64_RELOC_BRANCH not supported", reloc->r_length());
5960                         }
5961                         break;
5962                 case X86_64_RELOC_GOT:
5963                         if ( ! reloc->r_extern() )
5964                                 throw "not extern and X86_64_RELOC_GOT not supported";
5965                         if ( ! reloc->r_pcrel() )
5966                                 throw "not pcrel and X86_64_RELOC_GOT not supported";
5967                         if ( reloc->r_length() != 2 )
5968                                 throw "length != 2 and X86_64_RELOC_GOT not supported";
5969                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOT, target);
5970                         break;
5971                 case X86_64_RELOC_GOT_LOAD:
5972                         if ( ! reloc->r_extern() )
5973                                 throw "not extern and X86_64_RELOC_GOT_LOAD not supported";
5974                         if ( ! reloc->r_pcrel() )
5975                                 throw "not pcrel and X86_64_RELOC_GOT_LOAD not supported";
5976                         if ( reloc->r_length() != 2 )
5977                                 throw "length != 2 and X86_64_RELOC_GOT_LOAD not supported";
5978                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOTLoad, target);
5979                         break;
5980                 case X86_64_RELOC_SUBTRACTOR:
5981                         if ( reloc->r_pcrel() )
5982                                 throw "X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5983                         if ( reloc->r_length() < 2 )
5984                                 throw "X86_64_RELOC_SUBTRACTOR must have r_length of 2 or 3";
5985                         if ( !reloc->r_extern() )
5986                                 throw "X86_64_RELOC_SUBTRACTOR must have r_extern=1";
5987                         if ( nextReloc->r_type() != X86_64_RELOC_UNSIGNED )
5988                                 throw "X86_64_RELOC_SUBTRACTOR must be followed by X86_64_RELOC_UNSIGNED";
5989                         result = true;
5990                         if ( nextReloc->r_pcrel() )
5991                                 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5992                         if ( nextReloc->r_length() != reloc->r_length() )
5993                                 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR must have same r_length";
5994                         if ( nextReloc->r_extern() ) {
5995                                 const macho_nlist<P>& sym = parser.symbolFromIndex(nextReloc->r_symbolnum());
5996                                 // use direct reference for local symbols
5997                                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5998                                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), toTarget);
5999                                         toTarget.addend = contentValue;
6000                                         useDirectBinding = true;
6001                                 }
6002                                 else {
6003                                         toTarget.name = parser.nameFromSymbol(sym);
6004                                         toTarget.weakImport = parser.weakImportFromSymbol(sym);
6005                                         toTarget.addend = contentValue;
6006                                         useDirectBinding = false;
6007                                 }
6008                         }
6009                         else {
6010                                 parser.findTargetFromAddressAndSectionNum(contentValue, nextReloc->r_symbolnum(), toTarget);
6011                                 useDirectBinding = (toTarget.atom->scope() == ld::Atom::scopeTranslationUnit);
6012                         }
6013                         if ( useDirectBinding )
6014                                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.atom);
6015                         else
6016                                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.weakImport, toTarget.name);
6017                         parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, toTarget.addend);
6018                         if ( target.atom == NULL )
6019                                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, false, target.name);
6020                         else
6021                                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, target.atom);
6022                         if ( reloc->r_length() == 2 )
6023                                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
6024                         else
6025                                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian64);
6026                         break;
6027                 case X86_64_RELOC_TLV:
6028                         if ( ! reloc->r_extern() )
6029                                 throw "not extern and X86_64_RELOC_TLV not supported";
6030                         if ( ! reloc->r_pcrel() )
6031                                 throw "not pcrel and X86_64_RELOC_TLV not supported";
6032                         if ( reloc->r_length() != 2 )
6033                                 throw "length != 2 and X86_64_RELOC_TLV not supported";
6034                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
6035                         break;
6036                 default:
6037                         throwf("unknown relocation type %d", reloc->r_type());
6038         }
6039         return result;
6040 }
6041
6042
6043
//
// i386 specialization: translates one relocation entry of this section into
// fixups registered on the parser (via parser.addFixup / parser.addFixups).
//
// The entry is decoded in one of two ways, depending on whether the
// R_SCATTERED bit is set in reloc->r_address():
//   - not scattered: GENERIC_RELOC_VANILLA and GENERIC_RLEOC_TLV are handled,
//     any other r_type is rejected with throwf()
//   - scattered: GENERIC_RELOC_VANILLA, GENERIC_RELOC_SECTDIFF and
//     GENERIC_RELOC_LOCAL_SECTDIFF are handled; other types add no fixups
//
// Returns true only when a scattered entry is immediately followed by a
// non-scattered GENERIC_RELOC_PAIR entry, which tells the iterating caller to
// skip that following entry.  Every non-scattered path returns false.
// Unsupported r_length / r_pcrel / r_extern combinations are reported by
// throwing a string literal or by calling throwf().
//
6044 template <>
6045 bool Section<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<P>* reloc)
6046 {
6047         const macho_section<P>* sect = this->machoSection();
6048         uint32_t srcAddr;
6049         const uint8_t* fixUpPtr;
6050         uint32_t contentValue = 0;
6051         ld::Fixup::Kind kind = ld::Fixup::kindNone;
6052         Parser<x86>::SourceLocation     src;
6053         Parser<x86>::TargetDesc         target;
6054
             // non-scattered relocation: R_SCATTERED bit is clear in r_address
6055         if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
                     // locate the atom containing the fixup and the bytes of the fixup in the file
6056                 srcAddr = sect->addr() + reloc->r_address();
6057                 src.atom = this->findAtomByAddress(srcAddr);
6058                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
6059                 fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6060                 switch ( reloc->r_type() ) {
6061                 case GENERIC_RELOC_VANILLA:
                             // read the value stored at the fixup location and pick the store kind by size;
                             // for pc-relative entries the address just past the field is added to the stored value
6062                         switch ( reloc->r_length() ) {
6063                                 case 0:
                                             // 1-byte field, sign-extended; only the pc-relative form is accepted
6064                                         contentValue = (int32_t)(int8_t)*fixUpPtr;
6065                                         if ( reloc->r_pcrel() ) {
6066                                                 kind = ld::Fixup::kindStoreX86BranchPCRel8;
6067                                                 contentValue += srcAddr + sizeof(uint8_t);
6068                                         }
6069                                         else
6070                                                 throw "r_length=0 and r_pcrel=0 not supported";
6071                                         break;
6072                                 case 1:
                                             // 2-byte field, sign-extended
6073                                         contentValue = (int32_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
6074                                         if ( reloc->r_pcrel() ) {
6075                                                 kind = ld::Fixup::kindStoreX86PCRel16;
6076                                                 contentValue += srcAddr + sizeof(uint16_t);
6077                                         }
6078                                         else
6079                                                 kind = ld::Fixup::kindStoreLittleEndian16;
6080                                         break;
6081                                 case 2:
                                             // 4-byte field
6082                                         contentValue = E::get32(*((uint32_t*)fixUpPtr));
6083                                         if ( reloc->r_pcrel() ) {
6084                                                 kind = ld::Fixup::kindStoreX86BranchPCRel32;
6085                                                 contentValue += srcAddr + sizeof(uint32_t);
6086                                         }
6087                                         else
6088                                                 kind = ld::Fixup::kindStoreLittleEndian32;
6089                                         break;
6090                                 case 3:
6091                                         throw "r_length=3 not supported";
6092                         }
6093                         if ( reloc->r_extern() ) {
                                     // extern: target is named by symbol-table entry r_symbolnum; the stored value becomes the addend
6094                                 target.atom = NULL;
6095                                 const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
6096                                 target.name = parser.nameFromSymbol(targetSymbol);
6097                                 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
6098                                 target.addend = (int32_t)contentValue;
6099                         }
6100                         else {
                                     // not extern: r_symbolnum is passed as the section number and the stored value as the address to resolve
6101                                 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
6102                         }
                             // special case 32-bit pc-relative branches to dtrace symbols: a single dtrace-specific fixup
                             // replaces the normal store, and the part of the name after the prefix is recorded as extra info
6103                         if ( (kind == ld::Fixup::kindStoreX86BranchPCRel32) && (target.name != NULL) ) {
6104                                 if ( strncmp(target.name, "___dtrace_probe$", 16) == 0 ) {
6105                                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
6106                                         parser.addDtraceExtraInfos(src, &target.name[16]);
6107                                         return false;
6108                                 }
6109                                 else if ( strncmp(target.name, "___dtrace_isenabled$", 20) == 0 ) {
6110                                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
6111                                         parser.addDtraceExtraInfos(src, &target.name[20]);
6112                                         return false;
6113                                 }
6114                         }
6115                         parser.addFixups(src, kind, target);
6116                         return false;
                             // not reached: the return above always leaves the function first
6117                         break;
                     // thread-local-variable reference (the constant is spelled GENERIC_RLEOC_TLV in this code)
6118                 case GENERIC_RLEOC_TLV:
6119                         {
6120                                 if ( !reloc->r_extern() )
6121                                         throw "r_extern=0 and r_type=GENERIC_RLEOC_TLV not supported";
6122                                 if ( reloc->r_length() != 2 )
6123                                         throw "r_length!=2 and r_type=GENERIC_RLEOC_TLV not supported";
6124                                 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
6125                                 // use direct reference for local symbols
                                     // (here "local" means defined in a section, N_SECT, and without the N_EXT bit)
6126                                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
6127                                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
6128                                 }
6129                                 else {
                                             // otherwise reference the symbol by name
6130                                         target.atom = NULL;
6131                                         target.name = parser.nameFromSymbol(sym);
6132                                         target.weakImport = parser.weakImportFromSymbol(sym);
6133                                 }
                                     // the sign-extended 32-bit value stored at the fixup location is the addend
                                     // (this overwrites any addend set by the target lookup above)
6134                                 target.addend = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
6135                                 if ( reloc->r_pcrel() ) {
6136                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
6137                                 }
6138                                 else {
6139                                         parser.addFixups(src, ld::Fixup::kindStoreX86Abs32TLVLoad, target);
6140                                 }
6141                                 return false;
6142                         }
                             // not reached: the block above always returns
6143                         break;
6144                 default:
6145                         throwf("unsupported i386 relocation type (%d)", reloc->r_type());
6146                 }
6147         }
6148         else {
6149                 // scattered relocation
                     // reinterpret the same entry with the scattered layout, which carries the target address in r_value()
6150                 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
6151                 srcAddr = sect->addr() + sreloc->r_address();
6152                 src.atom = this->findAtomByAddress(srcAddr);
6153                 assert(src.atom != NULL);
6154                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
6155                 fixUpPtr = file().fileContent() + sect->offset() + sreloc->r_address();
6156                 uint32_t relocValue = sreloc->r_value();
6157                 bool result = false;
6158                 // file format allows pair to be scattered or not
                     // NOTE(review): the entry after this one is examined unconditionally; this presumes
                     // another entry follows in the relocation array -- confirm against the caller
6159                 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
6160                 const macho_relocation_info<P>* nextReloc = &reloc[1];
6161                 bool nextRelocIsPair = false;
                     // NOTE(review): nextRelocAddress is assigned below but not read anywhere in this function
6162                 uint32_t nextRelocAddress = 0;
                     // NOTE(review): nextRelocValue is only assigned when the pair is itself scattered, so with a
                     // non-scattered pair it is still 0 when the SECTDIFF case uses it -- confirm this is intended
6163                 uint32_t nextRelocValue = 0;
6164                 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
6165                         if ( nextReloc->r_type() == GENERIC_RELOC_PAIR ) {
6166                                 nextRelocIsPair = true;
6167                                 nextRelocAddress = nextReloc->r_address();
6168                                 result = true;  // iterator should skip next reloc, since we've consumed it here
6169                         }
6170                 }
6171                 else {
                             // scattered pair: result is left false on this path
6172                         if ( nextSReloc->r_type() == GENERIC_RELOC_PAIR ) {
6173                                 nextRelocIsPair = true;
6174                                 nextRelocAddress = nextSReloc->r_address();
6175                                 nextRelocValue = nextSReloc->r_value();
6176                         }
6177                 }
                     // no default case: scattered types other than those listed add no fixups and just return result
6178                 switch (sreloc->r_type()) {
6179                         case GENERIC_RELOC_VANILLA:
6180                                 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
6181                                 target.atom = parser.findAtomByAddress(relocValue);
6182                                 if ( sreloc->r_pcrel() ) {
                                             // pc-relative: rebuild the referenced address from the stored displacement plus the
                                             // address just past the field, then express it as an addend relative to r_value()
6183                                         switch ( sreloc->r_length() ) {
6184                                                 case 0:
                                                             // NOTE(review): *fixUpPtr is an unsigned byte here, so the displacement is not
                                                             // sign-extended the way the non-scattered r_length=0 path does -- confirm intended
6185                                                         contentValue = srcAddr + 1 + *fixUpPtr;
6186                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
6187                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel8, target);
6188                                                         break;
6189                                                 case 1:
6190                                                         contentValue = srcAddr + 2 + LittleEndian::get16(*((uint16_t*)fixUpPtr));
6191                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
6192                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel16, target);
6193                                                         break;
6194                                                 case 2:
6195                                                         contentValue = srcAddr + 4 + LittleEndian::get32(*((uint32_t*)fixUpPtr));
6196                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
6197                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
6198                                                         break;
6199                                                 case 3:
6200                                                         throw "unsupported r_length=3 for scattered pc-rel vanilla reloc";
6201                                                         break;
6202                                         }
6203                                 }
6204                                 else {
                                             // absolute: only 4-byte fields are accepted; the addend is the stored value
                                             // minus the object address of the atom found at r_value()
6205                                         if ( sreloc->r_length() != 2 )
6206                                                 throwf("unsupported r_length=%d for scattered vanilla reloc", sreloc->r_length());
6207                                         contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
6208                                         target.addend = (int32_t)contentValue - (int32_t)(target.atom->objectAddress());
6209                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
6210                                 }
6211                                 break;
                             // difference of two addresses: this entry's r_value() is the target ("to") address and
                             // the following pair's r_value() is the address being subtracted ("from")
6212                         case GENERIC_RELOC_SECTDIFF:
6213                         case GENERIC_RELOC_LOCAL_SECTDIFF:
6214                                 {
6215                                         if ( !nextRelocIsPair )
6216                                                 throw "GENERIC_RELOC_SECTDIFF missing following pair";
                                             // only 2-byte and 4-byte fields are supported
6217                                         switch ( sreloc->r_length() ) {
6218                                                 case 0:
6219                                                 case 3:
6220                                                         throw "bad length for GENERIC_RELOC_SECTDIFF";
6221                                                 case 1:
6222                                                         contentValue = (int32_t)(int16_t)LittleEndian::get16(*((uint16_t*)fixUpPtr));
6223                                                         kind = ld::Fixup::kindStoreLittleEndian16;
6224                                                         break;
6225                                                 case 2:
6226                                                         contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
6227                                                         kind = ld::Fixup::kindStoreLittleEndian32;
6228                                                         break;
6229                                         }
                                             // atom holding the "from" address, and that address's offset within the atom
6230                                         Atom<x86>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
6231                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
                                             // presumably fills in target.atom / target.name / target.addend for the "to" address -- verify in Parser
6232                                         parser.findTargetFromAddress(sreloc->r_value(), target);
6233                                         // check for addend encoded in the section content
                                             // (stored value minus the difference of the two r_value() fields)
6234                                         int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
                                             // Both branches emit the same five-fixup cluster: set target, add addend,
                                             // subtract fromAtom's address, subtract an addend, store.  They differ only in
                                             // where the extra addend goes: a negative one is folded into the subtracted
                                             // addend (k4of5), a non-negative one into the added addend (k2of5).
6235                                         if ( addend < 0 ) {
6236                                                 // switch binding based on coalescing
6237                                                 if ( target.atom == NULL ) {
                                                             // no atom: bind by name
6238                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
6239                                                 }
6240                                                 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
                                                             // translation-unit scoped atom: bind directly to the atom
6241                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
6242                                                 }
6243                                                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
                                                             // atom combined by name plus content/references: bind to the atom as bindingByContentBound
6244                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
6245                                                 }
6246                                                 else {
                                                             // any other atom: bind by the atom's name
6247                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
6248                                                 }
6249                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
6250                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6251                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
6252                                                 parser.addFixup(src, ld::Fixup::k5of5, kind);
6253                                         }
6254                                         else {
6255                                                 // switch binding based on coalescing (same selection as the branch above)
6256                                                 if ( target.atom == NULL ) {
6257                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
6258                                                 }
6259                                                 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
6260                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
6261                                                 }
6262                                                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
6263                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
6264                                                 }
6265                                                 else {
6266                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
6267                                                 }
6268                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend+addend);
6269                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6270                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
6271                                                 parser.addFixup(src, ld::Fixup::k5of5, kind);
6272                                         }
6273                                 }
6274                                 break;
6275                 }
                     // true only if a non-scattered GENERIC_RELOC_PAIR followed this entry
6276                 return result;
6277         }
6278 }
6279
6280
6281
6282
6283
6284 #if SUPPORT_ARCH_arm_any
6285 template <>
6286 bool Section<arm>::addRelocFixup(class Parser<arm>& parser, const macho_relocation_info<P>* reloc)
6287 {
6288         const macho_section<P>* sect = this->machoSection();
6289         bool result = false;
6290         uint32_t srcAddr;
6291         uint32_t dstAddr;
6292         uint32_t* fixUpPtr;
6293         int32_t displacement = 0;
6294         uint32_t instruction = 0;
6295         pint_t contentValue = 0;
6296         Parser<arm>::SourceLocation     src;
6297         Parser<arm>::TargetDesc         target;
6298         const macho_relocation_info<P>* nextReloc;
6299
6300         if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
6301                 bool externSymbolIsThumbDef = false;
6302                 srcAddr = sect->addr() + reloc->r_address();
6303                 src.atom = this->findAtomByAddress(srcAddr);
6304                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
6305                 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + reloc->r_address());
6306                 if ( reloc->r_type() != ARM_RELOC_PAIR )
6307                         instruction = LittleEndian::get32(*fixUpPtr);
6308                 if ( reloc->r_extern() ) {
6309                         const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
6310                         // use direct reference for local symbols
6311                         if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) && (((targetSymbol.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(targetSymbol)[0] == 'L')) ) {
6312                                 parser.findTargetFromAddressAndSectionNum(targetSymbol.n_value(), targetSymbol.n_sect(), target);
6313                         }
6314                         else {
6315                                 target.atom = NULL;
6316                                 target.name = parser.nameFromSymbol(targetSymbol);
6317                                 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
6318                                 if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) &&  (targetSymbol.n_desc() & N_ARM_THUMB_DEF) )
6319                                         externSymbolIsThumbDef = true;
6320                         }
6321                 }
6322                 switch ( reloc->r_type() ) {
6323                         case ARM_RELOC_BR24:
6324                                 // Sign-extend displacement
6325                                 displacement = (instruction & 0x00FFFFFF) << 2;
6326                                 if ( (displacement & 0x02000000) != 0 )
6327                                         displacement |= 0xFC000000;
6328                                 // The pc added will be +8 from the pc
6329                                 displacement += 8;
6330                                 // If this is BLX add H << 1
6331                                 if ((instruction & 0xFE000000) == 0xFA000000)
6332                                         displacement += ((instruction & 0x01000000) >> 23);
6333                                 if ( reloc->r_extern() ) {
6334                                         dstAddr = srcAddr + displacement;
6335                                         // <rdar://problem/16652542> support large .o files
6336                                         if ( srcAddr > 0x2000000 ) {
6337                                                 dstAddr -= ((srcAddr + 0x1FFFFFF) & 0xFC000000);
6338                                         }
6339                                         target.addend = dstAddr;
6340                                         if ( externSymbolIsThumbDef )
6341                                                 target.addend &= -2; // remove thumb bit
6342                                 }
6343                                 else {
6344                                         dstAddr = srcAddr + displacement;
6345                                         parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
6346                                 }
6347                                 // special case "calls" for dtrace
6348                                 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
6349                                         parser.addFixup(src, ld::Fixup::k1of1,
6350                                                                                                                         ld::Fixup::kindStoreARMDtraceCallSiteNop, false, target.name);
6351                                         parser.addDtraceExtraInfos(src, &target.name[16]);
6352                                 }
6353                                 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
6354                                         parser.addFixup(src, ld::Fixup::k1of1,
6355                                                                                                                         ld::Fixup::kindStoreARMDtraceIsEnableSiteClear, false, target.name);
6356                                         parser.addDtraceExtraInfos(src, &target.name[20]);
6357                                 }
6358                                 else {
6359                                         parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
6360                                 }
6361                                 break;
6362                         case ARM_THUMB_RELOC_BR22:
6363                                 // thumb2 added two more bits to displacement, complicating the displacement decoding
6364                                 {
6365                                         uint32_t s = (instruction >> 10) & 0x1;
6366                                         uint32_t j1 = (instruction >> 29) & 0x1;
6367                                         uint32_t j2 = (instruction >> 27) & 0x1;
6368                                         uint32_t imm10 = instruction & 0x3FF;
6369                                         uint32_t imm11 = (instruction >> 16) & 0x7FF;
6370                                         uint32_t i1 = (j1 == s);
6371                                         uint32_t i2 = (j2 == s);
6372                                         uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
6373                                         int32_t sdis = dis;
6374                                         if ( s )
6375                                                 sdis |= 0xFE000000;
6376                                         displacement = sdis;
6377                                 }
6378                                 // The pc added will be +4 from the pc
6379                                 displacement += 4;
6380                                 // If the instruction was blx, force the low 2 bits to be clear
6381                                 dstAddr = srcAddr + displacement;
6382                                 if ((instruction & 0xD0000000) == 0xC0000000)
6383                                         dstAddr &= 0xFFFFFFFC;
6384
6385                                 if ( reloc->r_extern() ) {
6386                                         // <rdar://problem/16652542> support large .o files
6387                                         if ( srcAddr > 0x1000000 ) {
6388                                                 dstAddr -= ((srcAddr + 0xFFFFFF) & 0xFE000000);
6389                                         }
6390                                         target.addend = (int64_t)(int32_t)dstAddr;
6391                                 }
6392                                 else {
6393                                         parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
6394                                 }
6395                                 // special case "calls" for dtrace
6396                                 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
6397                                         parser.addFixup(src, ld::Fixup::k1of1,
6398                                                                                                                         ld::Fixup::kindStoreThumbDtraceCallSiteNop, false, target.name);
6399                                         parser.addDtraceExtraInfos(src, &target.name[16]);
6400                                 }
6401                                 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
6402                                         parser.addFixup(src, ld::Fixup::k1of1,
6403                                                                                                                         ld::Fixup::kindStoreThumbDtraceIsEnableSiteClear, false, target.name);
6404                                         parser.addDtraceExtraInfos(src, &target.name[20]);
6405                                 }
6406                                 else {
6407                                         parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
6408                                 }
6409                                 break;
6410                         case ARM_RELOC_VANILLA:
6411                                 if ( reloc->r_length() != 2 )
6412                                         throw "bad length for ARM_RELOC_VANILLA";
6413                                 contentValue = LittleEndian::get32(*fixUpPtr);
6414                                 if ( reloc->r_extern() ) {
6415                                         target.addend = (int32_t)contentValue;
6416                                         if ( externSymbolIsThumbDef )
6417                                                 target.addend &= -2; // remove thumb bit
6418                                 }
6419                                 else {
6420                                         parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
6421                                         // possible non-extern relocation turned into by-name ref because target is a weak-def
6422                                         if ( target.atom != NULL ) {
6423                                                 if ( target.atom->isThumb() )
6424                                                         target.addend &= -2; // remove thumb bit
6425                                                 // if reference to LSDA, add group subordinate fixup
6426                                                 if ( target.atom->contentType() == ld::Atom::typeLSDA ) {
6427                                                         Parser<arm>::SourceLocation     src2;
6428                                                         src2.atom = src.atom;
6429                                                         src2.offsetInAtom = 0;
6430                                                         parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, target.atom);
6431                                                 }
6432                                         }
6433                                 }
6434                                 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
6435                                 break;
6436                         case ARM_THUMB_32BIT_BRANCH:
6437                                 // silently ignore old unnecessary reloc
6438                                 break;
6439                         case ARM_RELOC_HALF:
6440                                 nextReloc = &reloc[1];
6441                                 if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
6442                                         uint32_t instruction16;
6443                                         uint32_t other16 = (nextReloc->r_address() & 0xFFFF);
6444                                         bool isThumb;
6445                                         if ( reloc->r_length() & 2 ) {
6446                                                 isThumb = true;
6447                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
6448                                                 uint32_t imm4 =  (instruction & 0x0000000F);
6449                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
6450                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
6451                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
6452                                         }
6453                                         else {
6454                                                 isThumb = false;
6455                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
6456                                                 uint32_t imm12 = (instruction & 0x00000FFF);
6457                                                 instruction16 = (imm4 << 12) | imm12;
6458                                         }
6459                                         if ( reloc->r_length() & 1 ) {
6460                                                 // high 16
6461                                                 dstAddr = ((instruction16 << 16) | other16);
6462                         if ( reloc->r_extern() ) {
6463                             target.addend = dstAddr;
6464                                                         if ( externSymbolIsThumbDef )
6465                                                                 target.addend &= -2; // remove thumb bit
6466                                                 }
6467                         else {
6468                             parser.findTargetFromAddress(dstAddr, target);
6469                             if ( target.atom->isThumb() )
6470                                 target.addend &= (-2); // remove thumb bit
6471                         }
6472                                                 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16), target);
6473                                         }
6474                                         else {
6475                                                 // low 16
6476                                                 dstAddr = (other16 << 16) | instruction16;
6477                         if ( reloc->r_extern() ) {
6478                             target.addend = dstAddr;
6479                                                         if ( externSymbolIsThumbDef )
6480                                                                 target.addend &= -2; // remove thumb bit
6481                         }
6482                         else {
6483                             parser.findTargetFromAddress(dstAddr, target);
6484                             if ( target.atom->isThumb() )
6485                                 target.addend &= (-2); // remove thumb bit
6486                         }
6487                                                 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16), target);
6488                                         }
6489                                         result = true;
6490                                 }
6491                                 else
6492                                         throw "for ARM_RELOC_HALF, next reloc is not ARM_RELOC_PAIR";
6493                                 break;
6494                         default:
6495                                 throwf("unknown relocation type %d", reloc->r_type());
6496                                 break;
6497                 }
6498         }
6499         else {
6500                 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
6501                 // file format allows pair to be scattered or not
6502                 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
6503                 nextReloc = &reloc[1];
6504                 srcAddr = sect->addr() + sreloc->r_address();
6505                 dstAddr = sreloc->r_value();
6506                 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + sreloc->r_address());
6507                 instruction = LittleEndian::get32(*fixUpPtr);
6508                 src.atom = this->findAtomByAddress(srcAddr);
6509                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
6510                 bool nextRelocIsPair = false;
6511                 uint32_t nextRelocAddress = 0;
6512                 uint32_t nextRelocValue = 0;
6513                 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
6514                         if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
6515                                 nextRelocIsPair = true;
6516                                 nextRelocAddress = nextReloc->r_address();
6517                                 result = true;
6518                         }
6519                 }
6520                 else {
6521                         if ( nextSReloc->r_type() == ARM_RELOC_PAIR ) {
6522                                 nextRelocIsPair = true;
6523                                 nextRelocAddress = nextSReloc->r_address();
6524                                 nextRelocValue = nextSReloc->r_value();
6525                                 result = true;
6526                         }
6527                 }
6528                 switch ( sreloc->r_type() ) {
6529                         case ARM_RELOC_VANILLA:
6530                                 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
6531                                 if ( sreloc->r_length() != 2 )
6532                                         throw "bad length for ARM_RELOC_VANILLA";
6533                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
6534                                 if ( target.atom == NULL )
6535                                         throwf("bad r_value (0x%08X) for ARM_RELOC_VANILLA\n", sreloc->r_value());
6536                                 contentValue = LittleEndian::get32(*fixUpPtr);
6537                                 target.addend = contentValue - target.atom->_objAddress;
6538                                 if ( target.atom->isThumb() )
6539                                         target.addend &= -2; // remove thumb bit
6540                                 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
6541                                 break;
6542                         case ARM_RELOC_BR24:
6543                                 // Sign-extend displacement
6544                                 displacement = (instruction & 0x00FFFFFF) << 2;
6545                                 if ( (displacement & 0x02000000) != 0 )
6546                                         displacement |= 0xFC000000;
6547                                 // The pc added will be +8 from the pc
6548                                 displacement += 8;
6549                                 // If this is BLX add H << 1
6550                                 if ((instruction & 0xFE000000) == 0xFA000000)
6551                                         displacement += ((instruction & 0x01000000) >> 23);
6552                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
6553                                 target.addend = (int64_t)(srcAddr + displacement) - (int64_t)(target.atom->_objAddress);
6554                                 parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
6555                                 break;
6556                         case ARM_THUMB_RELOC_BR22:
6557                                 // thumb2 added two more bits to displacement, complicating the displacement decoding
6558                                 {
6559                                         uint32_t s = (instruction >> 10) & 0x1;
6560                                         uint32_t j1 = (instruction >> 29) & 0x1;
6561                                         uint32_t j2 = (instruction >> 27) & 0x1;
6562                                         uint32_t imm10 = instruction & 0x3FF;
6563                                         uint32_t imm11 = (instruction >> 16) & 0x7FF;
6564                                         uint32_t i1 = (j1 == s);
6565                                         uint32_t i2 = (j2 == s);
6566                                         uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
6567                                         int32_t sdis = dis;
6568                                         if ( s )
6569                                                 sdis |= 0xFE000000;
6570                                         displacement = sdis;
6571                                 }
6572                                 // The pc added will be +4 from the pc
6573                                 displacement += 4;
6574                                 dstAddr = srcAddr+displacement;
6575                                 // If the instruction was blx, force the low 2 bits to be clear
6576                                 if ((instruction & 0xF8000000) == 0xE8000000)
6577                                         dstAddr &= 0xFFFFFFFC;
6578                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
6579                                 target.addend = dstAddr - target.atom->_objAddress;
6580                                 parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
6581                                 break;
6582                         case ARM_RELOC_SECTDIFF:
6583                         case ARM_RELOC_LOCAL_SECTDIFF:
6584                                 {
6585                                         if ( ! nextRelocIsPair )
6586                                                 throw "ARM_RELOC_SECTDIFF missing following pair";
6587                                         if ( sreloc->r_length() != 2 )
6588                                                 throw "bad length for ARM_RELOC_SECTDIFF";
6589                                         contentValue = LittleEndian::get32(*fixUpPtr);
6590                                         Atom<arm>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
6591                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
6592                                         uint32_t offsetInTarget;
6593                                         Atom<arm>* targetAtom = parser.findAtomByAddressOrLocalTargetOfStub(sreloc->r_value(), &offsetInTarget);
6594                                         // check for addend encoded in the section content
6595                     int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
6596                                         if ( targetAtom->isThumb() )
6597                                                 addend &= -2; // remove thumb bit
6598                                         // if reference to LSDA, add group subordinate fixup
6599                                         if ( targetAtom->contentType() == ld::Atom::typeLSDA ) {
6600                                                 Parser<arm>::SourceLocation     src2;
6601                                                 src2.atom = src.atom;
6602                                                 src2.offsetInAtom = 0;
6603                                                 parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, targetAtom);
6604                                         }
6605                                         if ( addend < 0 ) {
6606                                                 // switch binding base on coalescing
6607                                                 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
6608                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
6609                                                 }
6610                                                 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
6611                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
6612                                                 }
6613                                                 else {
6614                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
6615                                                 }
6616                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, offsetInTarget);
6617                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6618                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
6619                                                 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
6620                                         }
6621                                         else {
6622                                                 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
6623                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
6624                                                 }
6625                                                 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
6626                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
6627                                                 }
6628                                                 else {
6629                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
6630                                                 }
6631                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)(offsetInTarget+addend));
6632                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6633                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
6634                                                 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
6635                                         }
6636                                 }
6637                                 break;
6638                         case ARM_RELOC_HALF_SECTDIFF:
6639                                 if ( nextRelocIsPair ) {
6640                                         instruction = LittleEndian::get32(*fixUpPtr);
6641                                         Atom<arm>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
6642                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
6643                                         Atom<arm>* targetAtom  = parser.findAtomByAddress(sreloc->r_value());
6644                                         uint32_t offsetInTarget = sreloc->r_value() - targetAtom->_objAddress;
6645                                         uint32_t instruction16;
6646                                         uint32_t other16 = (nextRelocAddress & 0xFFFF);
6647                                         bool isThumb;
6648                                         if ( sreloc->r_length() & 2 ) {
6649                                                 isThumb = true;
6650                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
6651                                                 uint32_t imm4 =  (instruction & 0x0000000F);
6652                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
6653                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
6654                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
6655                                         }
6656                                         else {
6657                                                 isThumb = false;
6658                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
6659                                                 uint32_t imm12 = (instruction & 0x00000FFF);
6660                                                 instruction16 = (imm4 << 12) | imm12;
6661                                         }
6662                                         if ( sreloc->r_length() & 1 )
6663                                                 dstAddr = ((instruction16 << 16) | other16);
6664                                         else
6665                                                 dstAddr = (other16 << 16) | instruction16;
6666                                         if ( targetAtom->isThumb() )
6667                                                 dstAddr &= (-2); // remove thumb bit
6668                     int32_t addend = dstAddr - (sreloc->r_value() - nextRelocValue);
6669                                         if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
6670                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
6671                                         }
6672                                         else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
6673                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
6674                                         }
6675                                         else {
6676                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
6677                                         }
6678                                         parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)offsetInTarget+addend);
6679                                         parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
6680                                         parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
6681                                         if ( sreloc->r_length() & 1 ) {
6682                                                 // high 16
6683                                                 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
6684                                         }
6685                                         else {
6686                                                 // low 16
6687                                                 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
6688                                         }
6689                                         result = true;
6690                                 }
6691                                 else
6692                                         throw "ARM_RELOC_HALF_SECTDIFF reloc missing following pair";
6693                                 break;
6694                         case ARM_RELOC_HALF:
6695                                 if ( nextRelocIsPair ) {
6696                                         instruction = LittleEndian::get32(*fixUpPtr);
6697                                         Atom<arm>* targetAtom  = parser.findAtomByAddress(sreloc->r_value());
6698                                         uint32_t instruction16;
6699                                         uint32_t other16 = (nextRelocAddress & 0xFFFF);
6700                                         bool isThumb;
6701                                         if ( sreloc->r_length() & 2 ) {
6702                                                 isThumb = true;
6703                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
6704                                                 uint32_t imm4 =  (instruction & 0x0000000F);
6705                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
6706                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
6707                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
6708                                         }
6709                                         else {
6710                                                 isThumb = false;
6711                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
6712                                                 uint32_t imm12 = (instruction & 0x00000FFF);
6713                                                 instruction16 = (imm4 << 12) | imm12;
6714                                         }
6715                                         if ( sreloc->r_length() & 1 )
6716                                                 dstAddr = ((instruction16 << 16) | other16);
6717                                         else
6718                                                 dstAddr = (other16 << 16) | instruction16;
6719                                         if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
6720                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, targetAtom);
6721                                         }
6722                                         else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
6723                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
6724                                         }
6725                                         else {
6726                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
6727                                         }
6728                                         parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, dstAddr - targetAtom->_objAddress);
6729                                         if ( sreloc->r_length() & 1 ) {
6730                                                 // high 16
6731                                                 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
6732                                         }
6733                                         else {
6734                                                 // low 16
6735                                                 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
6736                                         }
6737                                         result = true;
6738                                 }
6739                                 else
6740                                         throw "scattered ARM_RELOC_HALF reloc missing following pair";
6741                                 break;
6742                         default:
6743                                 throwf("unknown ARM scattered relocation type %d", sreloc->r_type());
6744                 }
6745         }
6746         return result;
6747 }
6748 #endif
6749
6750
#if SUPPORT_ARCH_arm64
//
// Converts one arm64 relocation record of this section into ld::Fixup entries
// registered on 'parser'.
//
// 'reloc' points at the record to process.  An ARM64_RELOC_ADDEND record is treated
// as a prefix: its 24-bit signed addend is captured and processing continues with
// the record that follows it.  An ARM64_RELOC_SUBTRACTOR record is processed together
// with the ARM64_RELOC_UNSIGNED record that must follow it.
//
// Returns true when two records were consumed (ADDEND prefix or SUBTRACTOR pair),
// false when only 'reloc' itself was consumed.
//
// Malformed or unsupported relocations are reported by throwing a C string
// (const char*) or through throwf().
//
template <>
bool Section<arm64>::addRelocFixup(class Parser<arm64>& parser, const macho_relocation_info<P>* reloc)
{
        bool result = false;
        Parser<arm64>::SourceLocation   src;
        Parser<arm64>::TargetDesc       target   = { NULL, NULL, false, 0 };
        // initialized like 'target' so no field is ever read while indeterminate
        Parser<arm64>::TargetDesc       toTarget = { NULL, NULL, false, 0 };
        int32_t prefixRelocAddend = 0;
        if ( reloc->r_type() == ARM64_RELOC_ADDEND ) {
                // the addend is carried in the r_symbolnum field of the prefix record
                uint32_t rawAddend = reloc->r_symbolnum();
                prefixRelocAddend = rawAddend;
                if ( rawAddend & 0x00800000 )
                        prefixRelocAddend |= 0xFF000000; // sign extend 24-bit signed int to 32-bits
                uint32_t addendAddress = reloc->r_address();
                ++reloc;  // advance to the reloc record the addend applies to
                result = true;
                if ( reloc->r_address() != addendAddress )
                        throw "ARM64_RELOC_ADDEND r_address does not match next reloc's r_address";
        }

        // locate the atom and the bytes being fixed up
        const macho_section<P>* sect = this->machoSection();
        uint64_t srcAddr = sect->addr() + reloc->r_address();
        src.atom = this->findAtomByAddress(srcAddr);
        src.offsetInAtom = srcAddr - src.atom->_objAddress;
        const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
        uint64_t contentValue = 0;
        const macho_relocation_info<arm64::P>* nextReloc = &reloc[1];
        bool useDirectBinding;
        uint32_t instruction;
        uint32_t encodedAddend;

        // read the current content at the fixup location; 16 and 32-bit values are sign extended
        switch ( reloc->r_length() ) {
                case 0:
                        contentValue = *fixUpPtr;
                        break;
                case 1:
                        contentValue = (int64_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
                        break;
                case 2:
                        contentValue = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
                        break;
                case 3:
                        contentValue = E::get64(*((uint64_t*)fixUpPtr));
                        break;
        }

        // work out what the relocation refers to
        if ( reloc->r_extern() ) {
                const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
                const char* symbolName = parser.nameFromSymbol(sym);
                if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (symbolName[0] == 'L') || (symbolName[0] == 'l')) ) {
                        // use direct reference for local symbols
                        parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
                }
                else if ( ((sym.n_type() & N_TYPE) == N_SECT) && (src.atom->_objAddress <= sym.n_value()) && (sym.n_value() < (src.atom->_objAddress+src.atom->size())) ) {
                        // <rdar://problem/13700961> spurious warning when weak function has reference to itself
                        // use direct reference when atom targets itself
                        target.atom = src.atom;
                        target.name = NULL;
                }
                else {
                        // bind by name
                        target.name = symbolName;
                        target.weakImport = parser.weakImportFromSymbol(sym);
                }
                // cfstrings should always use direct reference to backing store
                if ( (this->type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
                        parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
                }
        }
        else {
                // non-extern: the content holds the target address and r_symbolnum is a section number
                if ( reloc->r_pcrel()  )
                        contentValue += srcAddr;
                parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
        }

        switch ( reloc->r_type() ) {
                case ARM64_RELOC_UNSIGNED:
                        if ( reloc->r_pcrel() )
                                throw "pcrel and ARM64_RELOC_UNSIGNED not supported";
                        target.addend = contentValue;
                        switch ( reloc->r_length() ) {
                                case 0:
                                case 1:
                                        throw "length < 2 and ARM64_RELOC_UNSIGNED not supported";
                                case 2:
                                        parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
                                        break;
                                case 3:
                                        parser.addFixups(src, ld::Fixup::kindStoreLittleEndian64, target);
                                        break;
                        }
                        break;
                case ARM64_RELOC_BRANCH26:
                        if ( ! reloc->r_pcrel() )
                                throw "not pcrel and ARM64_RELOC_BRANCH26 not supported";
                        if ( ! reloc->r_extern() )
                                throw "r_extern == 0 and ARM64_RELOC_BRANCH26 not supported";
                        if ( reloc->r_length() != 2 )
                                throw "r_length != 2 and ARM64_RELOC_BRANCH26 not supported";
                        if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
                                parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreARM64DtraceCallSiteNop, false, target.name);
                                parser.addDtraceExtraInfos(src, &target.name[16]);
                        }
                        else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
                                parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreARM64DtraceIsEnableSiteClear, false, target.name);
                                parser.addDtraceExtraInfos(src, &target.name[20]);
                        }
                        else {
                                target.addend = prefixRelocAddend;
                                instruction = contentValue;
                                // low 26 bits of the instruction, scaled by 4
                                encodedAddend = (instruction & 0x03FFFFFF) << 2;
                                if ( encodedAddend != 0 ) {
                                        if ( prefixRelocAddend == 0 ) {
                                                warning("branch26 instruction at 0x%08X has embedded addend. ARM64_RELOC_ADDEND should be used instead", reloc->r_address());
                                                target.addend = encodedAddend;
                                        }
                                        else {
                                                throwf("branch26 instruction at 0x%08X has embedded addend and ARM64_RELOC_ADDEND also used", reloc->r_address());
                                        }
                                }
                                parser.addFixups(src, ld::Fixup::kindStoreARM64Branch26, target);
                        }
                        break;
                case ARM64_RELOC_PAGE21:
                        if ( ! reloc->r_pcrel() )
                                throw "not pcrel and ARM64_RELOC_PAGE21 not supported";
                        if ( ! reloc->r_extern() )
                                throw "r_extern == 0 and ARM64_RELOC_PAGE21 not supported";
                        if ( reloc->r_length() != 2 )
                                throw "length != 2 and ARM64_RELOC_PAGE21 not supported";
                        target.addend = prefixRelocAddend;
                        instruction = contentValue;
                        encodedAddend  = ((instruction & 0x60000000) >> 29) | ((instruction & 0x01FFFFE0) >> 3);
                        encodedAddend *= 4096; // internally addend is in bytes, so scale
                        if ( encodedAddend != 0 ) {
                                if ( prefixRelocAddend == 0 ) {
                                        warning("adrp instruction at 0x%08X has embedded addend. ARM64_RELOC_ADDEND should be used instead", reloc->r_address());
                                        target.addend = encodedAddend;
                                }
                                else {
                                        throwf("adrp instruction at 0x%08X has embedded addend and ARM64_RELOC_ADDEND also used", reloc->r_address());
                                }
                        }
                        parser.addFixups(src, ld::Fixup::kindStoreARM64Page21, target);
                        break;
                case ARM64_RELOC_PAGEOFF12:
                        if ( reloc->r_pcrel() )
                                throw "pcrel and ARM64_RELOC_PAGEOFF12 not supported";
                        if ( ! reloc->r_extern() )
                                throw "r_extern == 0 and ARM64_RELOC_PAGEOFF12 not supported";
                        if ( reloc->r_length() != 2 )
                                throw "length != 2 and ARM64_RELOC_PAGEOFF12 not supported";
                        target.addend = prefixRelocAddend;
                        instruction = contentValue;
                        encodedAddend = ((instruction & 0x003FFC00) >> 10);
                        // internally addend is in bytes.  Some instructions have an implicit scale factor
                        // selected by the top two bits of the instruction
                        // NOTE(review): 128-bit SIMD loads/stores appear to share the 0x00000000 code
                        // but scale by 16 -- confirm whether they need handling here
                        if ( (instruction & 0x3B000000) == 0x39000000 ) {
                                switch ( instruction & 0xC0000000 ) {
                                        case 0x00000000:
                                                break;
                                        case 0x40000000:
                                                encodedAddend *= 2;
                                                break;
                                        case 0x80000000:
                                                encodedAddend *= 4;
                                                break;
                                        case 0xC0000000:
                                                encodedAddend *= 8;
                                                break;
                                }
                        }
                        if ( encodedAddend != 0 ) {
                                if ( prefixRelocAddend == 0 ) {
                                        warning("pageoff12 instruction at 0x%08X has embedded addend. ARM64_RELOC_ADDEND should be used instead", reloc->r_address());
                                        target.addend = encodedAddend;
                                }
                                else {
                                        throwf("pageoff12 instruction at 0x%08X has embedded addend and ARM64_RELOC_ADDEND also used", reloc->r_address());
                                }
                        }
                        parser.addFixups(src, ld::Fixup::kindStoreARM64PageOff12, target);
                        break;
                case ARM64_RELOC_GOT_LOAD_PAGE21:
                        if ( ! reloc->r_pcrel() )
                                throw "not pcrel and ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
                        if ( ! reloc->r_extern() )
                                throw "r_extern == 0 and ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
                        if ( reloc->r_length() != 2 )
                                throw "length != 2 and ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
                        if ( prefixRelocAddend != 0 )
                                throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_GOT_LOAD_PAGE21 not supported";
                        instruction = contentValue;
                        target.addend = ((instruction & 0x60000000) >> 29) | ((instruction & 0x01FFFFE0) >> 3);
                        if ( target.addend != 0 )
                                throw "non-zero addend with ARM64_RELOC_GOT_LOAD_PAGE21 is not supported";
                        parser.addFixups(src, ld::Fixup::kindStoreARM64GOTLoadPage21, target);
                        break;
                case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
                        if ( reloc->r_pcrel() )
                                throw "pcrel and ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
                        if ( ! reloc->r_extern() )
                                throw "r_extern == 0 and ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
                        if ( reloc->r_length() != 2 )
                                throw "length != 2 and ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
                        if ( prefixRelocAddend != 0 )
                                throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_GOT_LOAD_PAGEOFF12 not supported";
                        instruction = contentValue;
                        target.addend = ((instruction & 0x003FFC00) >> 10);
                        parser.addFixups(src, ld::Fixup::kindStoreARM64GOTLoadPageOff12, target);
                        break;
                case ARM64_RELOC_TLVP_LOAD_PAGE21:
                        if ( ! reloc->r_pcrel() )
                                throw "not pcrel and ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
                        if ( ! reloc->r_extern() )
                                throw "r_extern == 0 and ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
                        if ( reloc->r_length() != 2 )
                                throw "length != 2 and ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
                        if ( prefixRelocAddend != 0 )
                                throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_TLVP_LOAD_PAGE21 not supported";
                        instruction = contentValue;
                        target.addend = ((instruction & 0x60000000) >> 29) | ((instruction & 0x01FFFFE0) >> 3);
                        if ( target.addend != 0 )
                                throw "non-zero addend with ARM64_RELOC_TLVP_LOAD_PAGE21 is not supported";
                        parser.addFixups(src, ld::Fixup::kindStoreARM64TLVPLoadPage21, target);
                        break;
                case ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
                        if ( reloc->r_pcrel() )
                                throw "pcrel and ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
                        if ( ! reloc->r_extern() )
                                throw "r_extern == 0 and ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
                        if ( reloc->r_length() != 2 )
                                throw "length != 2 and ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
                        if ( prefixRelocAddend != 0 )
                                throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_TLVP_LOAD_PAGEOFF12 not supported";
                        instruction = contentValue;
                        target.addend = ((instruction & 0x003FFC00) >> 10);
                        parser.addFixups(src, ld::Fixup::kindStoreARM64TLVPLoadPageOff12, target);
                        break;
                case ARM64_RELOC_SUBTRACTOR:
                        // 'target' (from this record) is subtracted; 'toTarget' (from the following
                        // ARM64_RELOC_UNSIGNED record) is the address it is subtracted from
                        if ( reloc->r_pcrel() )
                                throw "ARM64_RELOC_SUBTRACTOR cannot be pc-relative";
                        if ( reloc->r_length() < 2 )
                                throw "ARM64_RELOC_SUBTRACTOR must have r_length of 2 or 3";
                        if ( !reloc->r_extern() )
                                throw "ARM64_RELOC_SUBTRACTOR must have r_extern=1";
                        if ( nextReloc->r_type() != ARM64_RELOC_UNSIGNED )
                                throw "ARM64_RELOC_SUBTRACTOR must be followed by ARM64_RELOC_UNSIGNED";
                        if ( prefixRelocAddend != 0 )
                                throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_SUBTRACTOR not supported";
                        result = true;
                        if ( nextReloc->r_pcrel() )
                                throw "ARM64_RELOC_UNSIGNED following a ARM64_RELOC_SUBTRACTOR cannot be pc-relative";
                        if ( nextReloc->r_length() != reloc->r_length() )
                                throw "ARM64_RELOC_UNSIGNED following a ARM64_RELOC_SUBTRACTOR must have same r_length";
                        if ( nextReloc->r_extern() ) {
                                const macho_nlist<P>& sym = parser.symbolFromIndex(nextReloc->r_symbolnum());
                                // use direct reference for local symbols
                                // NOTE(review): unlike the lookup for 'target' above, only 'L' (not 'l')
                                // prefixed names are treated as local here -- confirm this is intended
                                if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
                                        parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), toTarget);
                                        toTarget.addend = contentValue;
                                        useDirectBinding = true;
                                }
                                else {
                                        toTarget.name = parser.nameFromSymbol(sym);
                                        toTarget.weakImport = parser.weakImportFromSymbol(sym);
                                        toTarget.addend = contentValue;
                                        useDirectBinding = false;
                                }
                        }
                        else {
                                parser.findTargetFromAddressAndSectionNum(contentValue, nextReloc->r_symbolnum(), toTarget);
                                useDirectBinding = (toTarget.atom->scope() == ld::Atom::scopeTranslationUnit);
                        }
                        // emit the four-part cluster: set target, add addend, subtract target, store
                        if ( useDirectBinding )
                                parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.atom);
                        else
                                parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.weakImport, toTarget.name);
                        parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, toTarget.addend);
                        if ( target.atom == NULL )
                                parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, false, target.name);
                        else
                                parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, target.atom);
                        if ( reloc->r_length() == 2 )
                                parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
                        else
                                parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian64);
                        break;
                case ARM64_RELOC_POINTER_TO_GOT:
                        if ( ! reloc->r_extern() )
                                throw "r_extern == 0 and ARM64_RELOC_POINTER_TO_GOT not supported";
                        if ( prefixRelocAddend != 0 )
                                throw "ARM64_RELOC_ADDEND followed by ARM64_RELOC_POINTER_TO_GOT not supported";
                        // the required length depends on whether the reference is pc-relative
                        if ( reloc->r_pcrel() ) {
                                if ( reloc->r_length() != 2 )
                                        throw "r_length != 2 and r_pcrel = 1 and ARM64_RELOC_POINTER_TO_GOT not supported";
                                parser.addFixups(src, ld::Fixup::kindStoreARM64PCRelToGOT, target);
                        }
                        else {
                                if ( reloc->r_length() != 3 )
                                        throw "r_length != 3 and r_pcrel = 0 and ARM64_RELOC_POINTER_TO_GOT not supported";
                                parser.addFixups(src, ld::Fixup::kindStoreARM64PointerToGOT, target);
                        }
                        break;
                default:
                        throwf("unknown relocation type %d", reloc->r_type());
        }
        return result;
}
#endif
7059
7060 template <typename A>
7061 bool ObjC1ClassSection<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
7062 {
7063         // inherited
7064         FixedSizeSection<A>::addRelocFixup(parser, reloc);
7065
7066         assert(0 && "needs template specialization");
7067         return false;
7068 }
7069
7070 template <>
7071 bool ObjC1ClassSection<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
7072 {
7073         // if this is the reloc for the super class name string, add implicit reference to super class
7074         if ( ((reloc->r_address() & R_SCATTERED) == 0) && (reloc->r_type() == GENERIC_RELOC_VANILLA) ) {
7075                 assert( reloc->r_length() == 2 );
7076                 assert( ! reloc->r_pcrel() );
7077
7078                 const macho_section<P>* sect = this->machoSection();
7079                 Parser<x86>::SourceLocation     src;
7080                 uint32_t srcAddr = sect->addr() + reloc->r_address();
7081                 src.atom = this->findAtomByAddress(srcAddr);
7082                 src.offsetInAtom = srcAddr - src.atom->objectAddress();
7083                 if ( src.offsetInAtom == 4 ) {
7084                         Parser<x86>::TargetDesc         stringTarget;
7085                         const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
7086                         uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
7087                         parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
7088
7089                         assert(stringTarget.atom != NULL);
7090                         assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
7091                         const char* superClassBaseName = (char*)stringTarget.atom->rawContentPointer();
7092                         char* superClassName = new char[strlen(superClassBaseName) + 20];
7093                         strcpy(superClassName, ".objc_class_name_");
7094                         strcat(superClassName, superClassBaseName);
7095
7096                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, superClassName);
7097                 }
7098         }
7099         // inherited
7100         return FixedSizeSection<x86>::addRelocFixup(parser, reloc);
7101 }
7102
7103
7104
7105 template <typename A>
7106 bool Objc1ClassReferences<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
7107 {
7108         // inherited
7109         PointerToCStringSection<A>::addRelocFixup(parser, reloc);
7110
7111         assert(0 && "needs template specialization");
7112         return false;
7113 }
7114
7115
7116
7117 template <>
7118 bool Objc1ClassReferences<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
7119 {
7120         // add implict class refs, fixups not usable yet, so look at relocations
7121         assert( (reloc->r_address() & R_SCATTERED) == 0 );
7122         assert( reloc->r_type() == GENERIC_RELOC_VANILLA );
7123         assert( reloc->r_length() == 2 );
7124         assert( ! reloc->r_pcrel() );
7125
7126         const macho_section<P>* sect = this->machoSection();
7127         Parser<x86>::SourceLocation     src;
7128         uint32_t srcAddr = sect->addr() + reloc->r_address();
7129         src.atom = this->findAtomByAddress(srcAddr);
7130         src.offsetInAtom = srcAddr - src.atom->objectAddress();
7131         Parser<x86>::TargetDesc         stringTarget;
7132         const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
7133         uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
7134         parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
7135
7136         assert(stringTarget.atom != NULL);
7137         assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
7138         const char* baseClassName = (char*)stringTarget.atom->rawContentPointer();
7139         char* objcClassName = new char[strlen(baseClassName) + 20];
7140         strcpy(objcClassName, ".objc_class_name_");
7141         strcat(objcClassName, baseClassName);
7142
7143         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, objcClassName);
7144
7145         // inherited
7146         return PointerToCStringSection<x86>::addRelocFixup(parser, reloc);
7147 }
7148
#if SUPPORT_ARCH_arm64
//
// Records one arm64 Linker Optimization Hint (LOH) as a single
// kindLinkerOptimizationHint fixup.
//
//   kind   - LOH_ARM64_* hint kind
//   count  - number of entries in 'addrs' (1..4 can be encoded)
//   addrs  - addresses of the instructions the hint covers
//
// The fixup is attached to the atom containing the lowest address, at that
// address's offset.  Its addend packs the kind, count-1 and the 4-byte-scaled
// distance of each address from the lowest one.
//
// A hint is skipped when the count is outside 1..4, when the addresses are not all
// inside one atom, when an address is not 4-byte aligned, or when an address is more
// than 0xFFFF bytes above the lowest one.  The first three cases always warn; the
// last warns only if parser.verboseOptimizationHints() is set.
//
template <>
void Section<arm64>::addLOH(class Parser<arm64>& parser, int kind, int count, const uint64_t addrs[]) {
        // addrs[0] is read unconditionally below and only delta1..delta4 exist,
        // so a count outside 1..4 cannot be processed safely
        if ( (count < 1) || (count > 4) ) {
                warning("arm64 Linker Optimiztion Hint %d has wrong number of arguments", kind);
                return; // skip this LOH
        }

        // check the argument count expected for the known kinds (mismatch only warns)
        switch (kind) {
                case LOH_ARM64_ADRP_ADRP:
                case LOH_ARM64_ADRP_LDR:
                case LOH_ARM64_ADRP_ADD:
                case LOH_ARM64_ADRP_LDR_GOT:
                        if ( count != 2 )
                                warning("arm64 Linker Optimiztion Hint %d has wrong number of arguments", kind);
                        break;
                case LOH_ARM64_ADRP_ADD_LDR:
                case LOH_ARM64_ADRP_LDR_GOT_LDR:
                case LOH_ARM64_ADRP_ADD_STR:
                case LOH_ARM64_ADRP_LDR_GOT_STR:
                        if ( count != 3 )
                                warning("arm64 Linker Optimiztion Hint %d has wrong number of arguments", kind);
                        break;
        }

        // pick lowest address in tuple for use as offsetInAtom
        const uint64_t lowestAddress = *std::min_element(addrs, addrs + count);

        // verify all other address are in same atom
        Atom<arm64>* inAtom = parser.findAtomByAddress(lowestAddress);
        const uint64_t atomStartAddr = inAtom->objectAddress();
        const uint64_t atomEndAddr = atomStartAddr + inAtom->size();
        for (int i=0; i < count; ++i) {
                if ( (addrs[i] < atomStartAddr) || (addrs[i] >= atomEndAddr) ) {
                        warning("arm64 Linker Optimiztion Hint addresses are not in same atom: 0x%08llX and 0x%08llX",
                                lowestAddress, addrs[i]);
                        return; // skip this LOH
                }
                if ( (addrs[i] & 0x3) != 0 ) {
                        warning("arm64 Linker Optimiztion Hint address is not 4-byte aligned: 0x%08llX", addrs[i]);
                        return; // skip this LOH
                }
                if ( (addrs[i] - lowestAddress) > 0xFFFF ) {
                        if ( parser.verboseOptimizationHints() ) {
                                warning("arm64 Linker Optimiztion Hint addresses are too far apart: 0x%08llX and 0x%08llX",
                                        lowestAddress, addrs[i]);
                        }
                        return; // skip this LOH
                }
        }

        // encoded kind, count, and address deltas in 64-bit addend
        ld::Fixup::LOH_arm64 extra;
        extra.addend = 0;
        extra.info.kind = kind;
        extra.info.count = count-1;
        extra.info.delta1 = (addrs[0] - lowestAddress) >> 2;
        extra.info.delta2 = (count > 1) ? ((addrs[1] - lowestAddress) >> 2) : 0;
        extra.info.delta3 = (count > 2) ? ((addrs[2] - lowestAddress) >> 2) : 0;
        extra.info.delta4 = (count > 3) ? ((addrs[3] - lowestAddress) >> 2) : 0;
        Parser<arm64>::SourceLocation src(inAtom, lowestAddress - atomStartAddr);
        parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindLinkerOptimizationHint, extra.addend);
}
#endif
7210
7211 template <typename A>
7212 void Section<A>::addLOH(class Parser<A>& parser, int kind, int count, const uint64_t addrs[]) {
7213
7214 }
7215
7216 template <typename A>
7217 void Section<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
7218 {
7219         const macho_section<P>* sect = this->machoSection();
7220         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + sect->reloff());
7221         const uint32_t relocCount = sect->nreloc();
7222         for (uint32_t r = 0; r < relocCount; ++r) {
7223                 try {
7224                         if ( this->addRelocFixup(parser, &relocs[r]) )
7225                                 ++r; // skip next
7226                 }
7227                 catch (const char* msg) {
7228                         throwf("in section %s,%s reloc %u: %s", sect->segname(), Section<A>::makeSectionName(sect), r, msg);
7229                 }
7230         }
7231
7232         // add follow-on fixups if .o file is missing .subsections_via_symbols
7233         if ( this->addFollowOnFixups() ) {
7234                 Atom<A>* end = &_endAtoms[-1];
7235                 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
7236                         typename Parser<A>::SourceLocation src(p, 0);
7237                         Atom<A>* nextAtom = &p[1];
7238                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
7239                 }
7240         }
7241         else if ( this->type() == ld::Section::typeCode ) {
7242                 // if FDE broke text not at a symbol, use followOn to keep code together
7243                 Atom<A>* end = &_endAtoms[-1];
7244                 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
7245                         typename Parser<A>::SourceLocation src(p, 0);
7246                         Atom<A>* nextAtom = &p[1];
7247                         if ( (p->symbolTableInclusion() == ld::Atom::symbolTableIn) && (nextAtom->symbolTableInclusion() == ld::Atom::symbolTableNotIn) ) {
7248                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
7249                         }
7250                 }
7251         }
7252         if ( !this->_altEntries.empty() && !this->addFollowOnFixups() ) {
7253                 if ( _altEntries.count(_beginAtoms) != 0 )
7254                         warning("N_ALT_ENTRY bit set on first atom in section %s/%s", sect->segname(), Section<A>::makeSectionName(sect));
7255
7256                 Atom<A>* end = &_endAtoms[-1];
7257                 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
7258                         Atom<A>* nextAtom = &p[1];
7259                         if ( _altEntries.count(nextAtom) != 0 ) {
7260                                 typename Parser<A>::SourceLocation src(p, 0);
7261                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
7262                                 typename Parser<A>::SourceLocation src2(nextAtom, 0);
7263                                 parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinate, p);
7264                         }
7265                 }
7266         }
7267
7268         // <rdar://problem/9218847> track data-in-code
7269         if ( parser.hasDataInCodeLabels() && (this->type() == ld::Section::typeCode) ) {
7270                 for (uint32_t i=0; i < parser.symbolCount(); ++i) {
7271                         const macho_nlist<P>& sym =     parser.symbolFromIndex(i);
7272                         // ignore stabs
7273                         if ( (sym.n_type() & N_STAB) != 0 )
7274                                 continue;
7275                         // ignore non-definitions
7276                         if ( (sym.n_type() & N_TYPE) != N_SECT )
7277                                 continue;
7278
7279                         // 'L' labels do not denote atom breaks
7280                         const char* symbolName = parser.nameFromSymbol(sym);
7281                         if ( symbolName[0] == 'L' ) {
7282                                 if ( strncmp(symbolName, "L$start$", 8) == 0 ) {
7283                                         ld::Fixup::Kind kind = ld::Fixup::kindNone;
7284                                         if ( strncmp(&symbolName[8], "data$", 5) == 0 )
7285                                                 kind = ld::Fixup::kindDataInCodeStartData;
7286                                         else if ( strncmp(&symbolName[8], "code$", 5) == 0 )
7287                                                 kind = ld::Fixup::kindDataInCodeEnd;
7288                                         else if ( strncmp(&symbolName[8], "jt8$", 4) == 0 )
7289                                                 kind = ld::Fixup::kindDataInCodeStartJT8;
7290                                         else if ( strncmp(&symbolName[8], "jt16$", 4) == 0 )
7291                                                 kind = ld::Fixup::kindDataInCodeStartJT16;
7292                                         else if ( strncmp(&symbolName[8], "jt32$", 4) == 0 )
7293                                                 kind = ld::Fixup::kindDataInCodeStartJT32;
7294                                         else if ( strncmp(&symbolName[8], "jta32$", 4) == 0 )
7295                                                 kind = ld::Fixup::kindDataInCodeStartJTA32;
7296                                         else
7297                                                 warning("unknown L$start$ label %s in file %s", symbolName, this->file().path());
7298                                         if ( kind != ld::Fixup::kindNone ) {
7299                                                 Atom<A>* inAtom = parser.findAtomByAddress(sym.n_value());
7300                                                 typename Parser<A>::SourceLocation src(inAtom, sym.n_value() - inAtom->objectAddress());
7301                                                 parser.addFixup(src, ld::Fixup::k1of1, kind);
7302                                         }
7303                                 }
7304                         }
7305                 }
7306         }
7307
7308         // <rdar://problem/11150575> Handle LC_DATA_IN_CODE in object files
7309         if ( this->type() == ld::Section::typeCode ) {
7310                 const pint_t startAddr = this->_machOSection->addr();
7311                 const pint_t endAddr = startAddr + this->_machOSection->size();
7312                 for ( const macho_data_in_code_entry<P>* p = parser.dataInCodeStart(); p != parser.dataInCodeEnd(); ++p ) {
7313                         if ( (p->offset() >= startAddr) && (p->offset() < endAddr) ) {
7314                                 ld::Fixup::Kind kind = ld::Fixup::kindNone;
7315                                 switch ( p->kind() ) {
7316                                         case DICE_KIND_DATA:
7317                                                 kind = ld::Fixup::kindDataInCodeStartData;
7318                                                 break;
7319                                         case DICE_KIND_JUMP_TABLE8:
7320                                                 kind = ld::Fixup::kindDataInCodeStartJT8;
7321                                                 break;
7322                                         case DICE_KIND_JUMP_TABLE16:
7323                                                 kind = ld::Fixup::kindDataInCodeStartJT16;
7324                                                 break;
7325                                         case DICE_KIND_JUMP_TABLE32:
7326                                                 kind = ld::Fixup::kindDataInCodeStartJT32;
7327                                                 break;
7328                                         case DICE_KIND_ABS_JUMP_TABLE32:
7329                                                 kind = ld::Fixup::kindDataInCodeStartJTA32;
7330                                                 break;
7331                                         default:
7332                                                 kind = ld::Fixup::kindDataInCodeStartData;
7333                                                 warning("uknown LC_DATA_IN_CODE kind (%d) at offset 0x%08X", p->kind(), p->offset());
7334                                                 break;
7335                                 }
7336                                 Atom<A>* inAtom = parser.findAtomByAddress(p->offset());
7337                                 typename Parser<A>::SourceLocation srcStart(inAtom, p->offset() - inAtom->objectAddress());
7338                                 parser.addFixup(srcStart, ld::Fixup::k1of1, kind);
7339                                 typename Parser<A>::SourceLocation srcEnd(inAtom, p->offset() + p->length() - inAtom->objectAddress());
7340                                 parser.addFixup(srcEnd, ld::Fixup::k1of1, ld::Fixup::kindDataInCodeEnd);
7341                         }
7342                 }
7343         }
7344
7345         // <rdar://problem/11945700> convert linker optimization hints into internal format
7346         if ( this->type() == ld::Section::typeCode && parser.hasOptimizationHints() ) {
7347                 const pint_t startAddr = this->_machOSection->addr();
7348                 const pint_t endAddr = startAddr + this->_machOSection->size();
7349                 for (const uint8_t* p = parser.optimizationHintsStart(); p < parser.optimizationHintsEnd(); ) {
7350                         uint64_t addrs[4];
7351                         int32_t kind = read_uleb128(&p, parser.optimizationHintsEnd());
7352                         if ( kind == 0 ) // padding at end of loh buffer
7353                                 break;
7354                         if ( kind == -1 ) {
7355                                 warning("malformed uleb128 kind in LC_LINKER_OPTIMIZATION_HINTS");
7356                                 break;
7357                         }
7358                         int32_t count = read_uleb128(&p, parser.optimizationHintsEnd());
7359                         if ( count == -1 ) {
7360                                 warning("malformed uleb128 count in LC_LINKER_OPTIMIZATION_HINTS");
7361                                 break;
7362                         }
7363                         if ( count > 3 ) {
7364                                 warning("address count > 3 in LC_LINKER_OPTIMIZATION_HINTS");
7365                                 break;
7366                         }
7367                         for (int32_t i=0; i < count; ++i) {
7368                                 addrs[i] = read_uleb128(&p, parser.optimizationHintsEnd());
7369                         }
7370                         if ( (startAddr <= addrs[0]) && (addrs[0] < endAddr) ) {
7371                                 this->addLOH(parser, kind, count, addrs);
7372                                 //fprintf(stderr, "kind=%d", kind);
7373                                 //for (int32_t i=0; i < count; ++i) {
7374                                 //      fprintf(stderr, ", addr=0x%08llX", addrs[i]);
7375                                 //}
7376                                 //fprintf(stderr, "\n");
7377                         }
7378                 }
7379         }
7380
7381
7382         // add follow-on fixups for aliases
7383         if ( _hasAliases ) {
7384                 for(Atom<A>* p = _beginAtoms; p < _endAtoms; ++p) {
7385                         if ( p->isAlias() && ! this->addFollowOnFixups() ) {
7386                                 Atom<A>* targetOfAlias = &p[1];
7387                                 assert(p < &_endAtoms[-1]);
7388                                 assert(p->_objAddress == targetOfAlias->_objAddress);
7389                                 typename Parser<A>::SourceLocation src(p, 0);
7390                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, targetOfAlias);
7391                         }
7392                 }
7393         }
7394 }
7395
7396
7397
7398 //
7399 // main function used by linker to instantiate ld::Files
7400 //
7401 ld::relocatable::File* parse(const uint8_t* fileContent, uint64_t fileLength,
7402                                                          const char* path, time_t modTime, ld::File::Ordinal ordinal, const ParserOptions& opts)
7403 {
7404         switch ( opts.architecture ) {
7405 #if SUPPORT_ARCH_x86_64
7406                 case CPU_TYPE_X86_64:
7407                         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) )
7408                                 return mach_o::relocatable::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7409                         break;
7410 #endif
7411 #if SUPPORT_ARCH_i386
7412                 case CPU_TYPE_I386:
7413                         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) )
7414                                 return mach_o::relocatable::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7415                         break;
7416 #endif
7417 #if SUPPORT_ARCH_arm_any
7418                 case CPU_TYPE_ARM:
7419                         if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) )
7420                                 return mach_o::relocatable::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7421                         break;
7422 #endif
7423 #if SUPPORT_ARCH_arm64
7424                 case CPU_TYPE_ARM64:
7425                         if ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) )
7426                                 return mach_o::relocatable::Parser<arm64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
7427                         break;
7428 #endif
7429         }
7430         return NULL;
7431 }
7432
7433 //
7434 // used by archive reader to validate member object file
7435 //
7436 bool isObjectFile(const uint8_t* fileContent, uint64_t fileLength, const ParserOptions& opts)
7437 {
7438         switch ( opts.architecture ) {
7439                 case CPU_TYPE_X86_64:
7440                         return ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) );
7441                 case CPU_TYPE_I386:
7442                         return ( mach_o::relocatable::Parser<x86>::validFile(fileContent) );
7443                 case CPU_TYPE_ARM:
7444                         return ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) );
7445                 case CPU_TYPE_ARM64:
7446                         return ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) );
7447         }
7448         return false;
7449 }
7450
7451 //
7452 // used by linker to infer architecture when no -arch is on command line
7453 //
7454 bool isObjectFile(const uint8_t* fileContent, cpu_type_t* result, cpu_subtype_t* subResult)
7455 {
7456         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
7457                 *result = CPU_TYPE_X86_64;
7458                 const macho_header<Pointer64<LittleEndian> >* header = (const macho_header<Pointer64<LittleEndian> >*)fileContent;
7459                 *subResult = header->cpusubtype();
7460                 return true;
7461         }
7462         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
7463                 *result = CPU_TYPE_I386;
7464                 *subResult = CPU_SUBTYPE_X86_ALL;
7465                 return true;
7466         }
7467         if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
7468                 *result = CPU_TYPE_ARM;
7469                 const macho_header<Pointer32<LittleEndian> >* header = (const macho_header<Pointer32<LittleEndian> >*)fileContent;
7470                 *subResult = header->cpusubtype();
7471                 return true;
7472         }
7473         if ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, false, 0) ) {
7474                 *result = CPU_TYPE_ARM64;
7475                 *subResult = CPU_SUBTYPE_ARM64_ALL;
7476                 return true;
7477         }
7478         return false;
7479 }
7480
7481 //
7482 // used by linker is error messages to describe bad .o file
7483 //
7484 const char* archName(const uint8_t* fileContent)
7485 {
7486         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
7487                 return mach_o::relocatable::Parser<x86_64>::fileKind(fileContent);
7488         }
7489         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
7490                 return mach_o::relocatable::Parser<x86>::fileKind(fileContent);
7491         }
7492         if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
7493                 return mach_o::relocatable::Parser<arm>::fileKind(fileContent);
7494         }
7495         return NULL;
7496 }
7497
7498 //
7499 // Used by archive reader when -ObjC option is specified
7500 //
7501 bool hasObjC2Categories(const uint8_t* fileContent)
7502 {
7503         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
7504                 return mach_o::relocatable::Parser<x86_64>::hasObjC2Categories(fileContent);
7505         }
7506         else if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
7507                 return mach_o::relocatable::Parser<arm>::hasObjC2Categories(fileContent);
7508         }
7509         else if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
7510                 return mach_o::relocatable::Parser<x86>::hasObjC2Categories(fileContent);
7511         }
7512 #if SUPPORT_ARCH_arm64
7513     else if ( mach_o::relocatable::Parser<arm64>::validFile(fileContent, false, 0) ) {
7514         return mach_o::relocatable::Parser<arm64>::hasObjC2Categories(fileContent);
7515     }
7516 #endif
7517         return false;
7518 }
7519
7520 //
7521 // Used by archive reader when -ObjC option is specified
7522 //
7523 bool hasObjC1Categories(const uint8_t* fileContent)
7524 {
7525         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
7526                 return mach_o::relocatable::Parser<x86>::hasObjC1Categories(fileContent);
7527         }
7528         return false;
7529 }
7530
7531
7532
7533 } // namespace relocatable
7534 } // namespace mach_o
7535
7536