src/ld/parsers/macho_relocatable_file.cpp

   1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
   2  *
   3  * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
   4  *
   5  * @APPLE_LICENSE_HEADER_START@
   6  *
   7  * This file contains Original Code and/or Modifications of Original Code
   8  * as defined in and that are subject to the Apple Public Source License
   9  * Version 2.0 (the 'License'). You may not use this file except in
  10  * compliance with the License. Please obtain a copy of the License at
  11  * http://www.opensource.apple.com/apsl/ and read it before using this
  12  * file.
  13  *
  14  * The Original Code and all software distributed under the License are
  15  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  19  * Please see the License for the specific language governing rights and
  20  * limitations under the License.
  21  *
  22  * @APPLE_LICENSE_HEADER_END@
  23  */
  24
  25
  26 #include <stdint.h>
  27 #include <stdlib.h>
  28 #include <math.h>
  29 #include <unistd.h>
  30 #include <fcntl.h>
  31 #include <sys/param.h>
  32 #include <sys/stat.h>
  33 #include <sys/mman.h>
  34
  35 #include "MachOFileAbstraction.hpp"
  36
  37 #include "libunwind/DwarfInstructions.hpp"
  38 #include "libunwind/AddressSpace.hpp"
  39 #include "libunwind/Registers.hpp"
  40
  41 #include <vector>
  42 #include <set>
  43 #include <map>
  44 #include <algorithm>
  45
  46 #include "dwarf2.h"
  47 #include "debugline.h"
  48
  49 #include "Architectures.hpp"
  50 #include "ld.hpp"
  51 #include "macho_relocatable_file.h"
  52
  53
  54
  55 extern void throwf(const char* format, ...) __attribute__ ((noreturn,format(printf, 1, 2)));
  56 extern void warning(const char* format, ...) __attribute__((format(printf, 1, 2)));
  57
  58 namespace mach_o {
  59 namespace relocatable {
  60
  61
  62 // forward reference
  63 template <typename A> class Parser;
  64 template <typename A> class Atom;
  65 template <typename A> class Section;
  66 template <typename A> class CFISection;
  67 template <typename A> class CUSection;
  68
  69 template <typename A>
  70 class File : public ld::relocatable::File
  71 {
  72 public:
  73                                                                                         File(const char* p, time_t mTime, const uint8_t* content, ld::File::Ordinal ord) :
  74                                                                                                 ld::relocatable::File(p,mTime,ord), _fileContent(content),
  75                                                                                                 _sectionsArray(NULL), _atomsArray(NULL),
  76                                                                                                 _sectionsArrayCount(0), _atomsArrayCount(0),
  77                                                                                                 _debugInfoKind(ld::relocatable::File::kDebugInfoNone),
  78                                                                                                 _dwarfTranslationUnitPath(NULL),
  79                                                                                                 _dwarfDebugInfoSect(NULL), _dwarfDebugAbbrevSect(NULL),
  80                                                                                                 _dwarfDebugLineSect(NULL), _dwarfDebugStringSect(NULL),
  81                                                                                                 _objConstraint(ld::File::objcConstraintNone),
  82                                                                                                 _cpuSubType(0),
  83                                                                                                 _canScatterAtoms(false) {}
  84         virtual                                                                 ~File();
  85
  86         // overrides of ld::File
  87         virtual bool                                                                            forEachAtom(ld::File::AtomHandler&) const;
  88         virtual bool                                                                            justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const
  89                                                                                                                                                                         { return false; }
  90
  91         // overrides of ld::relocatable::File
  92         virtual ObjcConstraint                                                          objCConstraint() const                  { return _objConstraint; }
  93         virtual uint32_t                                                                        cpuSubType() const                              { return _cpuSubType; }
  94         virtual DebugInfoKind                                                           debugInfo() const                               { return _debugInfoKind; }
  95         virtual const std::vector<ld::relocatable::File::Stab>* stabs() const                                   { return &_stabs; }
  96         virtual bool                                                                            canScatterAtoms() const                 { return _canScatterAtoms; }
  97         virtual const char*                                                                     translationUnitSource() const;
  98
  99         const uint8_t*                                                                          fileContent()                                   { return _fileContent; }
 100 private:
 101         friend class Atom<A>;
 102         friend class Section<A>;
 103         friend class Parser<A>;
 104         friend class CFISection<A>::OAS;
 105
 106         typedef typename A::P                                   P;
 107
 108         const uint8_t*                                                  _fileContent;
 109         Section<A>**                                                    _sectionsArray;
 110         uint8_t*                                                                _atomsArray;
 111         uint32_t                                                                _sectionsArrayCount;
 112         uint32_t                                                                _atomsArrayCount;
 113         std::vector<ld::Fixup>                                  _fixups;
 114         std::vector<ld::Atom::UnwindInfo>               _unwindInfos;
 115         std::vector<ld::Atom::LineInfo>                 _lineInfos;
 116         std::vector<ld::relocatable::File::Stab>_stabs;
 117         ld::relocatable::File::DebugInfoKind    _debugInfoKind;
 118         const char*                                                             _dwarfTranslationUnitPath;
 119         const macho_section<P>*                                 _dwarfDebugInfoSect;
 120         const macho_section<P>*                                 _dwarfDebugAbbrevSect;
 121         const macho_section<P>*                                 _dwarfDebugLineSect;
 122         const macho_section<P>*                                 _dwarfDebugStringSect;
 123         ld::File::ObjcConstraint                                _objConstraint;
 124         uint32_t                                                                _cpuSubType;
 125         bool                                                                    _canScatterAtoms;
 126 };
 127
 128
 129 template <typename A>
 130 class Section : public ld::Section
 131 {
 132 public:
 133         typedef typename A::P::uint_t   pint_t;
 134         typedef typename A::P                   P;
 135         typedef typename A::P::E                E;
 136
 137         virtual                                                 ~Section()                                      { }
 138         class File<A>&                                  file() const                            { return _file; }
 139         const macho_section<P>*                 machoSection() const            { return _machOSection; }
 140         uint32_t                                                sectionNum(class Parser<A>&) const;
 141         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr);
 142         virtual ld::Atom::ContentType   contentType()                           { return ld::Atom::typeUnclassified; }
 143         virtual bool                                    dontDeadStrip()                         { return (this->_machOSection->flags() & S_ATTR_NO_DEAD_STRIP); }
 144         virtual Atom<A>*                                findAtomByAddress(pint_t addr) { return this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms); }
 145         virtual bool                                    addFollowOnFixups() const       { return ! _file.canScatterAtoms(); }
 146         virtual uint32_t                                appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 147                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 148                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
 149         virtual uint32_t                                computeAtomCount(class Parser<A>& parser,
 150                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 151                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
 152         virtual void                                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 153         virtual bool                                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
 154         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const { return 0; }
 155         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 156                                                                                                         const ld::IndirectBindingTable& ind) const { return false; }
 157         static const char*                              makeSectionName(const macho_section<typename A::P>* s);
 158
 159 protected:
 160                                                 Section(File<A>& f, const macho_section<typename A::P>* s)
 161                                                         : ld::Section(makeSegmentName(s), makeSectionName(s), sectionType(s)),
 162                                                                 _file(f), _machOSection(s), _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
 163                                                 Section(File<A>& f, const char* segName, const char* sectName, ld::Section::Type t, bool hidden=false)
 164                                                         : ld::Section(segName, sectName, t, hidden), _file(f), _machOSection(NULL),
 165                                                                 _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
 166
 167
 168         Atom<A>*                                                findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end);
 169         uint32_t                                                x86_64PcRelOffset(uint8_t r_type);
 170         static const char*                              makeSegmentName(const macho_section<typename A::P>* s);
 171         static bool                                             readable(const macho_section<typename A::P>* s);
 172         static bool                                             writable(const macho_section<typename A::P>* s);
 173         static bool                                             exectuable(const macho_section<typename A::P>* s);
 174         static ld::Section::Type                sectionType(const macho_section<typename A::P>* s);
 175
 176         File<A>&                                                _file;
 177         const macho_section<P>*                 _machOSection;
 178         class Atom<A>*                                  _beginAtoms;
 179         class Atom<A>*                                  _endAtoms;
 180         bool                                                    _hasAliases;
 181 };
 182
 183
 184 template <typename A>
 185 class CFISection : public Section<A>
 186 {
 187 public:
 188                                                 CFISection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 189                                                         : Section<A>(f, s) { }
 190         uint32_t                        cfiCount();
 191
 192         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeCFI; }
 193         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 194         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 195         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 196         virtual bool            addFollowOnFixups() const       { return false; }
 197
 198
 199         ///
 200         /// ObjectFileAddressSpace is used as a template parameter to UnwindCursor for parsing
 201         /// dwarf CFI information in an object file.
 202         ///
 203         class OAS
 204         {
 205         public:
 206                         typedef typename A::P::uint_t   pint_t;
 207                         typedef typename A::P                   P;
 208                         typedef typename A::P::E                E;
 209                         typedef typename A::P::uint_t   sint_t;
 210
 211                                                         OAS(CFISection<A>& ehFrameSection, const uint8_t* ehFrameBuffer) :
 212                                                                 _ehFrameSection(ehFrameSection),
 213                                                                 _ehFrameContent(ehFrameBuffer),
 214                                                                 _ehFrameStartAddr(ehFrameSection.machoSection()->addr()),
 215                                                                 _ehFrameEndAddr(ehFrameSection.machoSection()->addr()+ehFrameSection.machoSection()->size()) {}
 216
 217                         uint8_t                 get8(pint_t addr) { return *((uint8_t*)mappedAddress(addr)); }
 218                         uint16_t                get16(pint_t addr)      { return E::get16(*((uint16_t*)mappedAddress(addr))); }
 219                         uint32_t                get32(pint_t addr)      { return E::get32(*((uint32_t*)mappedAddress(addr))); }
 220                         uint64_t                get64(pint_t addr)      { return E::get64(*((uint64_t*)mappedAddress(addr))); }
 221                         pint_t                  getP(pint_t addr)       { return P::getP(*((pint_t*)mappedAddress(addr))); }
 222                         uint64_t                getULEB128(pint_t& addr, pint_t end);
 223                         int64_t                 getSLEB128(pint_t& addr, pint_t end);
 224                         pint_t                  getEncodedP(pint_t& addr, pint_t end, uint8_t encoding);
 225         private:
 226                 const void*                     mappedAddress(pint_t addr);
 227
 228                 CFISection<A>&                          _ehFrameSection;
 229                 const uint8_t*                          _ehFrameContent;
 230                 pint_t                                          _ehFrameStartAddr;
 231                 pint_t                                          _ehFrameEndAddr;
 232         };
 233
 234
 235         typedef typename A::P::uint_t                   pint_t;
 236         typedef libunwind::CFI_Atom_Info<OAS>   CFI_Atom_Info;
 237
 238         void                            cfiParse(class Parser<A>& parser, uint8_t* buffer, CFI_Atom_Info cfiArray[], uint32_t cfiCount);
 239         bool                            needsRelocating();
 240
 241         static bool                     bigEndian();
 242 private:
 243         void                            addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo);
 244         static void                     warnFunc(void* ref, uint64_t funcAddr, const char* msg);
 245 };
 246
 247
 248 template <typename A>
 249 class CUSection : public Section<A>
 250 {
 251 public:
 252                                                 CUSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 253                                                         : Section<A>(f, s) { }
 254
 255         typedef typename A::P::uint_t   pint_t;
 256         typedef typename A::P                   P;
 257         typedef typename A::P::E                E;
 258
 259         virtual uint32_t                computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
 260         virtual uint32_t                appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
 261         virtual void                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 262         virtual bool                    addFollowOnFixups() const       { return false; }
 263
 264         struct Info {
 265                 pint_t          functionStartAddress;
 266                 uint32_t        functionSymbolIndex;
 267                 uint32_t        rangeLength;
 268                 uint32_t        compactUnwindInfo;
 269                 const char*     personality;
 270                 pint_t          lsdaAddress;
 271                 Atom<A>*        function;
 272                 Atom<A>*        lsda;
 273         };
 274
 275         uint32_t                                count();
 276         void                                    parse(class Parser<A>& parser, uint32_t cnt, Info array[]);
 277
 278
 279 private:
 280
 281         const char*                             personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc);
 282
 283         static int                              infoSorter(const void* l, const void* r);
 284
 285 };
 286
 287
 288 template <typename A>
 289 class TentativeDefinitionSection : public Section<A>
 290 {
 291 public:
 292                                                 TentativeDefinitionSection(Parser<A>& parser, File<A>& f)
 293                                                         : Section<A>(f, "__DATA", "__comm/tent", ld::Section::typeTentativeDefs)  {}
 294
 295         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeZeroFill; }
 296         virtual bool            addFollowOnFixups() const       { return false; }
 297         virtual Atom<A>*        findAtomByAddress(typename A::P::uint_t addr) { throw "TentativeDefinitionSection::findAtomByAddress() should never be called"; }
 298         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 299                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 300         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 301                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 302                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&);
 303         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
 304 private:
 305         typedef typename A::P::uint_t   pint_t;
 306         typedef typename A::P                   P;
 307 };
 308
 309
 310 template <typename A>
 311 class AbsoluteSymbolSection : public Section<A>
 312 {
 313 public:
 314                                                 AbsoluteSymbolSection(Parser<A>& parser, File<A>& f)
 315                                                         : Section<A>(f, "__DATA", "__abs", ld::Section::typeAbsoluteSymbols, true)  {}
 316
 317         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeUnclassified; }
 318         virtual bool                                    dontDeadStrip()         { return false; }
 319         virtual ld::Atom::Alignment             alignmentForAddress(typename A::P::uint_t addr) { return ld::Atom::Alignment(0); }
 320         virtual bool            addFollowOnFixups() const       { return false; }
 321         virtual Atom<A>*        findAtomByAddress(typename A::P::uint_t addr) { throw "AbsoluteSymbolSection::findAtomByAddress() should never be called"; }
 322         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 323                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 324         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 325                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 326                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&);
 327         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
 328         virtual Atom<A>*        findAbsAtomForValue(typename A::P::uint_t);
 329
 330 private:
 331         typedef typename A::P::uint_t   pint_t;
 332         typedef typename A::P                   P;
 333 };
 334
 335
 336 template <typename A>
 337 class SymboledSection : public Section<A>
 338 {
 339 public:
 340                                                 SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s);
 341         virtual ld::Atom::ContentType   contentType() { return _type; }
 342         virtual bool                                    dontDeadStrip();
 343         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 344                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 345         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 346                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
 347                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 348 protected:
 349         typedef typename A::P::uint_t   pint_t;
 350         typedef typename A::P                   P;
 351
 352         ld::Atom::ContentType                   _type;
 353 };
 354
 355
 356 template <typename A>
 357 class TLVDefsSection : public SymboledSection<A>
 358 {
 359 public:
 360                                                 TLVDefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s) :
 361                                                         SymboledSection<A>(parser, f, s) { }
 362
 363 private:
 364
 365 };
 366
 367
 368 template <typename A>
 369 class ImplicitSizeSection : public Section<A>
 370 {
 371 public:
 372                                                 ImplicitSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 373                                                         : Section<A>(f, s) { }
 374         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 375         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 376 protected:
 377         typedef typename A::P::uint_t   pint_t;
 378         typedef typename A::P                   P;
 379
 380         virtual bool                                            addFollowOnFixups() const               { return false; }
 381         virtual const char*                                     unlabeledAtomName(Parser<A>& parser, pint_t addr) = 0;
 382         virtual ld::Atom::SymbolTableInclusion  symbolTableInclusion()          { return ld::Atom::symbolTableNotIn; }
 383         virtual pint_t                                          elementSizeAtAddress(pint_t addr) = 0;
 384         virtual ld::Atom::Scope                         scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeLinkageUnit; }
 385         virtual bool                                            useElementAt(Parser<A>& parser,
 386                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr) = 0;
 387         virtual ld::Atom::Definition            definition()                                    { return ld::Atom::definitionRegular; }
 388         virtual ld::Atom::Combine                       combine(Parser<A>& parser, pint_t addr) = 0;
 389         virtual bool                                            ignoreLabel(const char* label)  { return (label[0] == 'L'); }
 390 };
 391
 392 template <typename A>
 393 class FixedSizeSection : public ImplicitSizeSection<A>
 394 {
 395 public:
 396                                                 FixedSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 397                                                         : ImplicitSizeSection<A>(parser, f, s) { }
 398 protected:
 399         typedef typename A::P::uint_t   pint_t;
 400         typedef typename A::P                   P;
 401         typedef typename A::P::E                E;
 402
 403         virtual bool                                    useElementAt(Parser<A>& parser,
 404                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
 405                                                                                                                 { return true; }
 406 };
 407
 408
 409 template <typename A>
 410 class Literal4Section : public FixedSizeSection<A>
 411 {
 412 public:
 413                                                 Literal4Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 414                                                         : FixedSizeSection<A>(parser, f, s) {}
 415 protected:
 416         typedef typename A::P::uint_t   pint_t;
 417         typedef typename A::P                   P;
 418
 419         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(2); }
 420         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "4-byte-literal"; }
 421         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 4; }
 422         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 423         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 424         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 425                                                                                                         const ld::IndirectBindingTable& ind) const;
 426 };
 427
 428 template <typename A>
 429 class Literal8Section : public FixedSizeSection<A>
 430 {
 431 public:
 432                                                 Literal8Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 433                                                         : FixedSizeSection<A>(parser, f, s) {}
 434 protected:
 435         typedef typename A::P::uint_t   pint_t;
 436         typedef typename A::P                   P;
 437
 438         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(3); }
 439         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "8-byte-literal"; }
 440         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 8; }
 441         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 442         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 443         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 444                                                                                                         const ld::IndirectBindingTable& ind) const;
 445 };
 446
 447 template <typename A>
 448 class Literal16Section : public FixedSizeSection<A>
 449 {
 450 public:
 451                                                 Literal16Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 452                                                         : FixedSizeSection<A>(parser, f, s) {}
 453 protected:
 454         typedef typename A::P::uint_t   pint_t;
 455         typedef typename A::P                   P;
 456
 457         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(4); }
 458         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "16-byte-literal"; }
 459         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 16; }
 460         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 461         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 462         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 463                                                                                                         const ld::IndirectBindingTable& ind) const;
 464 };
 465
 466
 467 template <typename A>
 468 class NonLazyPointerSection : public FixedSizeSection<A>
 469 {
 470 public:
 471                                                 NonLazyPointerSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 472                                                         : FixedSizeSection<A>(parser, f, s) {}
 473 protected:
 474         typedef typename A::P::uint_t   pint_t;
 475         typedef typename A::P                   P;
 476
 477         virtual void                                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 478         virtual ld::Atom::ContentType   contentType()                                                   { return ld::Atom::typeNonLazyPointer; }
 479         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 480         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "non_lazy_ptr"; }
 481         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 482         virtual ld::Atom::Scope                 scopeAtAddress(Parser<A>& parser, pint_t addr);
 483         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t);
 484         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 485         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 486         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 487                                                                                                         const ld::IndirectBindingTable& ind) const;
 488
 489 private:
 490         static const char*                              targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind);
 491         static ld::Fixup::Kind                  fixupKind();
 492 };
 493
 494
 495 template <typename A>
 496 class CFStringSection : public FixedSizeSection<A>
 497 {
 498 public:
 499                                                 CFStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 500                                                         : FixedSizeSection<A>(parser, f, s) {}
 501 protected:
 502         typedef typename A::P::uint_t   pint_t;
 503
 504         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 505         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "CFString"; }
 506         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 4*sizeof(pint_t); }
 507         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndReferences; }
 508         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 509         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 510         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 511                                                                                                         const ld::IndirectBindingTable& ind) const;
 512 private:
 513         enum ContentType { contentUTF8, contentUTF16, contentUnknown };
 514         static const uint8_t*                   targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
 515                                                                                                 ContentType* ct, unsigned int* count);
 516 };
 517
 518
 519 template <typename A>
 520 class ObjC1ClassSection : public FixedSizeSection<A>
 521 {
 522 public:
 523                                                 ObjC1ClassSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 524                                                         : FixedSizeSection<A>(parser, f, s) {}
 525 protected:
 526         typedef typename A::P::uint_t   pint_t;
 527         typedef typename A::P                   P;
 528         typedef typename A::P::E                E;
 529
 530         virtual ld::Atom::Scope                 scopeAtAddress(Parser<A>& , pint_t )    { return ld::Atom::scopeGlobal; }
 531         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(2); }
 532         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t);
 533         virtual ld::Atom::SymbolTableInclusion  symbolTableInclusion()                  { return ld::Atom::symbolTableIn; }
 534         virtual pint_t                                  elementSizeAtAddress(pint_t addr);
 535         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineNever; }
 536         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 537         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
 538                                                                                                                                                         { return 0; }
 539         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 540                                                                                                         const ld::IndirectBindingTable& ind) const { return false; }
 541         virtual bool                                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
 542 };
 543
 544
 545 template <typename A>
 546 class ObjC2ClassRefsSection : public FixedSizeSection<A>
 547 {
 548 public:
 549                                                 ObjC2ClassRefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 550                                                         : FixedSizeSection<A>(parser, f, s) {}
 551 protected:
 552         typedef typename A::P::uint_t   pint_t;
 553
 554         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 555         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "objc-class-ref"; }
 556         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 557         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndReferences; }
 558         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 559         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 560         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 561                                                                                                         const ld::IndirectBindingTable& ind) const;
 562 private:
 563         const char*                                             targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 564 };
 565
 566
 567 template <typename A>
 568 class ObjC2CategoryListSection : public FixedSizeSection<A>
 569 {
 570 public:
 571                                                 ObjC2CategoryListSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 572                                                         : FixedSizeSection<A>(parser, f, s) {}
 573 protected:
 574         typedef typename A::P::uint_t   pint_t;
 575
 576         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 577         virtual ld::Atom::Scope                 scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeTranslationUnit; }
 578         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "objc-cat-list"; }
 579         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 580         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineNever; }
 581         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 582 private:
 583         const char*                                             targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 584 };
 585
 586
 587 template <typename A>
 588 class PointerToCStringSection : public FixedSizeSection<A>
 589 {
 590 public:
 591                                                 PointerToCStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 592                                                         : FixedSizeSection<A>(parser, f, s) {}
 593 protected:
 594         typedef typename A::P::uint_t   pint_t;
 595
 596         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 597         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "pointer-to-literal-cstring"; }
 598         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 599         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndReferences; }
 600         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 601         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 602         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 603                                                                                                         const ld::IndirectBindingTable& ind) const;
 604         virtual const char*                             targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 605 };
 606
 607
 608 template <typename A>
 609 class Objc1ClassReferences : public PointerToCStringSection<A>
 610 {
 611 public:
 612                                                 Objc1ClassReferences(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 613                                                         : PointerToCStringSection<A>(parser, f, s) {}
 614
 615         typedef typename A::P::uint_t   pint_t;
 616         typedef typename A::P                   P;
 617
 618         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "pointer-to-literal-objc-class-name"; }
 619         virtual bool                                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
 620         virtual const char*                             targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 621 };
 622
 623
 624 template <typename A>
 625 class CStringSection : public ImplicitSizeSection<A>
 626 {
 627 public:
 628                                                 CStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 629                                                         : ImplicitSizeSection<A>(parser, f, s) {}
 630 protected:
 631         typedef typename A::P::uint_t   pint_t;
 632         typedef typename A::P                   P;
 633
 634         virtual ld::Atom::ContentType   contentType()                                                   { return ld::Atom::typeCString; }
 635         virtual Atom<A>*                                findAtomByAddress(pint_t addr);
 636         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "cstring"; }
 637         virtual pint_t                                  elementSizeAtAddress(pint_t addr);
 638         virtual bool                                    ignoreLabel(const char* label);
 639         virtual bool                                    useElementAt(Parser<A>& parser,
 640                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr);
 641         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 642         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 643         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 644                                                                                                         const ld::IndirectBindingTable& ind) const;
 645
 646 };
 647
 648
 649 template <typename A>
 650 class UTF16StringSection : public SymboledSection<A>
 651 {
 652 public:
 653                                                 UTF16StringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 654                                                         : SymboledSection<A>(parser, f, s) {}
 655 protected:
 656         typedef typename A::P::uint_t   pint_t;
 657         typedef typename A::P                   P;
 658
 659         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 660         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 661         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 662                                                                                                         const ld::IndirectBindingTable& ind) const;
 663 };
 664
 665
 666 //
 667 // Atoms in mach-o files
 668 //
 669 template <typename A>
 670 class Atom : public ld::Atom
 671 {
 672 public:
 673         // overrides of ld::Atom
 674         virtual ld::File*                                                       file() const            { return &sect().file(); }
 675         virtual const char*                                                     translationUnitSource() const
 676                                                                                                                                         { return sect().file().translationUnitSource(); }
 677         virtual const char*                                                     name() const            { return _name; }
 678         virtual uint64_t                                                        size() const            { return _size; }
 679         virtual uint64_t                                                        objectAddress() const { return _objAddress; }
 680         virtual void                                                            copyRawContent(uint8_t buffer[]) const;
 681         virtual const uint8_t*                                          rawContentPointer() const { return contentPointer(); }
 682         virtual unsigned long                                           contentHash(const ld::IndirectBindingTable& ind) const
 683                                                                                                                         { if ( _hash == 0 ) _hash = sect().contentHash(this, ind); return _hash; }
 684         virtual bool                                                            canCoalesceWith(const ld::Atom& rhs, const ld::IndirectBindingTable& ind) const
 685                                                                                                                         { return sect().canCoalesceWith(this, rhs, ind); }
 686         virtual ld::Fixup::iterator                                     fixupsBegin() const     { return &machofile()._fixups[_fixupsStartIndex]; }
 687         virtual ld::Fixup::iterator                                     fixupsEnd()     const   { return &machofile()._fixups[_fixupsStartIndex+_fixupsCount]; }
 688         virtual ld::Atom::UnwindInfo::iterator          beginUnwind() const     { return &machofile()._unwindInfos[_unwindInfoStartIndex]; }
 689         virtual ld::Atom::UnwindInfo::iterator          endUnwind()     const   { return &machofile()._unwindInfos[_unwindInfoStartIndex+_unwindInfoCount];  }
 690         virtual ld::Atom::LineInfo::iterator            beginLineInfo() const{ return &machofile()._lineInfos[_lineInfoStartIndex]; }
 691         virtual ld::Atom::LineInfo::iterator            endLineInfo() const { return &machofile()._lineInfos[_lineInfoStartIndex+_lineInfoCount];  }
 692
 693 private:
 694
 695         enum {  kFixupStartIndexBits = 32,
 696                         kLineInfoStartIndexBits = 32,
 697                         kUnwindInfoStartIndexBits = 24,
 698                         kFixupCountBits = 24,
 699                         kLineInfoCountBits = 12,
 700                         kUnwindInfoCountBits = 4
 701                 }; // must sum to 128
 702
 703 public:
 704         // methods for all atoms from mach-o object file
 705                         Section<A>&                                                     sect() const                    { return (Section<A>&)section(); }
 706                         File<A>&                                                        machofile() const                       { return ((Section<A>*)(this->_section))->file(); }
 707                         void                                                            setFixupsRange(uint32_t s, uint32_t c);
 708                         void                                                            setUnwindInfoRange(uint32_t s, uint32_t c);
 709                         void                                                            extendUnwindInfoRange();
 710                         void                                                            setLineInfoRange(uint32_t s, uint32_t c);
 711                         bool                                                            roomForMoreLineInfoCount() { return (_lineInfoCount < ((1<<kLineInfoCountBits)-1)); }
 712                         void                                                            incrementLineInfoCount() { assert(roomForMoreLineInfoCount()); ++_lineInfoCount; }
 713                         void                                                            incrementFixupCount() { if (_fixupsCount == ((1 << kFixupCountBits)-1))
 714                                                                                                                                                         throwf("too may fixups in %s", name()); ++_fixupsCount; }
 715                         const uint8_t*                                          contentPointer() const;
 716                         uint32_t                                                        fixupCount() const { return _fixupsCount; }
 717                         void                                                            verifyAlignment() const;
 718
 719         typedef typename A::P                                           P;
 720         typedef typename A::P::E                                        E;
 721         typedef typename A::P::uint_t                           pint_t;
 722                                                                                                 // constuct via all attributes
 723                                                                                                 Atom(Section<A>& sct, const char* nm, pint_t addr, uint64_t sz,
 724                                                                                                         ld::Atom::Definition d, ld::Atom::Combine c, ld::Atom::Scope s,
 725                                                                                                         ld::Atom::ContentType ct, ld::Atom::SymbolTableInclusion i,
 726                                                                                                         bool dds, bool thumb, bool al, ld::Atom::Alignment a)
 727                                                                                                                 : ld::Atom((ld::Section&)sct, d, c, s, ct, i, dds, thumb, al, a),
 728                                                                                                                         _size(sz), _objAddress(addr), _name(nm), _hash(0),
 729                                                                                                                         _fixupsStartIndex(0), _lineInfoStartIndex(0),
 730                                                                                                                         _unwindInfoStartIndex(0), _fixupsCount(0),
 731                                                                                                                         _lineInfoCount(0), _unwindInfoCount(0) { }
 732                                                                                                 // construct via symbol table entry
 733                                                                                                 Atom(Section<A>& sct, Parser<A>& parser, const macho_nlist<P>& sym,
 734                                                                                                                                 uint64_t sz, bool alias=false)
 735                                                                                                                 : ld::Atom((ld::Section&)sct, parser.definitionFromSymbol(sym),
 736                                                                                                                                 parser.combineFromSymbol(sym), parser.scopeFromSymbol(sym),
 737                                                                                                                                 parser.resolverFromSymbol(sym) ? ld::Atom::typeResolver : sct.contentType(),
 738                                                                                                                                 parser.inclusionFromSymbol(sym),
 739                                                                                                                                 parser.dontDeadStripFromSymbol(sym) || sct.dontDeadStrip(),
 740                                                                                                                                 parser.isThumbFromSymbol(sym), alias,
 741                                                                                                                                 sct.alignmentForAddress(sym.n_value())),
 742                                                                                                                         _size(sz), _objAddress(sym.n_value()),
 743                                                                                                                         _name(parser.nameFromSymbol(sym)), _hash(0),
 744                                                                                                                         _fixupsStartIndex(0), _lineInfoStartIndex(0),
 745                                                                                                                         _unwindInfoStartIndex(0), _fixupsCount(0),
 746                                                                                                                         _lineInfoCount(0), _unwindInfoCount(0) {
 747                                                                                                                                 // <rdar://problem/6783167> support auto-hidden weak symbols
 748                                                                                                                                 if ( _scope == ld::Atom::scopeGlobal &&
 749                                                                                                                                                 (sym.n_desc() & (N_WEAK_DEF|N_WEAK_REF)) == (N_WEAK_DEF|N_WEAK_REF) )
 750                                                                                                                                         this->setAutoHide();
 751                                                                                                                                         this->verifyAlignment();
 752                                                                                                                         }
 753
 754 private:
 755         friend class Parser<A>;
 756         friend class Section<A>;
 757         friend class CStringSection<A>;
 758         friend class AbsoluteSymbolSection<A>;
 759
 760         pint_t                                                                          _size;
 761         pint_t                                                                          _objAddress;
 762         const char*                                                                     _name;
 763         mutable unsigned long                                           _hash;
 764
 765         uint64_t                                                                        _fixupsStartIndex               : kFixupStartIndexBits,
 766                                                                                                 _lineInfoStartIndex             : kLineInfoStartIndexBits,
 767                                                                                                 _unwindInfoStartIndex   : kUnwindInfoStartIndexBits,
 768                                                                                                 _fixupsCount                    : kFixupCountBits,
 769                                                                                                 _lineInfoCount                  : kLineInfoCountBits,
 770                                                                                                 _unwindInfoCount                : kUnwindInfoCountBits;
 771
 772 };
 773
 774
 775
 776 template <typename A>
 777 void Atom<A>::setFixupsRange(uint32_t startIndex, uint32_t count)
 778 {
 779         if ( count >= (1 << kFixupCountBits) )
 780                 throwf("too many fixups in function %s", this->name());
 781         if ( startIndex >= (1 << kFixupStartIndexBits) )
 782                 throwf("too many fixups in file");
 783         assert(((startIndex+count) <= sect().file()._fixups.size()) && "fixup index out of range");
 784         _fixupsStartIndex = startIndex;
 785         _fixupsCount = count;
 786 }
 787
 788 template <typename A>
 789 void Atom<A>::setUnwindInfoRange(uint32_t startIndex, uint32_t count)
 790 {
 791         if ( count >= (1 << kUnwindInfoCountBits) )
 792                 throwf("too many compact unwind infos in function %s", this->name());
 793         if ( startIndex >= (1 << kUnwindInfoStartIndexBits) )
 794                 throwf("too many compact unwind infos (%d) in file", startIndex);
 795         assert((startIndex+count) <= sect().file()._unwindInfos.size() && "unwindinfo index out of range");
 796         _unwindInfoStartIndex = startIndex;
 797         _unwindInfoCount = count;
 798 }
 799
 800 template <typename A>
 801 void Atom<A>::extendUnwindInfoRange()
 802 {
 803         if ( _unwindInfoCount+1 >= (1 << kUnwindInfoCountBits) )
 804                 throwf("too many compact unwind infos in function %s", this->name());
 805         _unwindInfoCount += 1;
 806 }
 807
 808 template <typename A>
 809 void Atom<A>::setLineInfoRange(uint32_t startIndex, uint32_t count)
 810 {
 811         assert((count < (1 << kLineInfoCountBits)) && "too many line infos");
 812         assert((startIndex+count) < sect().file()._lineInfos.size() && "line info index out of range");
 813         _lineInfoStartIndex = startIndex;
 814         _lineInfoCount = count;
 815 }
 816
 817 template <typename A>
 818 const uint8_t* Atom<A>::contentPointer() const
 819 {
 820         const macho_section<P>* sct = this->sect().machoSection();
 821         if ( this->_objAddress > sct->addr() + sct->size() )
 822                 throwf("malformed .o file, symbol has address 0x%0llX which is outside range of its section", (uint64_t)this->_objAddress);
 823         uint32_t fileOffset = sct->offset() - sct->addr() + this->_objAddress;
 824         return this->sect().file().fileContent()+fileOffset;
 825 }
 826
 827
 828 template <typename A>
 829 void Atom<A>::copyRawContent(uint8_t buffer[]) const
 830 {
 831         // copy base bytes
 832         if ( this->contentType() == ld::Atom::typeZeroFill ) {
 833                 bzero(buffer, _size);
 834         }
 835         else if ( _size != 0 ) {
 836                 memcpy(buffer, this->contentPointer(), _size);
 837         }
 838 }
 839
 840 template <>
 841 void Atom<arm>::verifyAlignment() const
 842 {
 843         if ( (this->section().type() == ld::Section::typeCode) && ! isThumb() ) {
 844                 if ( ((_objAddress % 4) != 0) || (this->alignment().powerOf2 < 2) )
 845                         warning("ARM function not 4-byte aligned: %s from %s", this->name(), this->file()->path());
 846         }
 847 }
 848
 849 template <typename A>
 850 void Atom<A>::verifyAlignment() const
 851 {
 852 }
 853
 854
 855 template <typename A>
 856 class Parser
 857 {
 858 public:
 859         static bool                                                                             validFile(const uint8_t* fileContent, bool subtypeMustMatch=false,
 860                                                                                                                                 cpu_subtype_t subtype=0);
 861         static const char*                                                              fileKind(const uint8_t* fileContent);
 862         static bool                                                                             hasObjC2Categories(const uint8_t* fileContent);
 863         static bool                                                                             hasObjC1Categories(const uint8_t* fileContent);
 864         static ld::relocatable::File*                                   parse(const uint8_t* fileContent, uint64_t fileLength,
 865                                                                                                                         const char* path, time_t modTime, ld::File::Ordinal ordinal,
 866                                                                                                                          const ParserOptions& opts) {
 867                                                                                                                                 Parser p(fileContent, fileLength, path, modTime,
 868                                                                                                                                                 ordinal, opts.convertUnwindInfo);
 869                                                                                                                                 return p.parse(opts);
 870                                                                                                                 }
 871
 872         typedef typename A::P                                           P;
 873         typedef typename A::P::E                                        E;
 874         typedef typename A::P::uint_t                           pint_t;
 875
 876         struct SourceLocation {
 877                                                                 SourceLocation() {}
 878                                                                 SourceLocation(Atom<A>* a, uint32_t o) : atom(a), offsetInAtom(o) {}
 879                 Atom<A>*        atom;
 880                 uint32_t        offsetInAtom;
 881         };
 882
 883         struct TargetDesc {
 884                 Atom<A>*        atom;
 885                 const char*     name;           // only used if targetAtom is NULL
 886                 int64_t         addend;
 887                 bool            weakImport;     // only used if targetAtom is NULL
 888         };
 889
 890         struct FixupInAtom {
 891                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) :
 892                         fixup(src.offsetInAtom, c, k, target), atom(src.atom) { src.atom->incrementFixupCount(); }
 893
 894                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) :
 895                         fixup(src.offsetInAtom, c, k, b, target), atom(src.atom) { src.atom->incrementFixupCount(); }
 896
 897                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) :
 898                         fixup(src.offsetInAtom, c, k, wi, name), atom(src.atom) { src.atom->incrementFixupCount(); }
 899
 900                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) :
 901                         fixup(src.offsetInAtom, c, k, b, name), atom(src.atom) { src.atom->incrementFixupCount(); }
 902
 903                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) :
 904                         fixup(src.offsetInAtom, c, k, addend), atom(src.atom) { src.atom->incrementFixupCount(); }
 905
 906                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) :
 907                         fixup(src.offsetInAtom, c, k, (uint64_t)0), atom(src.atom) { src.atom->incrementFixupCount(); }
 908
 909                 ld::Fixup               fixup;
 910                 Atom<A>*                atom;
 911         };
 912
 913         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) {
 914                 _allFixups.push_back(FixupInAtom(src, c, k, target));
 915         }
 916
 917         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) {
 918                 _allFixups.push_back(FixupInAtom(src, c, k, b, target));
 919         }
 920
 921         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) {
 922                 _allFixups.push_back(FixupInAtom(src, c, k, wi, name));
 923         }
 924
 925         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) {
 926                 _allFixups.push_back(FixupInAtom(src, c, k, b, name));
 927         }
 928
 929         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) {
 930                 _allFixups.push_back(FixupInAtom(src, c, k, addend));
 931         }
 932
 933         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) {
 934                 _allFixups.push_back(FixupInAtom(src, c, k));
 935         }
 936
 937
 938         uint32_t                                                                                symbolCount() { return _symbolCount; }
 939         uint32_t                                                                                indirectSymbol(uint32_t indirectIndex);
 940         const macho_nlist<P>&                                                   symbolFromIndex(uint32_t index);
 941         const char*                                                                             nameFromSymbol(const macho_nlist<P>& sym);
 942         ld::Atom::Scope                                                                 scopeFromSymbol(const macho_nlist<P>& sym);
 943         static ld::Atom::Definition                                             definitionFromSymbol(const macho_nlist<P>& sym);
 944         static ld::Atom::Combine                                                combineFromSymbol(const macho_nlist<P>& sym);
 945                         ld::Atom::SymbolTableInclusion                  inclusionFromSymbol(const macho_nlist<P>& sym);
 946         static bool                                                                             dontDeadStripFromSymbol(const macho_nlist<P>& sym);
 947         static bool                                                                             isThumbFromSymbol(const macho_nlist<P>& sym);
 948         static bool                                                                             weakImportFromSymbol(const macho_nlist<P>& sym);
 949         static bool                                                                             resolverFromSymbol(const macho_nlist<P>& sym);
 950         uint32_t                                                                                symbolIndexFromIndirectSectionAddress(pint_t,const macho_section<P>*);
 951         const macho_section<P>*                                                 firstMachOSection() { return _sectionsStart; }
 952         const macho_section<P>*                                                 machOSectionFromSectionIndex(uint32_t index);
 953         uint32_t                                                                                machOSectionCount() { return _machOSectionsCount; }
 954         uint32_t                                                                                undefinedStartIndex() { return _undefinedStartIndex; }
 955         uint32_t                                                                                undefinedEndIndex() { return _undefinedEndIndex; }
 956         void                                                                                    addFixup(FixupInAtom f) { _allFixups.push_back(f); }
 957         Section<A>*                                                                             sectionForNum(unsigned int sectNum);
 958         Section<A>*                                                                             sectionForAddress(pint_t addr);
 959         Atom<A>*                                                                                findAtomByAddress(pint_t addr);
 960         Atom<A>*                                                                                findAtomByAddressOrNullIfStub(pint_t addr);
 961         Atom<A>*                                                                                findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom);
 962         Atom<A>*                                                                                findAtomByName(const char* name);       // slow!
 963         void                                                                                    findTargetFromAddress(pint_t addr, TargetDesc& target);
 964         void                                                                                    findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target);
 965         void                                                                                    findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum,
 966                                                                                                                                                                                 TargetDesc& target);
 967         uint32_t                                                                                tentativeDefinitionCount() { return _tentativeDefinitionCount; }
 968         uint32_t                                                                                absoluteSymbolCount() { return _absoluteSymbolCount; }
 969
 970         bool                                                                                    hasStubsSection() { return (_stubsSectionNum != 0); }
 971         unsigned int                                                                    stubsSectionNum() { return _stubsSectionNum; }
 972         void                                                                                    addDtraceExtraInfos(const SourceLocation& src, const char* provider);
 973         const char*                                                                             scanSymbolTableForAddress(uint64_t addr);
 974         bool                                                                                    convertUnwindInfo() { return _convertUnwindInfo; }
 975         bool                                                                                    hasDataInCodeLabels() { return _hasDataInCodeLabels; }
 976
 977         macho_data_in_code_entry<P>*                                    dataInCodeStart() { return _dataInCodeStart; }
 978         macho_data_in_code_entry<P>*                                    dataInCodeEnd()   { return _dataInCodeEnd; }
 979
 980         void                                                    addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target);
 981         void                                                    addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase);
 982
 983
 984
 985         struct LabelAndCFIBreakIterator {
 986                 typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
 987                                                                 LabelAndCFIBreakIterator(const uint32_t* ssa, uint32_t ssc, const pint_t* cfisa,
 988                                                                                                                 uint32_t cfisc, bool ols)
 989                                                                         : sortedSymbolIndexes(ssa), sortedSymbolCount(ssc), cfiStartsArray(cfisa),
 990                                                                                 cfiStartsCount(cfisc), fileHasOverlappingSymbols(ols),
 991                                                                                 newSection(false), cfiIndex(0), symIndex(0) {}
 992                 bool                                    next(Parser<A>& parser, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
 993                                                                                 pint_t* addr, pint_t* size, const macho_nlist<P>** sym);
 994                 pint_t                                  peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr);
 995                 void                                    beginSection() { newSection = true; symIndex = 0; }
 996
 997                 const uint32_t* const           sortedSymbolIndexes;
 998                 const uint32_t                          sortedSymbolCount;
 999                 const pint_t*                           cfiStartsArray;
1000                 const uint32_t                          cfiStartsCount;
1001                 const bool                                      fileHasOverlappingSymbols;
1002                 bool                                            newSection;
1003                 uint32_t                                        cfiIndex;
1004                 uint32_t                                        symIndex;
1005         };
1006
1007         struct CFI_CU_InfoArrays {
1008                         typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
1009                         typedef typename CUSection<A>::Info CU_Info;
1010                                                 CFI_CU_InfoArrays(const CFI_Atom_Info* cfiAr, uint32_t cfiC, CU_Info* cuAr, uint32_t cuC)
1011                                                         : cfiArray(cfiAr), cuArray(cuAr), cfiCount(cfiC), cuCount(cuC) {}
1012                 const CFI_Atom_Info* const      cfiArray;
1013                         CU_Info* const                  cuArray;
1014                 const uint32_t                          cfiCount;
1015                 const uint32_t                          cuCount;
1016         };
1017
1018
1019
1020 private:
1021         friend class Section<A>;
1022
1023         enum SectionType { sectionTypeIgnore, sectionTypeLiteral4, sectionTypeLiteral8, sectionTypeLiteral16,
1024                                                 sectionTypeNonLazy, sectionTypeCFI, sectionTypeCString, sectionTypeCStringPointer,
1025                                                 sectionTypeUTF16Strings, sectionTypeCFString, sectionTypeObjC2ClassRefs, typeObjC2CategoryList,
1026                                                 sectionTypeObjC1Classes, sectionTypeSymboled, sectionTypeObjC1ClassRefs,
1027                                                 sectionTypeTentativeDefinitions, sectionTypeAbsoluteSymbols, sectionTypeTLVDefs,
1028                                                 sectionTypeCompactUnwind };
1029
1030         template <typename P>
1031         struct MachOSectionAndSectionClass
1032         {
1033                 const macho_section<P>* sect;
1034                 SectionType                             type;
1035
1036                 static int sorter(const void* l, const void* r) {
1037                         const MachOSectionAndSectionClass<P>* left = (MachOSectionAndSectionClass<P>*)l;
1038                         const MachOSectionAndSectionClass<P>* right = (MachOSectionAndSectionClass<P>*)r;
1039                         int64_t diff = left->sect->addr() - right->sect->addr();
1040                         if ( diff == 0 )
1041                                 return 0;
1042                         if ( diff < 0 )
1043                                 return -1;
1044                         else
1045                                 return 1;
1046                 }
1047         };
1048
1049         struct ParserAndSectionsArray { Parser* parser; const uint32_t* sortedSectionsArray; };
1050
1051
1052                                                                                                         Parser(const uint8_t* fileContent, uint64_t fileLength,
1053                                                                                                                         const char* path, time_t modTime,
1054                                                                                                                         ld::File::Ordinal ordinal, bool convertUnwindInfo);
1055         ld::relocatable::File*                                                  parse(const ParserOptions& opts);
1056         uint8_t                                                                                 loadCommandSizeMask();
1057         bool                                                                                    parseLoadCommands();
1058         void                                                                                    makeSections();
1059         void                                                                                    prescanSymbolTable();
1060         void                                                                                    makeSortedSymbolsArray(uint32_t symArray[], const uint32_t sectionArray[]);
1061         void                                                                                    makeSortedSectionsArray(uint32_t array[]);
1062         static int                                                                              pointerSorter(const void* l, const void* r);
1063         static int                                                                              symbolIndexSorter(void* extra, const void* l, const void* r);
1064         static int                                                                              sectionIndexSorter(void* extra, const void* l, const void* r);
1065
1066         void                                                                                    parseDebugInfo();
1067         void                                                                                    parseStabs();
1068         static bool                                                                             isConstFunStabs(const char *stabStr);
1069         bool                                                                                    read_comp_unit(const char ** name, const char ** comp_dir,
1070                                                                                                                                                                                                 uint64_t *stmt_list);
1071         const char*                                                                             getDwarfString(uint64_t form, const uint8_t* p);
1072         bool                                                                                    skip_form(const uint8_t ** offset, const uint8_t * end,
1073                                                                                                                                 uint64_t form, uint8_t addr_size, bool dwarf64);
1074
1075
1076         // filled in by constructor
1077         const uint8_t*                                                          _fileContent;
1078         uint32_t                                                                        _fileLength;
1079         const char*                                                                     _path;
1080         time_t                                                                          _modTime;
1081         ld::File::Ordinal                                                       _ordinal;
1082
1083         // filled in by parseLoadCommands()
1084         File<A>*                                                                        _file;
1085         const macho_nlist<P>*                                           _symbols;
1086         uint32_t                                                                        _symbolCount;
1087         const char*                                                                     _strings;
1088         uint32_t                                                                        _stringsSize;
1089         const uint32_t*                                                         _indirectTable;
1090         uint32_t                                                                        _indirectTableCount;
1091         uint32_t                                                                        _undefinedStartIndex;
1092         uint32_t                                                                        _undefinedEndIndex;
1093         const macho_section<P>*                                         _sectionsStart;
1094         uint32_t                                                                        _machOSectionsCount;
1095         bool                                                                            _hasUUID;
1096         macho_data_in_code_entry<P>*                            _dataInCodeStart;
1097         macho_data_in_code_entry<P>*                            _dataInCodeEnd;
1098
1099         // filled in by parse()
1100         CFISection<A>*                                                          _EHFrameSection;
1101         CUSection<A>*                                                           _compactUnwindSection;
1102         AbsoluteSymbolSection<A>*                                       _absoluteSection;
1103         uint32_t                                                                        _tentativeDefinitionCount;
1104         uint32_t                                                                        _absoluteSymbolCount;
1105         uint32_t                                                                        _symbolsInSections;
1106         bool                                                                            _hasLongBranchStubs;
1107         bool                                                                            _AppleObjc; // FSF has objc that uses different data layout
1108         bool                                                                            _overlappingSymbols;
1109         bool                                                                            _convertUnwindInfo;
1110         bool                                                                            _hasDataInCodeLabels;
1111         unsigned int                                                            _stubsSectionNum;
1112         const macho_section<P>*                                         _stubsMachOSection;
1113         std::vector<const char*>                                        _dtraceProviderInfo;
1114         std::vector<FixupInAtom>                                        _allFixups;
1115 };
1116
1117
1118
1119 template <typename A>
1120 Parser<A>::Parser(const uint8_t* fileContent, uint64_t fileLength, const char* path, time_t modTime,
1121                                         ld::File::Ordinal ordinal, bool convertDUI)
1122                 : _fileContent(fileContent), _fileLength(fileLength), _path(path), _modTime(modTime),
1123                         _ordinal(ordinal), _file(NULL),
1124                         _symbols(NULL), _symbolCount(0), _strings(NULL), _stringsSize(0),
1125                         _indirectTable(NULL), _indirectTableCount(0),
1126                         _undefinedStartIndex(0), _undefinedEndIndex(0),
1127                         _sectionsStart(NULL), _machOSectionsCount(0), _hasUUID(false),
1128                         _dataInCodeStart(NULL), _dataInCodeEnd(NULL),
1129                         _EHFrameSection(NULL), _compactUnwindSection(NULL), _absoluteSection(NULL),
1130                         _tentativeDefinitionCount(0), _absoluteSymbolCount(0),
1131                         _symbolsInSections(0), _hasLongBranchStubs(false),  _AppleObjc(false),
1132                         _overlappingSymbols(false), _convertUnwindInfo(convertDUI), _hasDataInCodeLabels(false),
1133                         _stubsSectionNum(0), _stubsMachOSection(NULL)
1134 {
1135 }
1136
1137
1138 template <>
1139 bool Parser<x86>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1140 {
1141         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1142         if ( header->magic() != MH_MAGIC )
1143                 return false;
1144         if ( header->cputype() != CPU_TYPE_I386 )
1145                 return false;
1146         if ( header->filetype() != MH_OBJECT )
1147                 return false;
1148         return true;
1149 }
1150
1151 template <>
1152 bool Parser<x86_64>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1153 {
1154         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1155         if ( header->magic() != MH_MAGIC_64 )
1156                 return false;
1157         if ( header->cputype() != CPU_TYPE_X86_64 )
1158                 return false;
1159         if ( header->filetype() != MH_OBJECT )
1160                 return false;
1161         return true;
1162 }
1163
1164 template <>
1165 bool Parser<arm>::validFile(const uint8_t* fileContent, bool subtypeMustMatch, cpu_subtype_t subtype)
1166 {
1167         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1168         if ( header->magic() != MH_MAGIC )
1169                 return false;
1170         if ( header->cputype() != CPU_TYPE_ARM )
1171                 return false;
1172         if ( header->filetype() != MH_OBJECT )
1173                 return false;
1174         if ( subtypeMustMatch ) {
1175                 if ( (cpu_subtype_t)header->cpusubtype() == subtype )
1176                         return true;
1177                 // hack until libcc_kext.a is made fat
1178                 if ( header->cpusubtype() == CPU_SUBTYPE_ARM_ALL )
1179                         return true;
1180                 return false;
1181         }
1182         return true;
1183 }
1184
1185
1186
1187 template <>
1188 const char* Parser<x86>::fileKind(const uint8_t* fileContent)
1189 {
1190         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1191         if ( header->magic() != MH_MAGIC )
1192                 return NULL;
1193         if ( header->cputype() != CPU_TYPE_I386 )
1194                 return NULL;
1195         return "i386";
1196 }
1197
1198 template <>
1199 const char* Parser<x86_64>::fileKind(const uint8_t* fileContent)
1200 {
1201         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1202         if ( header->magic() != MH_MAGIC )
1203                 return NULL;
1204         if ( header->cputype() != CPU_TYPE_X86_64 )
1205                 return NULL;
1206         return "x86_64";
1207 }
1208
1209 template <>
1210 const char* Parser<arm>::fileKind(const uint8_t* fileContent)
1211 {
1212         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1213         if ( header->magic() != MH_MAGIC )
1214                 return NULL;
1215         if ( header->cputype() != CPU_TYPE_ARM )
1216                 return NULL;
1217         for (const ArchInfo* t=archInfoArray; t->archName != NULL; ++t) {
1218                 if ( (t->cpuType == CPU_TYPE_ARM) && ((cpu_subtype_t)header->cpusubtype() == t->cpuSubType) ) {
1219                         return t->archName;
1220                 }
1221         }
1222         return "arm???";
1223 }
1224
1225
1226 template <typename A>
1227 bool Parser<A>::hasObjC2Categories(const uint8_t* fileContent)
1228 {
1229         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1230         const uint32_t cmd_count = header->ncmds();
1231         const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1232         const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1233         const macho_load_command<P>* cmd = cmds;
1234         for (uint32_t i = 0; i < cmd_count; ++i) {
1235                 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1236                         const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1237                         const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1238                         for (uint32_t si=0; si < segment->nsects(); ++si) {
1239                                 const macho_section<P>* sect = &sectionsStart[si];
1240                                 if ( (sect->size() > 0)
1241                                         && (strcmp(sect->sectname(), "__objc_catlist") == 0)
1242                                         && (strcmp(sect->segname(), "__DATA") == 0) ) {
1243                                                 return true;
1244                                 }
1245                         }
1246                 }
1247                 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1248                 if ( cmd > cmdsEnd )
1249                         throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1250         }
1251         return false;
1252 }
1253
1254
1255 template <typename A>
1256 bool Parser<A>::hasObjC1Categories(const uint8_t* fileContent)
1257 {
1258         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1259         const uint32_t cmd_count = header->ncmds();
1260         const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1261         const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1262         const macho_load_command<P>* cmd = cmds;
1263         for (uint32_t i = 0; i < cmd_count; ++i) {
1264                 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1265                         const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1266                         const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1267                         for (uint32_t si=0; si < segment->nsects(); ++si) {
1268                                 const macho_section<P>* sect = &sectionsStart[si];
1269                                 if ( (sect->size() > 0)
1270                                         && (strcmp(sect->sectname(), "__category") == 0)
1271                                         && (strcmp(sect->segname(), "__OBJC") == 0) ) {
1272                                                 return true;
1273                                 }
1274                         }
1275                 }
1276                 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1277                 if ( cmd > cmdsEnd )
1278                         throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1279         }
1280         return false;
1281 }
1282
1283 template <typename A>
1284 int Parser<A>::pointerSorter(const void* l, const void* r)
1285 {
1286         // sort references by address
1287         const pint_t* left = (pint_t*)l;
1288         const pint_t* right = (pint_t*)r;
1289         return (*left - *right);
1290 }
1291
1292 template <typename A>
1293 typename A::P::uint_t Parser<A>::LabelAndCFIBreakIterator::peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr)
1294 {
1295         pint_t symbolAddr;
1296         if ( symIndex < sortedSymbolCount )
1297                 symbolAddr = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]).n_value();
1298         else
1299                 symbolAddr = endAddr;
1300         pint_t cfiAddr;
1301         if ( cfiIndex < cfiStartsCount )
1302                 cfiAddr = cfiStartsArray[cfiIndex];
1303         else
1304                 cfiAddr = endAddr;
1305         if ( (cfiAddr < symbolAddr) && (cfiAddr >= startAddr) ) {
1306                 if ( cfiAddr <  endAddr )
1307                         return cfiAddr;
1308                 else
1309                         return endAddr;
1310         }
1311         else  {
1312                 if ( symbolAddr <  endAddr )
1313                         return symbolAddr;
1314                 else
1315                         return endAddr;
1316         }
1317 }
1318
1319 //
1320 // Parses up a section into chunks based on labels and CFI information.
1321 // Each call returns the next chunk address and size, and (if the break
 1322 // was because of a label) the symbol. Returns false when no more chunks.
1323 //
1324 template <typename A>
1325 bool Parser<A>::LabelAndCFIBreakIterator::next(Parser<A>& parser, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
1326                                                                                                 pint_t* addr, pint_t* size, const macho_nlist<P>** symbol)
1327 {
1328         // may not be a label on start of section, but need atom demarcation there
1329         if ( newSection ) {
1330                 newSection = false;
1331                 // advance symIndex until we get to the first label at or past the start of this section
1332                 while ( symIndex < sortedSymbolCount ) {
1333                         const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1334                         pint_t nextSymbolAddr = sym.n_value();
1335                         //fprintf(stderr, "sectNum=%d, nextSymbolAddr=0x%08llX, name=%s\n", sectNum, (uint64_t)nextSymbolAddr, parser.nameFromSymbol(sym));
1336                         if ( (nextSymbolAddr > startAddr) || ((nextSymbolAddr == startAddr) && (sym.n_sect() == sectNum)) )
1337                                 break;
1338                         ++symIndex;
1339                 }
1340                 if ( symIndex < sortedSymbolCount ) {
1341                         const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1342                         pint_t nextSymbolAddr = sym.n_value();
1343                         // if next symbol found is not in this section
1344                         if ( sym.n_sect() != sectNum ) {
1345                                 // check for CFI break instead of symbol break
1346                                 if ( cfiIndex < cfiStartsCount ) {
1347                                         pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1348                                         if ( nextCfiAddr < endAddr ) {
1349                                                 // use cfi
1350                                                 ++cfiIndex;
1351                                                 *addr = nextCfiAddr;
1352                                                 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1353                                                 *symbol = NULL;
1354                                                 return true;
1355                                         }
1356                                 }
1357                                 *addr = startAddr;
1358                                 *size = endAddr - startAddr;
1359                                 *symbol = NULL;
1360                                 if ( startAddr == endAddr )
1361                                         return false;  // zero size section
1362                                 else
1363                                         return true;  // whole section is one atom with no label
1364                         }
1365                         // if also CFI break here, eat it
1366                         if ( cfiIndex < cfiStartsCount ) {
1367                                 if ( cfiStartsArray[cfiIndex] == nextSymbolAddr )
1368                                         ++cfiIndex;
1369                         }
1370                         if ( nextSymbolAddr == startAddr ) {
1371                                 // label at start of section, return it as chunk
1372                                 ++symIndex;
1373                                 *addr = startAddr;
1374                                 *size = peek(parser, startAddr, endAddr) - startAddr;
1375                                 *symbol = &sym;
1376                                 return true;
1377                         }
1378                         // return chunk before first symbol
1379                         *addr = startAddr;
1380                         *size = nextSymbolAddr - startAddr;
1381                         *symbol = NULL;
1382                         return true;
1383                 }
1384                 // no symbols left in whole file, so entire section is one chunk
1385                 *addr = startAddr;
1386                 *size = endAddr - startAddr;
1387                 *symbol = NULL;
1388                 if ( startAddr == endAddr )
1389                         return false;  // zero size section
1390                 else
1391                         return true;  // whole section is one atom with no label
1392         }
1393
1394         while ( (symIndex < sortedSymbolCount) && (cfiIndex < cfiStartsCount) ) {
1395                 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1396                 pint_t nextSymbolAddr = sym.n_value();
1397                 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1398                 if ( nextSymbolAddr <  nextCfiAddr ) {
1399                         if ( nextSymbolAddr >= endAddr )
1400                                 return false;
1401                         ++symIndex;
1402                         if ( nextSymbolAddr < startAddr )
1403                                 continue;
1404                         *addr = nextSymbolAddr;
1405                         *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1406                         *symbol = &sym;
1407                         return true;
1408                 }
1409                 else if ( nextCfiAddr < nextSymbolAddr ) {
1410                         if ( nextCfiAddr >= endAddr )
1411                                 return false;
1412                         ++cfiIndex;
1413                         if ( nextCfiAddr < startAddr )
1414                                 continue;
1415                         *addr = nextCfiAddr;
1416                         *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1417                         *symbol = NULL;
1418                         return true;
1419                 }
1420                 else {
1421                         if ( nextCfiAddr >= endAddr )
1422                                 return false;
1423                         ++symIndex;
1424                         ++cfiIndex;
1425                         if ( nextCfiAddr < startAddr )
1426                                 continue;
1427                         *addr = nextCfiAddr;
1428                         *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1429                         *symbol = &sym;
1430                         return true;
1431                 }
1432         }
1433         while ( symIndex < sortedSymbolCount ) {
1434                 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1435                 pint_t nextSymbolAddr = sym.n_value();
1436                 // if next symbol found is not in this section, then done with iteration
1437                 if ( sym.n_sect() != sectNum )
1438                         return false;
1439                 ++symIndex;
1440                 if ( nextSymbolAddr < startAddr )
1441                         continue;
1442                 *addr = nextSymbolAddr;
1443                 *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1444                 *symbol = &sym;
1445                 return true;
1446         }
1447         while ( cfiIndex < cfiStartsCount ) {
1448                 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1449                 if ( nextCfiAddr >= endAddr )
1450                         return false;
1451                 ++cfiIndex;
1452                 if ( nextCfiAddr < startAddr )
1453                         continue;
1454                 *addr = nextCfiAddr;
1455                 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1456                 *symbol = NULL;
1457                 return true;
1458         }
1459         return false;
1460 }
1461
1462
1463
1464 template <typename A>
1465 ld::relocatable::File* Parser<A>::parse(const ParserOptions& opts)
1466 {
1467         // create file object
1468         _file = new File<A>(_path, _modTime, _fileContent, _ordinal);
1469
1470         // respond to -t option
1471         if ( opts.logAllFiles )
1472                 printf("%s\n", _path);
1473
1474         // parse start of mach-o file
1475         if ( ! parseLoadCommands() )
1476                 return _file;
1477
1478         // make array of
1479         uint32_t sortedSectionIndexes[_machOSectionsCount];
1480         this->makeSortedSectionsArray(sortedSectionIndexes);
1481
1482         // make symbol table sorted by address
1483         this->prescanSymbolTable();
1484         uint32_t sortedSymbolIndexes[_symbolsInSections];
1485         this->makeSortedSymbolsArray(sortedSymbolIndexes, sortedSectionIndexes);
1486
1487         // allocate Section<A> object for each mach-o section
1488         makeSections();
1489
1490         // if it exists, do special early parsing of __compact_unwind section
1491         uint32_t countOfCUs = 0;
1492         if ( _compactUnwindSection != NULL )
1493                 countOfCUs = _compactUnwindSection->count();
1494         uint8_t cuInfoBuffer[sizeof(typename CUSection<A>::Info) * countOfCUs];
1495         typename CUSection<A>::Info*  cuInfoArray = (typename CUSection<A>::Info*)cuInfoBuffer;
1496         if ( countOfCUs != 0 )
1497                 _compactUnwindSection->parse(*this, countOfCUs, cuInfoArray);
1498
1499         // if it exists, do special early parsing of __eh_frame section
1500         // stack allocate array of CFI_Atom_Info
1501         uint32_t countOfCFIs = 0;
1502         if ( _EHFrameSection != NULL )
1503                 countOfCFIs = _EHFrameSection->cfiCount();
1504         typename CFISection<A>::CFI_Atom_Info  cfiArray[countOfCFIs];
1505         // stack allocate (if not too large) a copy of __eh_frame to apply relocations to
1506         uint8_t* ehBuffer = NULL;
1507         uint32_t stackAllocSize = 0;
1508         if ( (countOfCFIs != 0) && _EHFrameSection->needsRelocating() ) {
1509                 uint32_t sectSize = _EHFrameSection->machoSection()->size();
1510                 if ( sectSize > 50*1024 )
1511                         ehBuffer = (uint8_t*)malloc(sectSize);
1512                 else
1513                         stackAllocSize = sectSize;
1514         }
1515         uint32_t ehStackBuffer[1+stackAllocSize/4]; // make 4-byte aligned stack bufffer
1516         if ( ehBuffer == NULL )
1517                 ehBuffer = (uint8_t*)&ehStackBuffer;
1518         uint32_t cfiStartsCount = 0;
1519         if ( countOfCFIs != 0 ) {
1520                 _EHFrameSection->cfiParse(*this, ehBuffer, cfiArray, countOfCFIs);
1521                 // count functions and lsdas
1522                 for(uint32_t i=0; i < countOfCFIs; ++i) {
1523                         if ( cfiArray[i].isCIE )
1524                                 continue;
1525                         //fprintf(stderr, "cfiArray[i].func = 0x%08llX, cfiArray[i].lsda = 0x%08llX, encoding=0x%08X\n",
1526                         //                      (uint64_t)cfiArray[i].u.fdeInfo.function.targetAddress,
1527                         //                      (uint64_t)cfiArray[i].u.fdeInfo.lsda.targetAddress,
1528                         //                      cfiArray[i].u.fdeInfo.compactUnwindInfo);
1529                         if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1530                                 ++cfiStartsCount;
1531                         if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1532                                 ++cfiStartsCount;
1533                 }
1534         }
1535         CFI_CU_InfoArrays cfis(cfiArray, countOfCFIs, cuInfoArray, countOfCUs);
1536
1537         // create sorted array of function starts and lsda starts
1538         pint_t cfiStartsArray[cfiStartsCount];
1539         uint32_t countOfFDEs = 0;
1540         if ( countOfCFIs != 0 ) {
1541                 int index = 0;
1542                 for(uint32_t i=0; i < countOfCFIs; ++i) {
1543                         if ( cfiArray[i].isCIE )
1544                                 continue;
1545                         if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1546                                 cfiStartsArray[index++] = cfiArray[i].u.fdeInfo.function.targetAddress;
1547                         if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1548                                 cfiStartsArray[index++] = cfiArray[i].u.fdeInfo.lsda.targetAddress;
1549                         ++countOfFDEs;
1550                 }
1551                 ::qsort(cfiStartsArray, cfiStartsCount, sizeof(pint_t), pointerSorter);
1552         #ifndef NDEBUG
1553                 // scan for FDEs claming the same function
1554                 for(int i=1; i < index; ++i) {
1555                         assert( cfiStartsArray[i] != cfiStartsArray[i-1] );
1556                 }
1557         #endif
1558         }
1559
1560         Section<A>** sections = _file->_sectionsArray;
1561         uint32_t        sectionsCount = _file->_sectionsArrayCount;
1562
1563         // figure out how many atoms will be allocated and allocate
1564         LabelAndCFIBreakIterator breakIterator(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1565                                                                                         cfiStartsCount, _overlappingSymbols);
1566         uint32_t computedAtomCount = 0;
1567         for (uint32_t i=0; i < sectionsCount; ++i ) {
1568                 breakIterator.beginSection();
1569                 uint32_t count = sections[i]->computeAtomCount(*this, breakIterator, cfis);
1570                 //const macho_section<P>* sect = sections[i]->machoSection();
1571                 //fprintf(stderr, "computed count=%u for section %s size=%llu\n", count, sect->sectname(), (sect != NULL) ? sect->size() : 0);
1572                 computedAtomCount += count;
1573         }
1574         //fprintf(stderr, "allocating %d atoms * sizeof(Atom<A>)=%ld, sizeof(ld::Atom)=%ld\n", computedAtomCount, sizeof(Atom<A>), sizeof(ld::Atom));
1575         _file->_atomsArray = new uint8_t[computedAtomCount*sizeof(Atom<A>)];
1576         _file->_atomsArrayCount = 0;
1577
1578         // have each section append atoms to _atomsArray
1579         LabelAndCFIBreakIterator breakIterator2(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1580                                                                                                 cfiStartsCount, _overlappingSymbols);
1581         for (uint32_t i=0; i < sectionsCount; ++i ) {
1582                 uint8_t* atoms = _file->_atomsArray + _file->_atomsArrayCount*sizeof(Atom<A>);
1583                 breakIterator2.beginSection();
1584                 uint32_t count = sections[i]->appendAtoms(*this, atoms, breakIterator2, cfis);
1585                 //fprintf(stderr, "append count=%u for section %s/%s\n", count, sections[i]->machoSection()->segname(), sections[i]->machoSection()->sectname());
1586                 _file->_atomsArrayCount += count;
1587         }
1588         assert( _file->_atomsArrayCount == computedAtomCount && "more atoms allocated than expected");
1589
1590
1591         // have each section add all fix-ups for its atoms
1592         _allFixups.reserve(computedAtomCount*5);
1593         for (uint32_t i=0; i < sectionsCount; ++i )
1594                 sections[i]->makeFixups(*this, cfis);
1595
1596         // assign fixups start offset for each atom
1597         uint8_t* p = _file->_atomsArray;
1598         uint32_t fixupOffset = 0;
1599         for(int i=_file->_atomsArrayCount; i > 0; --i) {
1600                 Atom<A>* atom = (Atom<A>*)p;
1601                 atom->_fixupsStartIndex = fixupOffset;
1602                 fixupOffset += atom->_fixupsCount;
1603                 atom->_fixupsCount = 0;
1604                 p += sizeof(Atom<A>);
1605         }
1606         assert(fixupOffset == _allFixups.size());
1607         _file->_fixups.reserve(fixupOffset);
1608
1609         // copy each fixup for each atom
1610         for(typename std::vector<FixupInAtom>::iterator it=_allFixups.begin(); it != _allFixups.end(); ++it) {
1611                 uint32_t slot = it->atom->_fixupsStartIndex + it->atom->_fixupsCount;
1612                 _file->_fixups[slot] = it->fixup;
1613                 it->atom->_fixupsCount++;
1614         }
1615
1616         // done with temp vector
1617         _allFixups.clear();
1618
1619         // add unwind info
1620         _file->_unwindInfos.reserve(countOfFDEs+countOfCUs);
1621         for(uint32_t i=0; i < countOfCFIs; ++i) {
1622                 if ( cfiArray[i].isCIE )
1623                         continue;
1624                 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS ) {
1625                         ld::Atom::UnwindInfo info;
1626                         info.startOffset = 0;
1627                         info.unwindInfo = cfiArray[i].u.fdeInfo.compactUnwindInfo;
1628                         _file->_unwindInfos.push_back(info);
1629                         Atom<A>* func = findAtomByAddress(cfiArray[i].u.fdeInfo.function.targetAddress);
1630                         func->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1631                 }
1632         }
1633         // apply compact infos in __LD,__compact_unwind section to each function
1634         // if function also has dwarf unwind, CU will override it
1635         Atom<A>* lastFunc = NULL;
1636         uint32_t lastEnd = 0;
1637         for(uint32_t i=0; i < countOfCUs; ++i) {
1638                 typename CUSection<A>::Info* info = &cuInfoArray[i];
1639                 assert(info->function != NULL);
1640                 ld::Atom::UnwindInfo ui;
1641                 ui.startOffset = info->functionStartAddress - info->function->objectAddress();
1642                 ui.unwindInfo = info->compactUnwindInfo;
1643                 _file->_unwindInfos.push_back(ui);
1644                 // if previous is for same function, extend range
1645                 if ( info->function == lastFunc ) {
1646                         if ( lastEnd != ui.startOffset ) {
1647                                 if ( lastEnd < ui.startOffset )
1648                                         warning("__LD,__compact_unwind entries for %s have a gap at offset 0x%0X", info->function->name(), lastEnd);
1649                                 else
1650                                         warning("__LD,__compact_unwind entries for %s overlap at offset 0x%0X", info->function->name(), lastEnd);
1651                         }
1652                         lastFunc->extendUnwindInfoRange();
1653                 }
1654                 else
1655                         info->function->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1656                 lastFunc = info->function;
1657                 lastEnd = ui.startOffset + info->rangeLength;
1658         }
1659
1660         // parse dwarf debug info to get line info
1661         this->parseDebugInfo();
1662
1663         return _file;
1664 }
1665
1666
1667
1668 template <> uint8_t Parser<x86>::loadCommandSizeMask()          { return 0x03; }
1669 template <> uint8_t Parser<x86_64>::loadCommandSizeMask()       { return 0x07; }
1670 template <> uint8_t Parser<arm>::loadCommandSizeMask()          { return 0x03; }
1671
1672 template <typename A>
1673 bool Parser<A>::parseLoadCommands()
1674 {
1675         const macho_header<P>* header = (const macho_header<P>*)_fileContent;
1676
1677         // set File attributes
1678         _file->_canScatterAtoms = (header->flags() & MH_SUBSECTIONS_VIA_SYMBOLS);
1679         _file->_cpuSubType = header->cpusubtype();
1680
1681         const macho_segment_command<P>* segment = NULL;
1682         const uint8_t* const endOfFile = _fileContent + _fileLength;
1683         const uint32_t cmd_count = header->ncmds();
1684         // <rdar://problem/5394172> an empty .o file with zero load commands will crash linker
1685         if ( cmd_count == 0 )
1686                 return false;
1687         const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1688         const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1689         const macho_load_command<P>* cmd = cmds;
1690         for (uint32_t i = 0; i < cmd_count; ++i) {
1691                 uint32_t size = cmd->cmdsize();
1692                 if ( (size & this->loadCommandSizeMask()) != 0 )
1693                         throwf("load command #%d has a unaligned size", i);
1694                 const uint8_t* endOfCmd = ((uint8_t*)cmd)+cmd->cmdsize();
1695                 if ( endOfCmd > (uint8_t*)cmdsEnd )
1696                         throwf("load command #%d extends beyond the end of the load commands", i);
1697                 if ( endOfCmd > endOfFile )
1698                         throwf("load command #%d extends beyond the end of the file", i);
1699                 switch (cmd->cmd()) {
1700                     case LC_SYMTAB:
1701                                 {
1702                                         const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
1703                                         _symbolCount = symtab->nsyms();
1704                                         _symbols = (const macho_nlist<P>*)(_fileContent + symtab->symoff());
1705                                         _strings = (char*)_fileContent + symtab->stroff();
1706                                         _stringsSize = symtab->strsize();
1707                                         if ( (symtab->symoff() + _symbolCount*sizeof(macho_nlist<P>)) > _fileLength )
1708                                                 throw "mach-o symbol table extends beyond end of file";
1709                                         if ( (_strings + _stringsSize) > (char*)endOfFile )
1710                                                 throw "mach-o string pool extends beyond end of file";
1711                                         if ( _indirectTable == NULL ) {
1712                                                 if ( _undefinedEndIndex == 0 ) {
1713                                                         _undefinedStartIndex = 0;
1714                                                         _undefinedEndIndex = symtab->nsyms();
1715                                                 }
1716                                         }
1717                                 }
1718                                 break;
1719                         case LC_DYSYMTAB:
1720                                 {
1721                                         const macho_dysymtab_command<P>* dsymtab = (macho_dysymtab_command<P>*)cmd;
1722                                         _indirectTable = (uint32_t*)(_fileContent + dsymtab->indirectsymoff());
1723                                         _indirectTableCount = dsymtab->nindirectsyms();
1724                                         if ( &_indirectTable[_indirectTableCount] > (uint32_t*)endOfFile )
1725                                                 throw "indirect symbol table extends beyond end of file";
1726                                         _undefinedStartIndex = dsymtab->iundefsym();
1727                                         _undefinedEndIndex = _undefinedStartIndex + dsymtab->nundefsym();
1728                                 }
1729                                 break;
1730                     case LC_UUID:
1731                                 _hasUUID = true;
1732                                 break;
1733                         case LC_DATA_IN_CODE:
1734                                 {
1735                                         const macho_linkedit_data_command<P>* dc = (macho_linkedit_data_command<P>*)cmd;
1736                                         _dataInCodeStart = (macho_data_in_code_entry<P>*)(_fileContent + dc->dataoff());
1737                                         _dataInCodeEnd = (macho_data_in_code_entry<P>*)(_fileContent + dc->dataoff() + dc->datasize());
1738                                         if ( _dataInCodeEnd > (macho_data_in_code_entry<P>*)endOfFile )
1739                                                 throw "LC_DATA_IN_CODE table extends beyond end of file";
1740                                 }
1741                         default:
1742                                 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1743                                         if ( segment != NULL )
1744                                                 throw "more than one LC_SEGMENT found in object file";
1745                                         segment = (macho_segment_command<P>*)cmd;
1746                                 }
1747                                 break;
1748                 }
1749                 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1750                 if ( cmd > cmdsEnd )
1751                         throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1752         }
1753
1754         // record range of sections
1755         if ( segment == NULL )
1756                 throw "missing LC_SEGMENT";
1757         _sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1758         _machOSectionsCount = segment->nsects();
1759
1760         return true;
1761 }
1762
1763
1764 template <typename A>
1765 void Parser<A>::prescanSymbolTable()
1766 {
1767         _tentativeDefinitionCount = 0;
1768         _absoluteSymbolCount = 0;
1769         _symbolsInSections = 0;
1770         _hasDataInCodeLabels = false;
1771         for (uint32_t i=0; i < this->_symbolCount; ++i) {
1772                 const macho_nlist<P>& sym =     symbolFromIndex(i);
1773                 // ignore stabs
1774                 if ( (sym.n_type() & N_STAB) != 0 )
1775                         continue;
1776
1777                 // look at undefines
1778                 const char* symbolName = this->nameFromSymbol(sym);
1779                 if ( (sym.n_type() & N_TYPE) == N_UNDF ) {
1780                         if ( sym.n_value() != 0 ) {
1781                                 // count tentative definitions
1782                                 ++_tentativeDefinitionCount;
1783                         }
1784                         else if ( strncmp(symbolName, "___dtrace_", 10) == 0 ) {
1785                                 // any undefined starting with __dtrace_*$ that is not ___dtrace_probe$* or ___dtrace_isenabled$*
1786                                 // is extra provider info
1787                                 if ( (strncmp(&symbolName[10], "probe$", 6) != 0) && (strncmp(&symbolName[10], "isenabled$", 10) != 0) ) {
1788                                         _dtraceProviderInfo.push_back(symbolName);
1789                                 }
1790                         }
1791                         continue;
1792                 }
1793
1794                 // count absolute symbols
1795                 if ( (sym.n_type() & N_TYPE) == N_ABS ) {
1796                         const char* absName = this->nameFromSymbol(sym);
1797                         // ignore .objc_class_name_* symbols
1798                         if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
1799                                 _AppleObjc = true;
1800                                 continue;
1801                         }
1802                         // ignore .objc_class_name_* symbols
1803                         if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
1804                                 continue;
1805                         // ignore empty *.eh symbols
1806                         if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
1807                                 continue;
1808                         ++_absoluteSymbolCount;
1809                 }
1810
1811                 // only look at definitions
1812                 if ( (sym.n_type() & N_TYPE) != N_SECT )
1813                         continue;
1814
1815                 // 'L' labels do not denote atom breaks
1816                 if ( symbolName[0] == 'L' ) {
1817                         // <rdar://problem/9218847> Formalize data in code with L$start$ labels
1818                         if ( strncmp(symbolName, "L$start$", 8) == 0 )
1819                                 _hasDataInCodeLabels = true;
1820                         continue;
1821                 }
1822                 // how many def syms in each section
1823                 if ( sym.n_sect() > _machOSectionsCount )
1824                         throw "bad n_sect in symbol table";
1825
1826                 _symbolsInSections++;
1827         }
1828 }
1829
1830 template <typename A>
1831 int Parser<A>::sectionIndexSorter(void* extra, const void* l, const void* r)
1832 {
1833         Parser<A>* parser = (Parser<A>*)extra;
1834         const uint32_t* left = (uint32_t*)l;
1835         const uint32_t* right = (uint32_t*)r;
1836         const macho_section<P>* leftSect =      parser->machOSectionFromSectionIndex(*left);
1837         const macho_section<P>* rightSect = parser->machOSectionFromSectionIndex(*right);
1838
1839         // can't just return difference because 64-bit diff does not fit in 32-bit return type
1840         int64_t result = leftSect->addr() - rightSect->addr();
1841         if ( result == 0 ) {
1842                 // two sections with same start address
1843                 // one with zero size goes first
1844                 bool leftEmpty = ( leftSect->size() == 0 );
1845                 bool rightEmpty = ( rightSect->size() == 0 );
1846                 if ( leftEmpty != rightEmpty ) {
1847                         return ( rightEmpty ? 1 : -1 );
1848                 }
1849                 if ( !leftEmpty && !rightEmpty )
1850                         throwf("overlapping sections");
1851                 // both empty, so chose file order
1852                 return ( rightSect - leftSect );
1853         }
1854         else if ( result < 0 )
1855                 return -1;
1856         else
1857                 return 1;
1858 }
1859
1860 template <typename A>
1861 void Parser<A>::makeSortedSectionsArray(uint32_t array[])
1862 {
1863         const bool log = false;
1864
1865         if ( log ) {
1866                 fprintf(stderr, "unsorted sections:\n");
1867                 for(unsigned int i=0; i < _machOSectionsCount; ++i )
1868                         fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[i].addr(), _sectionsStart[i].segname(), _sectionsStart[i].sectname());
1869         }
1870
1871         // sort by symbol table address
1872         for (uint32_t i=0; i < _machOSectionsCount; ++i)
1873                 array[i] = i;
1874         ::qsort_r(array, _machOSectionsCount, sizeof(uint32_t), this, &sectionIndexSorter);
1875
1876         if ( log ) {
1877                 fprintf(stderr, "sorted sections:\n");
1878                 for(unsigned int i=0; i < _machOSectionsCount; ++i )
1879                         fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[array[i]].addr(), _sectionsStart[array[i]].segname(), _sectionsStart[array[i]].sectname());
1880         }
1881 }
1882
1883
1884
1885 template <typename A>
1886 int Parser<A>::symbolIndexSorter(void* extra, const void* l, const void* r)
1887 {
1888         ParserAndSectionsArray* extraInfo = (ParserAndSectionsArray*)extra;
1889         Parser<A>* parser = extraInfo->parser;
1890         const uint32_t* sortedSectionsArray = extraInfo->sortedSectionsArray;
1891         const uint32_t* left = (uint32_t*)l;
1892         const uint32_t* right = (uint32_t*)r;
1893         const macho_nlist<P>& leftSym = parser->symbolFromIndex(*left);
1894         const macho_nlist<P>& rightSym = parser->symbolFromIndex(*right);
1895         // can't just return difference because 64-bit diff does not fit in 32-bit return type
1896         int64_t result = leftSym.n_value() - rightSym.n_value();
1897         if ( result == 0 ) {
1898                 // two symbols with same address
1899                 // if in different sections, sort earlier section first
1900                 if ( leftSym.n_sect() != rightSym.n_sect() ) {
1901                         for (uint32_t i=0; i < parser->machOSectionCount(); ++i) {
1902                                 if ( sortedSectionsArray[i]+1 == leftSym.n_sect() )
1903                                         return -1;
1904                                 if ( sortedSectionsArray[i]+1 == rightSym.n_sect() )
1905                                         return 1;
1906                         }
1907                 }
1908                 // two symbols in same section, means one is an alias
1909                 // if one is ltmp*, make it an alias (sort first)
1910                 const char* leftName  = parser->nameFromSymbol(leftSym);
1911                 const char* rightName = parser->nameFromSymbol(rightSym);
1912                 bool leftIsTmp  = strncmp(leftName,  "ltmp", 4);
1913                 bool rightIsTmp = strncmp(rightName, "ltmp", 4);
1914                 if ( leftIsTmp != rightIsTmp ) {
1915                         return (rightIsTmp ? -1 : 1);
1916                 }
1917
1918                 // if only one is global, make the other an alias (sort first)
1919                 if ( (leftSym.n_type() & N_EXT) != (rightSym.n_type() & N_EXT) ) {
1920                         if ( (rightSym.n_type() & N_EXT) != 0 )
1921                                 return -1;
1922                         else
1923                                 return 1;
1924                 }
1925                 // if both are global, sort alphabetically. earlier one will be the alias
1926                 return ( strcmp(rightName, leftName) );
1927         }
1928         else if ( result < 0 )
1929                 return -1;
1930         else
1931                 return 1;
1932 }
1933
1934
1935 template <typename A>
1936 void Parser<A>::makeSortedSymbolsArray(uint32_t array[], const uint32_t sectionArray[])
1937 {
1938         const bool log = false;
1939
1940         uint32_t* p = array;
1941         for (uint32_t i=0; i < this->_symbolCount; ++i) {
1942                 const macho_nlist<P>& sym =     symbolFromIndex(i);
1943                 // ignore stabs
1944                 if ( (sym.n_type() & N_STAB) != 0 )
1945                         continue;
1946
1947                 // only look at definitions
1948                 if ( (sym.n_type() & N_TYPE) != N_SECT )
1949                         continue;
1950
1951                 // 'L' labels do not denote atom breaks
1952                 const char* symbolName = this->nameFromSymbol(sym);
1953                 if ( symbolName[0] == 'L' )
1954                         continue;
1955
1956                 // how many def syms in each section
1957                 if ( sym.n_sect() > _machOSectionsCount )
1958                         throw "bad n_sect in symbol table";
1959
1960                 // append to array
1961                 *p++ = i;
1962         }
1963         assert(p == &array[_symbolsInSections] && "second pass over symbol table yield a different number of symbols");
1964
1965         // sort by symbol table address
1966         ParserAndSectionsArray extra = { this, sectionArray };
1967         ::qsort_r(array, _symbolsInSections, sizeof(uint32_t), &extra, &symbolIndexSorter);
1968
1969
1970         // look for two symbols at same address
1971         _overlappingSymbols = false;
1972         for (unsigned int i=1; i < _symbolsInSections; ++i) {
1973                 if ( symbolFromIndex(array[i-1]).n_value() == symbolFromIndex(array[i]).n_value() ) {
1974                         //fprintf(stderr, "overlapping symbols at 0x%08llX\n", symbolFromIndex(array[i-1]).n_value());
1975                         _overlappingSymbols = true;
1976                         break;
1977                 }
1978         }
1979
1980         if ( log ) {
1981                 fprintf(stderr, "sorted symbols:\n");
1982                 for(unsigned int i=0; i < _symbolsInSections; ++i )
1983                         fprintf(stderr, "0x%09llX symIndex=%d sectNum=%2d, %s\n", symbolFromIndex(array[i]).n_value(), array[i], symbolFromIndex(array[i]).n_sect(), nameFromSymbol(symbolFromIndex(array[i])) );
1984         }
1985 }
1986
1987
1988 template <typename A>
1989 void Parser<A>::makeSections()
1990 {
1991         // classify each section by type
1992         // compute how many Section objects will be needed and total size for all
1993         unsigned int totalSectionsSize = 0;
1994         uint8_t machOSectsStorage[sizeof(MachOSectionAndSectionClass<P>)*(_machOSectionsCount+2)]; // also room for tentative-defs and absolute symbols
1995         // allocate raw storage for all section objects on stack
1996         MachOSectionAndSectionClass<P>* machOSects = (MachOSectionAndSectionClass<P>*)machOSectsStorage;
1997         unsigned int count = 0;
1998         for (uint32_t i=0; i < _machOSectionsCount; ++i) {
1999                 const macho_section<P>* sect = &_sectionsStart[i];
2000                 if ( (sect->flags() & S_ATTR_DEBUG) != 0 ) {
2001                         if ( strcmp(sect->segname(), "__DWARF") == 0 ) {
2002                                 // note that .o file has dwarf
2003                                 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoDwarf;
2004                                 // save off iteresting dwarf sections
2005                                 if ( strcmp(sect->sectname(), "__debug_info") == 0 )
2006                                         _file->_dwarfDebugInfoSect = sect;
2007                                 else if ( strcmp(sect->sectname(), "__debug_abbrev") == 0 )
2008                                         _file->_dwarfDebugAbbrevSect = sect;
2009                                 else if ( strcmp(sect->sectname(), "__debug_line") == 0 )
2010                                         _file->_dwarfDebugLineSect = sect;
2011                                 else if ( strcmp(sect->sectname(), "__debug_str") == 0 )
2012                                         _file->_dwarfDebugStringSect = sect;
2013                                 // linker does not propagate dwarf sections to output file
2014                                 continue;
2015                         }
2016                         else if ( strcmp(sect->segname(), "__LD") == 0 ) {
2017                                 if ( strncmp(sect->sectname(), "__compact_unwind", 16) == 0 ) {
2018                                         machOSects[count].sect = sect;
2019                                         totalSectionsSize += sizeof(CUSection<A>);
2020                                         machOSects[count++].type = sectionTypeCompactUnwind;
2021                                         continue;
2022                                 }
2023                         }
2024                 }
2025                 // ignore empty __OBJC sections
2026                 if ( (sect->size() == 0) && (strcmp(sect->segname(), "__OBJC") == 0) )
2027                         continue;
2028                 // objc image info section is really attributes and not content
2029                 if ( ((strcmp(sect->sectname(), "__image_info") == 0) && (strcmp(sect->segname(), "__OBJC") == 0))
2030                         || ((strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0) && (strcmp(sect->segname(), "__DATA") == 0)) ) {
2031                         //      struct objc_image_info  {
2032                         //              uint32_t        version;        // initially 0
2033                         //              uint32_t        flags;
2034                         //      };
2035                         // #define OBJC_IMAGE_SUPPORTS_GC   2
2036                         // #define OBJC_IMAGE_GC_ONLY       4
2037                         //
2038                         const uint32_t* contents = (uint32_t*)(_file->fileContent()+sect->offset());
2039                         if ( (sect->size() >= 8) && (contents[0] == 0) ) {
2040                                 uint32_t flags = E::get32(contents[1]);
2041                                 if ( (flags & 4) == 4 )
2042                                         _file->_objConstraint = ld::File::objcConstraintGC;
2043                                 else if ( (flags & 2) == 2 )
2044                                         _file->_objConstraint = ld::File::objcConstraintRetainReleaseOrGC;
2045                                 else
2046                                         _file->_objConstraint = ld::File::objcConstraintRetainRelease;
2047                                 if ( sect->size() > 8 ) {
2048                                         warning("section %s/%s has unexpectedly large size %llu in %s",
2049                                                         sect->segname(), Section<A>::makeSectionName(sect), sect->size(), _file->path());
2050                                 }
2051                         }
2052                         else {
2053                                 warning("can't parse %s/%s section in %s", sect->segname(), Section<A>::makeSectionName(sect), _file->path());
2054                         }
2055                         continue;
2056                 }
2057                 machOSects[count].sect = sect;
2058                 switch ( sect->flags() & SECTION_TYPE ) {
2059                         case S_SYMBOL_STUBS:
2060                                 if ( _stubsSectionNum == 0 ) {
2061                                         _stubsSectionNum = i+1;
2062                                         _stubsMachOSection = sect;
2063                                 }
2064                                 else
2065                                         assert(1 && "multiple S_SYMBOL_STUBS sections");
2066                         case S_LAZY_SYMBOL_POINTERS:
2067                                 break;
2068                         case S_4BYTE_LITERALS:
2069                                 totalSectionsSize += sizeof(Literal4Section<A>);
2070                                 machOSects[count++].type = sectionTypeLiteral4;
2071                                 break;
2072                         case S_8BYTE_LITERALS:
2073                                 totalSectionsSize += sizeof(Literal8Section<A>);
2074                                 machOSects[count++].type = sectionTypeLiteral8;
2075                                 break;
2076                         case S_16BYTE_LITERALS:
2077                                 totalSectionsSize += sizeof(Literal16Section<A>);
2078                                 machOSects[count++].type = sectionTypeLiteral16;
2079                                 break;
2080                         case S_NON_LAZY_SYMBOL_POINTERS:
2081                                 totalSectionsSize += sizeof(NonLazyPointerSection<A>);
2082                                 machOSects[count++].type = sectionTypeNonLazy;
2083                                 break;
2084                         case S_LITERAL_POINTERS:
2085                                 if ( (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__cls_refs") == 0) ) {
2086                                         totalSectionsSize += sizeof(Objc1ClassReferences<A>);
2087                                         machOSects[count++].type = sectionTypeObjC1ClassRefs;
2088                                 }
2089                                 else {
2090                                         totalSectionsSize += sizeof(PointerToCStringSection<A>);
2091                                         machOSects[count++].type = sectionTypeCStringPointer;
2092                                 }
2093                                 break;
2094                         case S_CSTRING_LITERALS:
2095                                 totalSectionsSize += sizeof(CStringSection<A>);
2096                                 machOSects[count++].type = sectionTypeCString;
2097                                 break;
2098                         case S_MOD_INIT_FUNC_POINTERS:
2099                         case S_MOD_TERM_FUNC_POINTERS:
2100                         case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
2101                         case S_INTERPOSING:
2102                         case S_ZEROFILL:
2103                         case S_REGULAR:
2104                         case S_COALESCED:
2105                         case S_THREAD_LOCAL_REGULAR:
2106                         case S_THREAD_LOCAL_ZEROFILL:
2107                                 if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__eh_frame") == 0) ) {
2108                                         totalSectionsSize += sizeof(CFISection<A>);
2109                                         machOSects[count++].type = sectionTypeCFI;
2110                                 }
2111                                 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__cfstring") == 0) ) {
2112                                         totalSectionsSize += sizeof(CFStringSection<A>);
2113                                         machOSects[count++].type = sectionTypeCFString;
2114                                 }
2115                                 else if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__ustring") == 0) ) {
2116                                         totalSectionsSize += sizeof(UTF16StringSection<A>);
2117                                         machOSects[count++].type = sectionTypeUTF16Strings;
2118                                 }
2119                                 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strncmp(sect->sectname(), "__objc_classrefs", 16) == 0) ) {
2120                                         totalSectionsSize += sizeof(ObjC2ClassRefsSection<A>);
2121                                         machOSects[count++].type = sectionTypeObjC2ClassRefs;
2122                                 }
2123                                 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__objc_catlist") == 0) ) {
2124                                         totalSectionsSize += sizeof(ObjC2CategoryListSection<A>);
2125                                         machOSects[count++].type = typeObjC2CategoryList;
2126                                 }
2127                                 else if ( _AppleObjc && (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__class") == 0) ) {
2128                                         totalSectionsSize += sizeof(ObjC1ClassSection<A>);
2129                                         machOSects[count++].type = sectionTypeObjC1Classes;
2130                                 }
2131                                 else {
2132                                         totalSectionsSize += sizeof(SymboledSection<A>);
2133                                         machOSects[count++].type = sectionTypeSymboled;
2134                                 }
2135                                 break;
2136                         case S_THREAD_LOCAL_VARIABLES:
2137                                 totalSectionsSize += sizeof(TLVDefsSection<A>);
2138                                 machOSects[count++].type = sectionTypeTLVDefs;
2139                                 break;
2140                         case S_THREAD_LOCAL_VARIABLE_POINTERS:
2141                         default:
2142                                 throwf("unknown section type %d", sect->flags() & SECTION_TYPE);
2143                 }
2144         }
2145
2146         // sort by address (mach-o object files don't aways have sections sorted)
2147         ::qsort(machOSects, count, sizeof(MachOSectionAndSectionClass<P>), MachOSectionAndSectionClass<P>::sorter);
2148
2149         // we will synthesize a dummy Section<A> object for tentative definitions
2150         if ( _tentativeDefinitionCount > 0 ) {
2151                 totalSectionsSize += sizeof(TentativeDefinitionSection<A>);
2152                 machOSects[count++].type = sectionTypeTentativeDefinitions;
2153         }
2154
2155         // we will synthesize a dummy Section<A> object for Absolute symbols
2156         if ( _absoluteSymbolCount > 0 ) {
2157                 totalSectionsSize += sizeof(AbsoluteSymbolSection<A>);
2158                 machOSects[count++].type = sectionTypeAbsoluteSymbols;
2159         }
2160
2161         // allocate one block for all Section objects as well as pointers to each
2162         uint8_t* space = new uint8_t[totalSectionsSize+count*sizeof(Section<A>*)];
2163         _file->_sectionsArray = (Section<A>**)space;
2164         _file->_sectionsArrayCount = count;
2165         Section<A>** objects = _file->_sectionsArray;
2166         space += count*sizeof(Section<A>*);
2167         for (uint32_t i=0; i < count; ++i) {
2168                 switch ( machOSects[i].type ) {
2169                         case sectionTypeIgnore:
2170                                 break;
2171                         case sectionTypeLiteral4:
2172                                 *objects++ = new (space) Literal4Section<A>(*this, *_file, machOSects[i].sect);
2173                                 space += sizeof(Literal4Section<A>);
2174                                 break;
2175                         case sectionTypeLiteral8:
2176                                 *objects++ = new (space) Literal8Section<A>(*this, *_file, machOSects[i].sect);
2177                                 space += sizeof(Literal8Section<A>);
2178                                 break;
2179                         case sectionTypeLiteral16:
2180                                 *objects++ = new (space) Literal16Section<A>(*this, *_file, machOSects[i].sect);
2181                                 space += sizeof(Literal16Section<A>);
2182                                 break;
2183                         case sectionTypeNonLazy:
2184                                 *objects++ = new (space) NonLazyPointerSection<A>(*this, *_file, machOSects[i].sect);
2185                                 space += sizeof(NonLazyPointerSection<A>);
2186                                 break;
2187                         case sectionTypeCFI:
2188                                 _EHFrameSection = new (space) CFISection<A>(*this, *_file, machOSects[i].sect);
2189                                 *objects++ = _EHFrameSection;
2190                                 space += sizeof(CFISection<A>);
2191                                 break;
2192                         case sectionTypeCString:
2193                                 *objects++ = new (space) CStringSection<A>(*this, *_file, machOSects[i].sect);
2194                                 space += sizeof(CStringSection<A>);
2195                                 break;
2196                         case sectionTypeCStringPointer:
2197                                 *objects++ = new (space) PointerToCStringSection<A>(*this, *_file, machOSects[i].sect);
2198                                 space += sizeof(PointerToCStringSection<A>);
2199                                 break;
2200                         case sectionTypeObjC1ClassRefs:
2201                                 *objects++ = new (space) Objc1ClassReferences<A>(*this, *_file, machOSects[i].sect);
2202                                 space += sizeof(Objc1ClassReferences<A>);
2203                                 break;
2204                         case sectionTypeUTF16Strings:
2205                                 *objects++ = new (space) UTF16StringSection<A>(*this, *_file, machOSects[i].sect);
2206                                 space += sizeof(UTF16StringSection<A>);
2207                                 break;
2208                         case sectionTypeCFString:
2209                                 *objects++ = new (space) CFStringSection<A>(*this, *_file, machOSects[i].sect);
2210                                 space += sizeof(CFStringSection<A>);
2211                                 break;
2212                         case sectionTypeObjC2ClassRefs:
2213                                 *objects++ = new (space) ObjC2ClassRefsSection<A>(*this, *_file, machOSects[i].sect);
2214                                 space += sizeof(ObjC2ClassRefsSection<A>);
2215                                 break;
2216                         case typeObjC2CategoryList:
2217                                 *objects++ = new (space) ObjC2CategoryListSection<A>(*this, *_file, machOSects[i].sect);
2218                                 space += sizeof(ObjC2CategoryListSection<A>);
2219                                 break;
2220                         case sectionTypeObjC1Classes:
2221                                 *objects++ = new (space) ObjC1ClassSection<A>(*this, *_file, machOSects[i].sect);
2222                                 space += sizeof(ObjC1ClassSection<A>);
2223                                 break;
2224                         case sectionTypeSymboled:
2225                                 *objects++ = new (space) SymboledSection<A>(*this, *_file, machOSects[i].sect);
2226                                 space += sizeof(SymboledSection<A>);
2227                                 break;
2228                         case sectionTypeTLVDefs:
2229                                 *objects++ = new (space) TLVDefsSection<A>(*this, *_file, machOSects[i].sect);
2230                                 space += sizeof(TLVDefsSection<A>);
2231                                 break;
2232                         case sectionTypeCompactUnwind:
2233                                 _compactUnwindSection = new (space) CUSection<A>(*this, *_file, machOSects[i].sect);
2234                                 *objects++ = _compactUnwindSection;
2235                                 space += sizeof(CUSection<A>);
2236                                 break;
2237                         case sectionTypeTentativeDefinitions:
2238                                 *objects++ = new (space) TentativeDefinitionSection<A>(*this, *_file);
2239                                 space += sizeof(TentativeDefinitionSection<A>);
2240                                 break;
2241                         case sectionTypeAbsoluteSymbols:
2242                                 _absoluteSection = new (space) AbsoluteSymbolSection<A>(*this, *_file);
2243                                 *objects++ = _absoluteSection;
2244                                 space += sizeof(AbsoluteSymbolSection<A>);
2245                                 break;
2246                         default:
2247                                 throw "internal error uknown SectionType";
2248                 }
2249         }
2250 }
2251
2252
2253 template <typename A>
2254 Section<A>* Parser<A>::sectionForAddress(typename A::P::uint_t addr)
2255 {
2256         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2257                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2258                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2259                 if ( sect != NULL ) {
2260                         if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
2261                                 return _file->_sectionsArray[i];
2262                         }
2263                 }
2264         }
2265         // not strictly in any section
2266         // may be in a zero length section
2267         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2268                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2269                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2270                 if ( sect != NULL ) {
2271                         if ( (sect->addr() == addr) && (sect->size() == 0) ) {
2272                                 return _file->_sectionsArray[i];
2273                         }
2274                 }
2275         }
2276
2277         throwf("sectionForAddress(0x%llX) address not in any section", (uint64_t)addr);
2278 }
2279
2280 template <typename A>
2281 Section<A>* Parser<A>::sectionForNum(unsigned int num)
2282 {
2283         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2284                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2285                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2286                 if ( sect != NULL ) {
2287                         if ( num == (unsigned int)((sect - _sectionsStart)+1) )
2288                                 return _file->_sectionsArray[i];
2289                 }
2290         }
2291         throwf("sectionForNum(%u) section number not for any section", num);
2292 }
2293
2294 template <typename A>
2295 Atom<A>* Parser<A>::findAtomByAddress(pint_t addr)
2296 {
2297         Section<A>* section = this->sectionForAddress(addr);
2298         return section->findAtomByAddress(addr);
2299 }
2300
2301 template <typename A>
2302 Atom<A>* Parser<A>::findAtomByAddressOrNullIfStub(pint_t addr)
2303 {
2304         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) )
2305                 return NULL;
2306         return findAtomByAddress(addr);
2307 }
2308
2309 template <typename A>
2310 Atom<A>* Parser<A>::findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom)
2311 {
2312         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2313                 // target is a stub, remove indirection
2314                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2315                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2316                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2317                 // can't be to external weak symbol
2318                 assert( (this->combineFromSymbol(sym) != ld::Atom::combineByName) || (this->scopeFromSymbol(sym) != ld::Atom::scopeGlobal) );
2319                 *offsetInAtom = 0;
2320                 return this->findAtomByName(this->nameFromSymbol(sym));
2321         }
2322         Atom<A>* target = this->findAtomByAddress(addr);
2323         *offsetInAtom = addr - target->_objAddress;
2324         return target;
2325 }
2326
2327 template <typename A>
2328 Atom<A>* Parser<A>::findAtomByName(const char* name)
2329 {
2330         uint8_t* p = _file->_atomsArray;
2331         for(int i=_file->_atomsArrayCount; i > 0; --i) {
2332                 Atom<A>* atom = (Atom<A>*)p;
2333                 if ( strcmp(name, atom->name()) == 0 )
2334                         return atom;
2335                 p += sizeof(Atom<A>);
2336         }
2337         return NULL;
2338 }
2339
2340 template <typename A>
2341 void Parser<A>::findTargetFromAddress(pint_t addr, TargetDesc& target)
2342 {
2343         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2344                 // target is a stub, remove indirection
2345                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2346                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2347                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2348                 target.atom = NULL;
2349                 target.name = this->nameFromSymbol(sym);
2350                 target.weakImport = this->weakImportFromSymbol(sym);
2351                 target.addend = 0;
2352                 return;
2353         }
2354         Section<A>* section = this->sectionForAddress(addr);
2355         target.atom = section->findAtomByAddress(addr);
2356         target.addend = addr - target.atom->_objAddress;
2357         target.weakImport = false;
2358         target.name = NULL;
2359 }
2360
2361 template <typename A>
2362 void Parser<A>::findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target)
2363 {
2364         findTargetFromAddress(baseAddr, target);
2365         target.addend = addr - target.atom->_objAddress;
2366 }
2367
2368 template <typename A>
2369 void Parser<A>::findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum, TargetDesc& target)
2370 {
2371         if ( sectNum == R_ABS ) {
2372                 // target is absolute symbol that corresponds to addr
2373                 if ( _absoluteSection != NULL ) {
2374                         target.atom = _absoluteSection->findAbsAtomForValue(addr);
2375                         if ( target.atom != NULL ) {
2376                                 target.name = NULL;
2377                                 target.weakImport = false;
2378                                 target.addend = 0;
2379                                 return;
2380                         }
2381                 }
2382                 throwf("R_ABS reloc but no absolute symbol at target address");
2383         }
2384
2385         if ( hasStubsSection() && (stubsSectionNum() == sectNum) ) {
2386                 // target is a stub, remove indirection
2387                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2388                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2389                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2390                 // use direct reference when stub is to a static function
2391                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (this->nameFromSymbol(sym)[0] == 'L')) ) {
2392                         this->findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
2393                 }
2394                 else {
2395                         target.atom = NULL;
2396                         target.name = this->nameFromSymbol(sym);
2397                         target.weakImport = this->weakImportFromSymbol(sym);
2398                         target.addend = 0;
2399                 }
2400                 return;
2401         }
2402         Section<A>* section = this->sectionForNum(sectNum);
2403         target.atom = section->findAtomByAddress(addr);
2404         if ( target.atom == NULL ) {
2405                 typedef typename A::P::sint_t sint_t;
2406                 sint_t a = (sint_t)addr;
2407                 sint_t sectStart = (sint_t)(section->machoSection()->addr());
2408                 sint_t sectEnd  = sectStart + section->machoSection()->size();
2409                 if ( a < sectStart ) {
2410                         // target address is before start of section, so must be negative addend
2411                         target.atom = section->findAtomByAddress(sectStart);
2412                         target.addend = a - sectStart;
2413                         target.weakImport = false;
2414                         target.name = NULL;
2415                         return;
2416                 }
2417                 else if ( a >= sectEnd ) {
2418                         target.atom = section->findAtomByAddress(sectEnd-1);
2419                         target.addend = a - sectEnd;
2420                         target.weakImport = false;
2421                         target.name = NULL;
2422                         return;
2423                 }
2424         }
2425         assert(target.atom != NULL);
2426         target.addend = addr - target.atom->_objAddress;
2427         target.weakImport = false;
2428         target.name = NULL;
2429 }
2430
2431 template <typename A>
2432 void Parser<A>::addDtraceExtraInfos(const SourceLocation& src, const char* providerName)
2433 {
2434         // for every ___dtrace_stability$* and ___dtrace_typedefs$* undefine with
2435         // a matching provider name, add a by-name kDtraceTypeReference at probe site
2436         const char* dollar = strchr(providerName, '$');
2437         if ( dollar != NULL ) {
2438                 int providerNameLen = dollar-providerName+1;
2439                 for ( std::vector<const char*>::iterator it = _dtraceProviderInfo.begin(); it != _dtraceProviderInfo.end(); ++it) {
2440                         const char* typeDollar = strchr(*it, '$');
2441                         if ( typeDollar != NULL ) {
2442                                 if ( strncmp(typeDollar+1, providerName, providerNameLen) == 0 ) {
2443                                         addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindDtraceExtra,false, *it);
2444                                 }
2445                         }
2446                 }
2447         }
2448 }
2449
2450 template <typename A>
2451 const char* Parser<A>::scanSymbolTableForAddress(uint64_t addr)
2452 {
2453         uint64_t closestSymAddr = 0;
2454         const char* closestSymName = NULL;
2455         for (uint32_t i=0; i < this->_symbolCount; ++i) {
2456                 const macho_nlist<P>& sym =     symbolFromIndex(i);
2457                 // ignore stabs
2458                 if ( (sym.n_type() & N_STAB) != 0 )
2459                         continue;
2460
2461                 // only look at definitions
2462                 if ( (sym.n_type() & N_TYPE) != N_SECT )
2463                         continue;
2464
2465                 // return with exact match
2466                 if ( sym.n_value() == addr )
2467                         return nameFromSymbol(sym);
2468
2469                 // record closest seen so far
2470                 if ( (sym.n_value() < addr) && ((sym.n_value() > closestSymAddr) || (closestSymName == NULL)) )
2471                         closestSymName = nameFromSymbol(sym);
2472         }
2473
2474         return (closestSymName != NULL) ? closestSymName : "unknown";
2475 }
2476
2477
2478 template <typename A>
2479 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind setKind, const TargetDesc& target)
2480 {
2481         // some fixup pairs can be combined
2482         ld::Fixup::Cluster cl = ld::Fixup::k1of3;
2483         ld::Fixup::Kind firstKind = ld::Fixup::kindSetTargetAddress;
2484         bool combined = false;
2485         if ( target.addend == 0 ) {
2486                 cl = ld::Fixup::k1of1;
2487                 combined = true;
2488                 switch ( setKind ) {
2489                         case ld::Fixup::kindStoreLittleEndian32:
2490                                 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian32;
2491                                 break;
2492                         case ld::Fixup::kindStoreLittleEndian64:
2493                                 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian64;
2494                                 break;
2495                         case ld::Fixup::kindStoreBigEndian32:
2496                                 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian32;
2497                                 break;
2498                         case ld::Fixup::kindStoreBigEndian64:
2499                                 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian64;
2500                                 break;
2501                         case ld::Fixup::kindStoreX86BranchPCRel32:
2502                                 firstKind = ld::Fixup::kindStoreTargetAddressX86BranchPCRel32;
2503                                 break;
2504                         case ld::Fixup::kindStoreX86PCRel32:
2505                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32;
2506                                 break;
2507                         case ld::Fixup::kindStoreX86PCRel32GOTLoad:
2508                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32GOTLoad;
2509                                 break;
2510                         case ld::Fixup::kindStoreX86PCRel32TLVLoad:
2511                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32TLVLoad;
2512                                 break;
2513                         case ld::Fixup::kindStoreX86Abs32TLVLoad:
2514                                 firstKind = ld::Fixup::kindStoreTargetAddressX86Abs32TLVLoad;
2515                                 break;
2516                         case ld::Fixup::kindStoreARMBranch24:
2517                                 firstKind = ld::Fixup::kindStoreTargetAddressARMBranch24;
2518                                 break;
2519                         case ld::Fixup::kindStoreThumbBranch22:
2520                                 firstKind = ld::Fixup::kindStoreTargetAddressThumbBranch22;
2521                                 break;
2522                         default:
2523                                 combined = false;
2524                                 cl = ld::Fixup::k1of2;
2525                                 break;
2526                 }
2527         }
2528
2529         if ( target.atom != NULL ) {
2530                 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2531                         addFixup(src, cl, firstKind, target.atom);
2532                 }
2533                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2534                         addFixup(src, cl, firstKind, ld::Fixup::bindingByContentBound, target.atom);
2535                 }
2536                 else if ( (src.atom->section().type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
2537                         // backing string in CFStrings should always be direct
2538                         addFixup(src, cl, firstKind, target.atom);
2539                 }
2540                 else {
2541                         // change direct fixup to by-name fixup
2542                         addFixup(src, cl, firstKind, false, target.atom->name());
2543                 }
2544         }
2545         else {
2546                 addFixup(src, cl, firstKind, target.weakImport, target.name);
2547         }
2548         if ( target.addend == 0 ) {
2549                 if ( ! combined )
2550                         addFixup(src, ld::Fixup::k2of2, setKind);
2551         }
2552         else {
2553                 addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, target.addend);
2554                 addFixup(src, ld::Fixup::k3of3, setKind);
2555         }
2556 }
2557
2558 template <typename A>
2559 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase)
2560 {
2561         ld::Fixup::Cluster cl = (target.addend == 0) ? ld::Fixup::k1of4 : ld::Fixup::k1of5;
2562         if ( target.atom != NULL ) {
2563                 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2564                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.atom);
2565                 }
2566                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2567                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
2568                 }
2569                 else {
2570                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
2571                 }
2572         }
2573         else {
2574                 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.weakImport, target.name);
2575         }
2576         if ( target.addend == 0 ) {
2577                 assert(picBase.atom != NULL);
2578                 addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2579                 addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, picBase.addend);
2580                 addFixup(src, ld::Fixup::k4of4, kind);
2581         }
2582         else {
2583                 addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
2584                 addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2585                 addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, picBase.addend);
2586                 addFixup(src, ld::Fixup::k5of5, kind);
2587         }
2588 }
2589
2590
2591
2592 template <typename A>
2593 uint32_t TentativeDefinitionSection<A>::computeAtomCount(class Parser<A>& parser,
2594                                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
2595                                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
2596 {
2597         return parser.tentativeDefinitionCount();
2598 }
2599
2600 template <typename A>
2601 uint32_t TentativeDefinitionSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2602                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
2603                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
2604 {
2605         this->_beginAtoms = (Atom<A>*)p;
2606         uint32_t count = 0;
2607         for (uint32_t i=parser.undefinedStartIndex(); i < parser.undefinedEndIndex(); ++i) {
2608                 const macho_nlist<P>& sym =     parser.symbolFromIndex(i);
2609                 if ( ((sym.n_type() & N_TYPE) == N_UNDF) && (sym.n_value() != 0) ) {
2610                         uint64_t size = sym.n_value();
2611                         uint8_t alignP2 = GET_COMM_ALIGN(sym.n_desc());
2612                         if ( alignP2 == 0 ) {
2613                                 // common symbols align to their size
2614                                 // that is, a 4-byte common aligns to 4-bytes
2615                                 // if this size is not a power of two,
2616                                 // then round up to the next power of two
2617                                 alignP2 = 63 - (uint8_t)__builtin_clzll(size);
2618                                 if ( size != (1ULL << alignP2) )
2619                                         ++alignP2;
2620                         }
2621                         // limit alignment of extremely large commons to 2^15 bytes (8-page)
2622                         if ( alignP2 > 15 )
2623                                 alignP2 = 15;
2624                         Atom<A>* allocatedSpace = (Atom<A>*)p;
2625                         new (allocatedSpace) Atom<A>(*this, parser.nameFromSymbol(sym), (pint_t)ULLONG_MAX, size,
2626                                                                                 ld::Atom::definitionTentative,  ld::Atom::combineByName,
2627                                                                                 parser.scopeFromSymbol(sym), ld::Atom::typeZeroFill, ld::Atom::symbolTableIn,
2628                                                                                 parser.dontDeadStripFromSymbol(sym), false, false, ld::Atom::Alignment(alignP2) );
2629                         p += sizeof(Atom<A>);
2630                         ++count;
2631                 }
2632         }
2633         this->_endAtoms = (Atom<A>*)p;
2634         return count;
2635 }
2636
2637
2638 template <typename A>
2639 uint32_t AbsoluteSymbolSection<A>::computeAtomCount(class Parser<A>& parser,
2640                                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
2641                                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
2642 {
2643         return parser.absoluteSymbolCount();
2644 }
2645
2646 template <typename A>
2647 uint32_t AbsoluteSymbolSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2648                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
2649                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
2650 {
2651         this->_beginAtoms = (Atom<A>*)p;
2652         uint32_t count = 0;
2653         for (uint32_t i=0; i < parser.symbolCount(); ++i) {
2654                 const macho_nlist<P>& sym =     parser.symbolFromIndex(i);
2655                 if ( (sym.n_type() & N_TYPE) != N_ABS )
2656                         continue;
2657                 const char* absName = parser.nameFromSymbol(sym);
2658                 // ignore .objc_class_name_* symbols
2659                 if ( strncmp(absName, ".objc_class_name_", 17) == 0 )
2660                         continue;
2661                 // ignore .objc_class_name_* symbols
2662                 if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
2663                         continue;
2664                 // ignore empty *.eh symbols
2665                 if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
2666                         continue;
2667
2668                 Atom<A>* allocatedSpace = (Atom<A>*)p;
2669                 new (allocatedSpace) Atom<A>(*this, parser, sym, 0);
2670                 p += sizeof(Atom<A>);
2671                 ++count;
2672         }
2673         this->_endAtoms = (Atom<A>*)p;
2674         return count;
2675 }
2676
2677 template <typename A>
2678 Atom<A>* AbsoluteSymbolSection<A>::findAbsAtomForValue(typename A::P::uint_t value)
2679 {
2680         Atom<A>* end = this->_endAtoms;
2681         for(Atom<A>* p = this->_beginAtoms; p < end; ++p) {
2682                 if ( p->_objAddress == value )
2683                         return p;
2684         }
2685         return NULL;
2686 }
2687
2688
2689 template <typename A>
2690 uint32_t Parser<A>::indirectSymbol(uint32_t indirectIndex)
2691 {
2692         if ( indirectIndex >= _indirectTableCount )
2693                 throw "indirect symbol index out of range";
2694         return E::get32(_indirectTable[indirectIndex]);
2695 }
2696
2697 template <typename A>
2698 const macho_nlist<typename A::P>& Parser<A>::symbolFromIndex(uint32_t index)
2699 {
2700         if ( index > _symbolCount )
2701                 throw "symbol index out of range";
2702         return _symbols[index];
2703 }
2704
2705 template <typename A>
2706 const macho_section<typename A::P>*     Parser<A>::machOSectionFromSectionIndex(uint32_t index)
2707 {
2708         if ( index >= _machOSectionsCount )
2709                 throw "section index out of range";
2710         return &_sectionsStart[index];
2711 }
2712
2713 template <typename A>
2714 uint32_t Parser<A>::symbolIndexFromIndirectSectionAddress(pint_t addr, const macho_section<P>* sect)
2715 {
2716         uint32_t elementSize = 0;
2717         switch ( sect->flags() & SECTION_TYPE ) {
2718                 case S_SYMBOL_STUBS:
2719                         elementSize = sect->reserved2();
2720                         break;
2721                 case S_LAZY_SYMBOL_POINTERS:
2722                 case S_NON_LAZY_SYMBOL_POINTERS:
2723                         elementSize = sizeof(pint_t);
2724                         break;
2725                 default:
2726                         throw "section does not use inirect symbol table";
2727         }
2728         uint32_t indexInSection = (addr - sect->addr()) / elementSize;
2729         uint32_t indexIntoIndirectTable = sect->reserved1() + indexInSection;
2730         return this->indirectSymbol(indexIntoIndirectTable);
2731 }
2732
2733
2734
2735 template <typename A>
2736 const char* Parser<A>::nameFromSymbol(const macho_nlist<P>& sym)
2737 {
2738         return &_strings[sym.n_strx()];
2739 }
2740
2741 template <typename A>
2742 ld::Atom::Scope Parser<A>::scopeFromSymbol(const macho_nlist<P>& sym)
2743 {
2744         if ( (sym.n_type() & N_EXT) == 0 )
2745                 return ld::Atom::scopeTranslationUnit;
2746         else if ( (sym.n_type() & N_PEXT) != 0 )
2747                 return ld::Atom::scopeLinkageUnit;
2748         else if ( this->nameFromSymbol(sym)[0] == 'l' ) // since all 'l' symbols will be remove, don't make them global
2749                 return ld::Atom::scopeLinkageUnit;
2750         else
2751                 return ld::Atom::scopeGlobal;
2752 }
2753
2754 template <typename A>
2755 ld::Atom::Definition Parser<A>::definitionFromSymbol(const macho_nlist<P>& sym)
2756 {
2757         switch ( sym.n_type() & N_TYPE ) {
2758                 case N_ABS:
2759                         return ld::Atom::definitionAbsolute;
2760                 case N_SECT:
2761                         return ld::Atom::definitionRegular;
2762                 case N_UNDF:
2763                         if ( sym.n_value() != 0 )
2764                                 return ld::Atom::definitionTentative;
2765         }
2766         throw "definitionFromSymbol() bad symbol";
2767 }
2768
2769 template <typename A>
2770 ld::Atom::Combine Parser<A>::combineFromSymbol(const macho_nlist<P>& sym)
2771 {
2772         if ( sym.n_desc() & N_WEAK_DEF )
2773                 return ld::Atom::combineByName;
2774         else
2775                 return ld::Atom::combineNever;
2776 }
2777
2778
2779 template <typename A>
2780 ld::Atom::SymbolTableInclusion Parser<A>::inclusionFromSymbol(const macho_nlist<P>& sym)
2781 {
2782         const char* symbolName = nameFromSymbol(sym);
2783         // labels beginning with 'l' (lowercase ell) are automatically removed in final linked images <rdar://problem/4571042>
2784         // labels beginning with 'L' should have been stripped by the assembler, so are stripped now
2785         if ( sym.n_desc() & REFERENCED_DYNAMICALLY )
2786                 return ld::Atom::symbolTableInAndNeverStrip;
2787         else if ( symbolName[0] == 'l' )
2788                 return ld::Atom::symbolTableNotInFinalLinkedImages;
2789         else if ( symbolName[0] == 'L' )
2790                 return ld::Atom::symbolTableNotIn;
2791         else
2792                 return ld::Atom::symbolTableIn;
2793 }
2794
2795 template <typename A>
2796 bool Parser<A>::dontDeadStripFromSymbol(const macho_nlist<P>& sym)
2797 {
2798         return ( (sym.n_desc() & (N_NO_DEAD_STRIP|REFERENCED_DYNAMICALLY)) != 0 );
2799 }
2800
2801 template <typename A>
2802 bool Parser<A>::isThumbFromSymbol(const macho_nlist<P>& sym)
2803 {
2804         return ( sym.n_desc() & N_ARM_THUMB_DEF );
2805 }
2806
2807 template <typename A>
2808 bool Parser<A>::weakImportFromSymbol(const macho_nlist<P>& sym)
2809 {
2810         return ( ((sym.n_type() & N_TYPE) == N_UNDF) && ((sym.n_desc() & N_WEAK_REF) != 0) );
2811 }
2812
2813 template <typename A>
2814 bool Parser<A>::resolverFromSymbol(const macho_nlist<P>& sym)
2815 {
2816         return ( sym.n_desc() & N_SYMBOL_RESOLVER );
2817 }
2818
2819
/* Advance *OFFSET past one LEB128 value (signed or unsigned), never
   moving beyond END.  Bytes are consumed up to and including the first
   one whose high bit is clear.  */
static void
skip_leb128 (const uint8_t ** offset, const uint8_t * end)
{
  const uint8_t * cursor = *offset;

  while (cursor != end)
    {
      const uint8_t byte = *cursor++;
      if (byte < 0x80)
	break;
    }
  *offset = cursor;
}
2829
/* Decode a ULEB128 at *OFFSET into a 64-bit word and advance *OFFSET
   past it.  Returns (uint64_t)-1 if END is reached before the value is
   complete, or if the value does not fit in 64 bits; in the overflow
   case the remaining bytes of the uleb128 are still consumed.  */
static uint64_t
read_uleb128 (const uint8_t ** offset, const uint8_t * end)
{
  const uint64_t failure = (uint64_t) -1;
  uint64_t value = 0;
  int shift = 0;

  for (;;)
    {
      if (*offset == end)
	return failure;

      const uint8_t byte = *(*offset)++;
      const uint64_t payload = byte & 0x7f;

      /* the payload must survive being shifted into place and back */
      if (shift >= 64 || ((payload << shift) >> shift) != payload)
	value = failure;
      else
	{
	  value |= payload << shift;
	  shift += 7;
	}

      if (byte < 0x80)
	return value;
    }
}
2853
2854
2855 /* Skip over a DWARF attribute of form FORM.  */
2856 template <typename A>
2857 bool Parser<A>::skip_form(const uint8_t ** offset, const uint8_t * end, uint64_t form,
2858                                                         uint8_t addr_size, bool dwarf64)
2859 {
2860   int64_t sz=0;
2861
2862   switch (form)
2863     {
2864     case DW_FORM_addr:
2865       sz = addr_size;
2866       break;
2867
2868     case DW_FORM_block2:
2869       if (end - *offset < 2)
2870         return false;
2871       sz = 2 + A::P::E::get16(*(uint16_t*)offset);
2872       break;
2873
2874     case DW_FORM_block4:
2875       if (end - *offset < 4)
2876         return false;
2877       sz = 2 + A::P::E::get32(*(uint32_t*)offset);
2878       break;
2879
2880     case DW_FORM_data2:
2881     case DW_FORM_ref2:
2882       sz = 2;
2883       break;
2884
2885     case DW_FORM_data4:
2886     case DW_FORM_ref4:
2887       sz = 4;
2888       break;
2889
2890     case DW_FORM_data8:
2891     case DW_FORM_ref8:
2892       sz = 8;
2893       break;
2894
2895     case DW_FORM_string:
2896       while (*offset != end && **offset)
2897         ++*offset;
2898     case DW_FORM_data1:
2899     case DW_FORM_flag:
2900     case DW_FORM_ref1:
2901       sz = 1;
2902       break;
2903
2904     case DW_FORM_block:
2905       sz = read_uleb128 (offset, end);
2906       break;
2907
2908     case DW_FORM_block1:
2909       if (*offset == end)
2910         return false;
2911       sz = 1 + **offset;
2912       break;
2913
2914     case DW_FORM_sdata:
2915     case DW_FORM_udata:
2916     case DW_FORM_ref_udata:
2917       skip_leb128 (offset, end);
2918       return true;
2919
2920     case DW_FORM_strp:
2921     case DW_FORM_ref_addr:
2922       sz = 4;
2923       break;
2924
2925     default:
2926       return false;
2927     }
2928   if (end - *offset < sz)
2929     return false;
2930   *offset += sz;
2931   return true;
2932 }
2933
2934
2935 template <typename A>
2936 const char* Parser<A>::getDwarfString(uint64_t form, const uint8_t* p)
2937 {
2938         if ( form == DW_FORM_string )
2939                 return (const char*)p;
2940         else if ( form == DW_FORM_strp ) {
2941                 uint32_t offset = E::get32(*((uint32_t*)p));
2942                 const char* dwarfStrings = (char*)_file->fileContent() + _file->_dwarfDebugStringSect->offset();
2943                 if ( offset > _file->_dwarfDebugStringSect->size() ) {
2944                         warning("unknown dwarf DW_FORM_strp (offset=0x%08X) is too big in %s\n", offset, this->_path);
2945                         return NULL;
2946                 }
2947                 return &dwarfStrings[offset];
2948         }
2949         warning("unknown dwarf string encoding (form=%lld) in %s\n", form, this->_path);
2950         return NULL;
2951 }
2952
2953
2954 template <typename A>
2955 struct AtomAndLineInfo {
2956         Atom<A>*                        atom;
2957         ld::Atom::LineInfo      info;
2958 };
2959
2960
2961 // <rdar://problem/5591394> Add support to ld64 for N_FUN stabs when used for symbolic constants
2962 // Returns whether a stabStr belonging to an N_FUN stab represents a
2963 // symbolic constant rather than a function
2964 template <typename A>
2965 bool Parser<A>::isConstFunStabs(const char *stabStr)
2966 {
2967         const char* colon;
2968         // N_FUN can be used for both constants and for functions. In case it's a constant,
2969         // the format of the stabs string is "symname:c=<value>;"
2970         // ':' cannot appear in the symbol name, except if it's an Objective-C method
2971         // (in which case the symbol name starts with + or -, and then it's definitely
2972         //  not a constant)
2973         return (stabStr != NULL) && (stabStr[0] != '+') && (stabStr[0] != '-')
2974                         && ((colon = strchr(stabStr, ':')) != NULL)
2975                         && (colon[1] == 'c') && (colon[2] == '=');
2976 }
2977
2978
2979 template <typename A>
2980 void Parser<A>::parseDebugInfo()
2981 {
2982         // check for dwarf __debug_info section
2983         if ( _file->_dwarfDebugInfoSect == NULL ) {
2984                 // if no DWARF debug info, look for stabs
2985                 this->parseStabs();
2986                 return;
2987         }
2988         if ( _file->_dwarfDebugInfoSect->size() == 0 )
2989                 return;
2990
2991         uint64_t stmtList;
2992         const char* tuDir;
2993         const char* tuName;
2994         if ( !read_comp_unit(&tuName, &tuDir, &stmtList) ) {
2995                 // if can't parse dwarf, warn and give up
2996                 _file->_dwarfTranslationUnitPath = NULL;
2997                 warning("can't parse dwarf compilation unit info in %s", _path);
2998                 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoNone;
2999                 return;
3000         }
3001         if ( (tuName != NULL) && (tuName[0] == '/') ) {
3002                 _file->_dwarfTranslationUnitPath = tuName;
3003         }
3004         else if ( (tuDir != NULL) && (tuName != NULL) ) {
3005                 asprintf((char**)&(_file->_dwarfTranslationUnitPath), "%s/%s", tuDir, tuName);
3006         }
3007         else if ( tuDir == NULL ) {
3008                 _file->_dwarfTranslationUnitPath = tuName;
3009         }
3010         else {
3011                 _file->_dwarfTranslationUnitPath = NULL;
3012         }
3013
3014         // add line number info to atoms from dwarf
3015         std::vector<AtomAndLineInfo<A> > entries;
3016         entries.reserve(64);
3017         if ( _file->_debugInfoKind == ld::relocatable::File::kDebugInfoDwarf ) {
3018                 // file with just data will have no __debug_line info
3019                 if ( (_file->_dwarfDebugLineSect != NULL) && (_file->_dwarfDebugLineSect->size() != 0) ) {
3020                         // validate stmt_list
3021                         if ( (stmtList != (uint64_t)-1) && (stmtList < _file->_dwarfDebugLineSect->size()) ) {
3022                                 const uint8_t* debug_line = (uint8_t*)_file->fileContent() + _file->_dwarfDebugLineSect->offset();
3023                                 struct line_reader_data* lines = line_open(&debug_line[stmtList],
3024                                                                                                                 _file->_dwarfDebugLineSect->size() - stmtList, E::little_endian);
3025                                 struct line_info result;
3026                                 Atom<A>* curAtom = NULL;
3027                                 uint32_t curAtomOffset = 0;
3028                                 uint32_t curAtomAddress = 0;
3029                                 uint32_t curAtomSize = 0;
3030                                 std::map<uint32_t,const char*>  dwarfIndexToFile;
3031                                 if ( lines != NULL ) {
3032                                         while ( line_next(lines, &result, line_stop_pc) ) {
3033                                                 //fprintf(stderr, "curAtom=%p, result.pc=0x%llX, result.line=%llu, result.end_of_sequence=%d,"
3034                                                 //                                " curAtomAddress=0x%X, curAtomSize=0x%X\n",
3035                                                 //              curAtom, result.pc, result.line, result.end_of_sequence, curAtomAddress, curAtomSize);
3036                                                 // work around weird debug line table compiler generates if no functions in __text section
3037                                                 if ( (curAtom == NULL) && (result.pc == 0) && result.end_of_sequence && (result.file == 1))
3038                                                         continue;
3039                                                 // for performance, see if in next pc is in current atom
3040                                                 if ( (curAtom != NULL) && (curAtomAddress <= result.pc) && (result.pc < (curAtomAddress+curAtomSize)) ) {
3041                                                         curAtomOffset = result.pc - curAtomAddress;
3042                                                 }
3043                                                 // or pc at end of current atom
3044                                                 else if ( result.end_of_sequence && (curAtom != NULL) && (result.pc == (curAtomAddress+curAtomSize)) ) {
3045                                                         curAtomOffset = result.pc - curAtomAddress;
3046                                                 }
3047                                                 // or only one function that is a one line function
3048                                                 else if ( result.end_of_sequence && (curAtom == NULL) && (this->findAtomByAddress(0) != NULL) && (result.pc == this->findAtomByAddress(0)->size()) ) {
3049                                                         curAtom                 = this->findAtomByAddress(0);
3050                                                         curAtomOffset   = result.pc - curAtom->objectAddress();
3051                                                         curAtomAddress  = curAtom->objectAddress();
3052                                                         curAtomSize             = curAtom->size();
3053                                                 }
3054                                                 else {
3055                                                         // do slow look up of atom by address
3056                                                         try {
3057                                                                 curAtom = this->findAtomByAddress(result.pc);
3058                                                         }
3059                                                         catch (...) {
3060                                                                 // in case of bug in debug info, don't abort link, just limp on
3061                                                                 curAtom = NULL;
3062                                                         }
3063                                                         if ( curAtom == NULL )
3064                                                                 break; // file has line info but no functions
3065                                                         if ( result.end_of_sequence && (curAtomAddress+curAtomSize < result.pc) ) {
3066                                                                 // a one line function can be returned by line_next() as one entry with pc at end of blob
3067                                                                 // look for alt atom starting at end of previous atom
3068                                                                 uint32_t previousEnd = curAtomAddress+curAtomSize;
3069                                                                 Atom<A>* alt = this->findAtomByAddressOrNullIfStub(previousEnd);
3070                                                                 if ( alt == NULL )
3071                                                                         continue; // ignore spurious debug info for stubs
3072                                                                 if ( result.pc <= alt->objectAddress() + alt->size() ) {
3073                                                                         curAtom                 = alt;
3074                                                                         curAtomOffset   = result.pc - alt->objectAddress();
3075                                                                         curAtomAddress  = alt->objectAddress();
3076                                                                         curAtomSize             = alt->size();
3077                                                                 }
3078                                                                 else {
3079                                                                         curAtomOffset   = result.pc - curAtom->objectAddress();
3080                                                                         curAtomAddress  = curAtom->objectAddress();
3081                                                                         curAtomSize             = curAtom->size();
3082                                                                 }
3083                                                         }
3084                                                         else {
3085                                                                 curAtomOffset   = result.pc - curAtom->objectAddress();
3086                                                                 curAtomAddress  = curAtom->objectAddress();
3087                                                                 curAtomSize             = curAtom->size();
3088                                                         }
3089                                                 }
3090                                                 const char* filename;
3091                                                 std::map<uint32_t,const char*>::iterator pos = dwarfIndexToFile.find(result.file);
3092                                                 if ( pos == dwarfIndexToFile.end() ) {
3093                                                         filename = line_file(lines, result.file);
3094                                                         dwarfIndexToFile[result.file] = filename;
3095                                                 }
3096                                                 else {
3097                                                         filename = pos->second;
3098                                                 }
3099                                                 // only record for ~8000 line info records per function
3100                                                 if ( curAtom->roomForMoreLineInfoCount() ) {
3101                                                         AtomAndLineInfo<A> entry;
3102                                                         entry.atom = curAtom;
3103                                                         entry.info.atomOffset = curAtomOffset;
3104                                                         entry.info.fileName = filename;
3105                                                         entry.info.lineNumber = result.line;
3106                                                         //fprintf(stderr, "addr=0x%08llX, line=%lld, file=%s, atom=%s, atom.size=0x%X, end=%d\n",
3107                                                         //              result.pc, result.line, filename, curAtom->name(), curAtomSize, result.end_of_sequence);
3108                                                         entries.push_back(entry);
3109                                                         curAtom->incrementLineInfoCount();
3110                                                 }
3111                                                 if ( result.end_of_sequence ) {
3112                                                         curAtom = NULL;
3113                                                 }
3114                                         }
3115                                         line_free(lines);
3116                                 }
3117                         }
3118                 }
3119         }
3120
3121         // assign line info start offset for each atom
3122         uint8_t* p = _file->_atomsArray;
3123         uint32_t liOffset = 0;
3124         for(int i=_file->_atomsArrayCount; i > 0; --i) {
3125                 Atom<A>* atom = (Atom<A>*)p;
3126                 atom->_lineInfoStartIndex = liOffset;
3127                 liOffset += atom->_lineInfoCount;
3128                 atom->_lineInfoCount = 0;
3129                 p += sizeof(Atom<A>);
3130         }
3131         assert(liOffset == entries.size());
3132         _file->_lineInfos.reserve(liOffset);
3133
3134         // copy each line info for each atom
3135         for (typename std::vector<AtomAndLineInfo<A> >::iterator it = entries.begin(); it != entries.end(); ++it) {
3136                 uint32_t slot = it->atom->_lineInfoStartIndex + it->atom->_lineInfoCount;
3137                 _file->_lineInfos[slot] = it->info;
3138                 it->atom->_lineInfoCount++;
3139         }
3140
3141         // done with temp vector
3142         entries.clear();
3143 }
3144
3145 template <typename A>
3146 void Parser<A>::parseStabs()
3147 {
3148         // scan symbol table for stabs entries
3149         Atom<A>* currentAtom = NULL;
3150         pint_t currentAtomAddress = 0;
3151         enum { start, inBeginEnd, inFun } state = start;
3152         for (uint32_t symbolIndex = 0; symbolIndex < _symbolCount; ++symbolIndex ) {
3153                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
3154                 bool useStab = true;
3155                 uint8_t type = sym.n_type();
3156                 const char* symString = (sym.n_strx() != 0) ? this->nameFromSymbol(sym) : NULL;
3157                 if ( (type & N_STAB) != 0 ) {
3158                         _file->_debugInfoKind =  (_hasUUID ? ld::relocatable::File::kDebugInfoStabsUUID : ld::relocatable::File::kDebugInfoStabs);
3159                         ld::relocatable::File::Stab stab;
3160                         stab.atom       = NULL;
3161                         stab.type       = type;
3162                         stab.other      = sym.n_sect();
3163                         stab.desc       = sym.n_desc();
3164                         stab.value      = sym.n_value();
3165                         stab.string = NULL;
3166                         switch (state) {
3167                                 case start:
3168                                         switch (type) {
3169                                                 case N_BNSYM:
3170                                                         // beginning of function block
3171                                                         state = inBeginEnd;
3172                                                         // fall into case to lookup atom by addresss
3173                                                 case N_LCSYM:
3174                                                 case N_STSYM:
3175                                                         currentAtomAddress = sym.n_value();
3176                                                         currentAtom = this->findAtomByAddress(currentAtomAddress);
3177                                                         if ( currentAtom != NULL ) {
3178                                                                 stab.atom = currentAtom;
3179                                                                 stab.string = symString;
3180                                                         }
3181                                                         else {
3182                                                                 fprintf(stderr, "can't find atom for stabs BNSYM at %08llX in %s",
3183                                                                         (uint64_t)sym.n_value(), _path);
3184                                                         }
3185                                                         break;
3186                                                 case N_SO:
3187                                                 case N_OSO:
3188                                                 case N_OPT:
3189                                                 case N_LSYM:
3190                                                 case N_RSYM:
3191                                                 case N_PSYM:
3192                                                         // not associated with an atom, just copy
3193                                                         stab.string = symString;
3194                                                         break;
3195                                                 case N_GSYM:
3196                                                 {
3197                                                         // n_value field is NOT atom address ;-(
3198                                                         // need to find atom by name match
3199                                                         const char* colon = strchr(symString, ':');
3200                                                         if ( colon != NULL ) {
3201                                                                 // build underscore leading name
3202                                                                 int nameLen = colon - symString;
3203                                                                 char symName[nameLen+2];
3204                                                                 strlcpy(&symName[1], symString, nameLen+1);
3205                                                                 symName[0] = '_';
3206                                                                 symName[nameLen+1] = '\0';
3207                                                                 currentAtom = this->findAtomByName(symName);
3208                                                                 if ( currentAtom != NULL ) {
3209                                                                         stab.atom = currentAtom;
3210                                                                         stab.string = symString;
3211                                                                 }
3212                                                         }
3213                                                         else {
3214                                                                 // might be a debug-note without trailing :G()
3215                                                                 currentAtom = this->findAtomByName(symString);
3216                                                                 if ( currentAtom != NULL ) {
3217                                                                         stab.atom = currentAtom;
3218                                                                         stab.string = symString;
3219                                                                 }
3220                                                         }
3221                                                         if ( stab.atom == NULL ) {
3222                                                                 // ld_classic added bogus GSYM stabs for old style dtrace probes
3223                                                                 if ( (strncmp(symString, "__dtrace_probe$", 15) != 0) )
3224                                                                         warning("can't find atom for N_GSYM stabs %s in %s", symString, _path);
3225                                                                 useStab = false;
3226                                                         }
3227                                                         break;
3228                                                 }
3229                                                 case N_FUN:
3230                                                         if ( isConstFunStabs(symString) ) {
3231                                                                 // constant not associated with a function
3232                                                                 stab.string = symString;
3233                                                         }
3234                                                         else {
3235                                                                 // old style stabs without BNSYM
3236                                                                 state = inFun;
3237                                                                 currentAtomAddress = sym.n_value();
3238                                                                 currentAtom = this->findAtomByAddress(currentAtomAddress);
3239                                                                 if ( currentAtom != NULL ) {
3240                                                                         stab.atom = currentAtom;
3241                                                                         stab.string = symString;
3242                                                                 }
3243                                                                 else {
3244                                                                         warning("can't find atom for stabs FUN at %08llX in %s",
3245                                                                                 (uint64_t)currentAtomAddress, _path);
3246                                                                 }
3247                                                         }
3248                                                         break;
3249                                                 case N_SOL:
3250                                                 case N_SLINE:
3251                                                         stab.string = symString;
3252                                                         // old stabs
3253                                                         break;
3254                                                 case N_BINCL:
3255                                                 case N_EINCL:
3256                                                 case N_EXCL:
3257                                                         stab.string = symString;
3258                                                         // -gfull built .o file
3259                                                         break;
3260                                                 default:
3261                                                         warning("unknown stabs type 0x%X in %s", type, _path);
3262                                         }
3263                                         break;
3264                                 case inBeginEnd:
3265                                         stab.atom = currentAtom;
3266                                         switch (type) {
3267                                                 case N_ENSYM:
3268                                                         state = start;
3269                                                         currentAtom = NULL;
3270                                                         break;
3271                                                 case N_LCSYM:
3272                                                 case N_STSYM:
3273                                                 {
3274                                                         Atom<A>* nestedAtom = this->findAtomByAddress(sym.n_value());
3275                                                         if ( nestedAtom != NULL ) {
3276                                                                 stab.atom = nestedAtom;
3277                                                                 stab.string = symString;
3278                                                         }
3279                                                         else {
3280                                                                 warning("can't find atom for stabs 0x%X at %08llX in %s",
3281                                                                         type, (uint64_t)sym.n_value(), _path);
3282                                                         }
3283                                                         break;
3284                                                 }
3285                                                 case N_LBRAC:
3286                                                 case N_RBRAC:
3287                                                 case N_SLINE:
3288                                                         // adjust value to be offset in atom
3289                                                         stab.value -= currentAtomAddress;
3290                                                 default:
3291                                                         stab.string = symString;
3292                                                         break;
3293                                         }
3294                                         break;
3295                                 case inFun:
3296                                         switch (type) {
3297                                                 case N_FUN:
3298                                                         if ( isConstFunStabs(symString) ) {
3299                                                                 stab.atom = currentAtom;
3300                                                                 stab.string = symString;
3301                                                         }
3302                                                         else {
3303                                                                 if ( sym.n_sect() != 0 ) {
3304                                                                         // found another start stab, must be really old stabs...
3305                                                                         currentAtomAddress = sym.n_value();
3306                                                                         currentAtom = this->findAtomByAddress(currentAtomAddress);
3307                                                                         if ( currentAtom != NULL ) {
3308                                                                                 stab.atom = currentAtom;
3309                                                                                 stab.string = symString;
3310                                                                         }
3311                                                                         else {
3312                                                                                 warning("can't find atom for stabs FUN at %08llX in %s",
3313                                                                                         (uint64_t)currentAtomAddress, _path);
3314                                                                         }
3315                                                                 }
3316                                                                 else {
3317                                                                         // found ending stab, switch back to start state
3318                                                                         stab.string = symString;
3319                                                                         stab.atom = currentAtom;
3320                                                                         state = start;
3321                                                                         currentAtom = NULL;
3322                                                                 }
3323                                                         }
3324                                                         break;
3325                                                 case N_LBRAC:
3326                                                 case N_RBRAC:
3327                                                 case N_SLINE:
3328                                                         // adjust value to be offset in atom
3329                                                         stab.value -= currentAtomAddress;
3330                                                         stab.atom = currentAtom;
3331                                                         break;
3332                                                 case N_SO:
3333                                                         stab.string = symString;
3334                                                         state = start;
3335                                                         break;
3336                                                 default:
3337                                                         stab.atom = currentAtom;
3338                                                         stab.string = symString;
3339                                                         break;
3340                                         }
3341                                         break;
3342                         }
3343                         // add to list of stabs for this .o file
3344                         if ( useStab )
3345                                 _file->_stabs.push_back(stab);
3346                 }
3347         }
3348 }
3349
3350
3351
3352 // Look at the compilation unit DIE and determine
3353 // its NAME, compilation directory (in COMP_DIR) and its
3354 // line number information offset (in STMT_LIST).  NAME and COMP_DIR
3355 // may be NULL (especially COMP_DIR) if they are not in the .o file;
3356 // STMT_LIST will be (uint64_t) -1.
3357 //
3358 // At present this assumes that there's only one compilation unit DIE.
3359 //
3360 template <typename A>
3361 bool Parser<A>::read_comp_unit(const char ** name, const char ** comp_dir,
3362                                                         uint64_t *stmt_list)
3363 {
3364         const uint8_t * debug_info;
3365         const uint8_t * debug_abbrev;
3366         const uint8_t * di;
3367         const uint8_t * da;
3368         const uint8_t * end;
3369         const uint8_t * enda;
3370         uint64_t sz;
3371         uint16_t vers;
3372         uint64_t abbrev_base;
3373         uint64_t abbrev;
3374         uint8_t address_size;
3375         bool dwarf64;
3376
3377         *name = NULL;
3378         *comp_dir = NULL;
3379         *stmt_list = (uint64_t) -1;
3380
3381         if ( (_file->_dwarfDebugInfoSect == NULL) || (_file->_dwarfDebugAbbrevSect == NULL) )
3382                 return false;
3383
3384         debug_info = (uint8_t*)_file->fileContent() + _file->_dwarfDebugInfoSect->offset();
3385         debug_abbrev = (uint8_t*)_file->fileContent() + _file->_dwarfDebugAbbrevSect->offset();
3386         di = debug_info;
3387
3388         if (_file->_dwarfDebugInfoSect->size() < 12)
3389                 /* Too small to be a real debug_info section.  */
3390                 return false;
3391         sz = A::P::E::get32(*(uint32_t*)di);
3392         di += 4;
3393         dwarf64 = sz == 0xffffffff;
3394         if (dwarf64)
3395                 sz = A::P::E::get64(*(uint64_t*)di), di += 8;
3396         else if (sz > 0xffffff00)
3397                 /* Unknown dwarf format.  */
3398                 return false;
3399
3400         /* Verify claimed size.  */
3401         if (sz + (di - debug_info) > _file->_dwarfDebugInfoSect->size() || sz <= (dwarf64 ? 23 : 11))
3402                 return false;
3403
3404         vers = A::P::E::get16(*(uint16_t*)di);
3405         if (vers < 2 || vers > 3)
3406         /* DWARF version wrong for this code.
3407            Chances are we could continue anyway, but we don't know for sure.  */
3408                 return false;
3409         di += 2;
3410
3411         /* Find the debug_abbrev section.  */
3412         abbrev_base = dwarf64 ? A::P::E::get64(*(uint64_t*)di) : A::P::E::get32(*(uint32_t*)di);
3413         di += dwarf64 ? 8 : 4;
3414
3415         if (abbrev_base > _file->_dwarfDebugAbbrevSect->size())
3416                 return false;
3417         da = debug_abbrev + abbrev_base;
3418         enda = debug_abbrev + _file->_dwarfDebugAbbrevSect->size();
3419
3420         address_size = *di++;
3421
3422         /* Find the abbrev number we're looking for.  */
3423         end = di + sz;
3424         abbrev = read_uleb128 (&di, end);
3425         if (abbrev == (uint64_t) -1)
3426                 return false;
3427
3428         /* Skip through the debug_abbrev section looking for that abbrev.  */
3429         for (;;)
3430         {
3431                 uint64_t this_abbrev = read_uleb128 (&da, enda);
3432                 uint64_t attr;
3433
3434                 if (this_abbrev == abbrev)
3435                         /* This is almost always taken.  */
3436                         break;
3437                 skip_leb128 (&da, enda); /* Skip the tag.  */
3438                 if (da == enda)
3439                         return false;
3440                 da++;  /* Skip the DW_CHILDREN_* value.  */
3441
3442                 do {
3443                         attr = read_uleb128 (&da, enda);
3444                         skip_leb128 (&da, enda);
3445                 } while (attr != 0 && attr != (uint64_t) -1);
3446                 if (attr != 0)
3447                         return false;
3448         }
3449
3450         /* Check that the abbrev is one for a DW_TAG_compile_unit.  */
3451         if (read_uleb128 (&da, enda) != DW_TAG_compile_unit)
3452         return false;
3453         if (da == enda)
3454         return false;
3455         da++;  /* Skip the DW_CHILDREN_* value.  */
3456
3457         /* Now, go through the DIE looking for DW_AT_name,
3458          DW_AT_comp_dir, and DW_AT_stmt_list.  */
3459         for (;;)
3460         {
3461                 uint64_t attr = read_uleb128 (&da, enda);
3462                 uint64_t form = read_uleb128 (&da, enda);
3463
3464                 if (attr == (uint64_t) -1)
3465                         return false;
3466                 else if (attr == 0)
3467                         return true;
3468
3469                 if (form == DW_FORM_indirect)
3470                         form = read_uleb128 (&di, end);
3471
3472                 if (attr == DW_AT_name)
3473                         *name = getDwarfString(form, di);
3474                 else if (attr == DW_AT_comp_dir)
3475                         *comp_dir = getDwarfString(form, di);
3476                 else if (attr == DW_AT_stmt_list && form == DW_FORM_data4)
3477                         *stmt_list = A::P::E::get32(*(uint32_t*)di);
3478                 else if (attr == DW_AT_stmt_list && form == DW_FORM_data8)
3479                         *stmt_list = A::P::E::get64(*(uint64_t*)di);
3480                 if (! skip_form (&di, end, form, address_size, dwarf64))
3481                         return false;
3482         }
3483 }
3484
3485
3486
3487 template <typename A>
3488 File<A>::~File()
3489 {
3490         free(_sectionsArray);
3491         free(_atomsArray);
3492 }
3493
3494 template <typename A>
3495 const char* File<A>::translationUnitSource() const
3496 {
3497         return _dwarfTranslationUnitPath;
3498 }
3499
3500
3501
3502 template <typename A>
3503 bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
3504 {
3505         handler.doFile(*this);
3506         uint8_t* p = _atomsArray;
3507         for(int i=_atomsArrayCount; i > 0; --i) {
3508                 handler.doAtom(*((Atom<A>*)p));
3509                 p += sizeof(Atom<A>);
3510         }
3511         return (_atomsArrayCount != 0);
3512 }
3513
3514 template <typename A>
3515 const char* Section<A>::makeSegmentName(const macho_section<typename A::P>* sect)
3516 {
3517         // mach-o section record only has room for 16-byte seg/sect names
3518         // so a 16-byte name has no trailing zero
3519         const char* name = sect->segname();
3520         if ( strlen(name) < 16 )
3521                 return name;
3522         char* tmp = new char[17];
3523         strlcpy(tmp, name, 17);
3524         return tmp;
3525 }
3526
3527 template <typename A>
3528 const char* Section<A>::makeSectionName(const macho_section<typename A::P>* sect)
3529 {
3530         const char* name = sect->sectname();
3531         if ( strlen(name) < 16 )
3532                 return name;
3533
3534         // special case common long section names so we don't have to malloc
3535         if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3536                 return "__objc_classrefs";
3537         if ( strncmp(sect->sectname(), "__objc_classlist", 16) == 0 )
3538                 return "__objc_classlist";
3539         if ( strncmp(sect->sectname(), "__objc_nlclslist", 16) == 0 )
3540                 return "__objc_nlclslist";
3541         if ( strncmp(sect->sectname(), "__objc_nlcatlist", 16) == 0 )
3542                 return "__objc_nlcatlist";
3543         if ( strncmp(sect->sectname(), "__objc_protolist", 16) == 0 )
3544                 return "__objc_protolist";
3545         if ( strncmp(sect->sectname(), "__objc_protorefs", 16) == 0 )
3546                 return "__objc_protorefs";
3547         if ( strncmp(sect->sectname(), "__objc_superrefs", 16) == 0 )
3548                 return "__objc_superrefs";
3549         if ( strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0 )
3550                 return "__objc_imageinfo";
3551         if ( strncmp(sect->sectname(), "__objc_stringobj", 16) == 0 )
3552                 return "__objc_stringobj";
3553         if ( strncmp(sect->sectname(), "__gcc_except_tab", 16) == 0 )
3554                 return "__gcc_except_tab";
3555
3556         char* tmp = new char[17];
3557         strlcpy(tmp, name, 17);
3558         return tmp;
3559 }
3560
3561 template <typename A>
3562 bool Section<A>::readable(const macho_section<typename A::P>* sect)
3563 {
3564         return true;
3565 }
3566
3567 template <typename A>
3568 bool Section<A>::writable(const macho_section<typename A::P>* sect)
3569 {
3570         // mach-o .o files do not contain segment permissions
3571         // we just know TEXT is special
3572         return ( strcmp(sect->segname(), "__TEXT") != 0 );
3573 }
3574
3575 template <typename A>
3576 bool Section<A>::exectuable(const macho_section<typename A::P>* sect)
3577 {
3578         // mach-o .o files do not contain segment permissions
3579         // we just know TEXT is special
3580         return ( strcmp(sect->segname(), "__TEXT") == 0 );
3581 }
3582
3583
3584 template <typename A>
3585 ld::Section::Type Section<A>::sectionType(const macho_section<typename A::P>* sect)
3586 {
3587         switch ( sect->flags() & SECTION_TYPE ) {
3588                 case S_ZEROFILL:
3589                         return ld::Section::typeZeroFill;
3590                 case S_CSTRING_LITERALS:
3591                         if ( (strcmp(sect->sectname(), "__cstring") == 0) && (strcmp(sect->segname(), "__TEXT") == 0) )
3592                                 return ld::Section::typeCString;
3593                         else
3594                                 return ld::Section::typeNonStdCString;
3595                 case S_4BYTE_LITERALS:
3596                         return ld::Section::typeLiteral4;
3597                 case S_8BYTE_LITERALS:
3598                         return ld::Section::typeLiteral8;
3599                 case S_LITERAL_POINTERS:
3600                         return ld::Section::typeCStringPointer;
3601                 case S_NON_LAZY_SYMBOL_POINTERS:
3602                         return ld::Section::typeNonLazyPointer;
3603                 case S_LAZY_SYMBOL_POINTERS:
3604                         return ld::Section::typeLazyPointer;
3605                 case S_SYMBOL_STUBS:
3606                         return ld::Section::typeStub;
3607                 case S_MOD_INIT_FUNC_POINTERS:
3608                         return ld::Section::typeInitializerPointers;
3609                 case S_MOD_TERM_FUNC_POINTERS:
3610                         return ld::Section::typeTerminatorPointers;
3611                 case S_INTERPOSING:
3612                         return ld::Section::typeUnclassified;
3613                 case S_16BYTE_LITERALS:
3614                         return ld::Section::typeLiteral16;
3615                 case S_REGULAR:
3616                 case S_COALESCED:
3617                         if ( sect->flags() & S_ATTR_PURE_INSTRUCTIONS ) {
3618                                 return ld::Section::typeCode;
3619                         }
3620                         else if ( strcmp(sect->segname(), "__TEXT") == 0 ) {
3621                                 if ( strcmp(sect->sectname(), "__eh_frame") == 0 )
3622                                         return ld::Section::typeCFI;
3623                                 else if ( strcmp(sect->sectname(), "__ustring") == 0 )
3624                                         return ld::Section::typeUTF16Strings;
3625                                 else if ( strcmp(sect->sectname(), "__textcoal_nt") == 0 )
3626                                         return ld::Section::typeCode;
3627                                 else if ( strcmp(sect->sectname(), "__StaticInit") == 0 )
3628                                         return ld::Section::typeCode;
3629                                 else if ( strcmp(sect->sectname(), "__constructor") == 0 )
3630                                         return ld::Section::typeInitializerPointers;
3631                         }
3632                         else if ( strcmp(sect->segname(), "__DATA") == 0 ) {
3633                                 if ( strcmp(sect->sectname(), "__cfstring") == 0 )
3634                                         return ld::Section::typeCFString;
3635                                 else if ( strcmp(sect->sectname(), "__dyld") == 0 )
3636                                         return ld::Section::typeDyldInfo;
3637                                 else if ( strcmp(sect->sectname(), "__program_vars") == 0 )
3638                                         return ld::Section::typeDyldInfo;
3639                                 else if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3640                                         return ld::Section::typeObjCClassRefs;
3641                                 else if ( strcmp(sect->sectname(), "__objc_catlist") == 0 )
3642                                         return ld::Section::typeObjC2CategoryList;
3643                         }
3644                         else if ( strcmp(sect->segname(), "__OBJC") == 0 ) {
3645                                 if ( strcmp(sect->sectname(), "__class") == 0 )
3646                                         return ld::Section::typeObjC1Classes;
3647                         }
3648                         break;
3649                 case S_THREAD_LOCAL_REGULAR:
3650                         return ld::Section::typeTLVInitialValues;
3651                 case S_THREAD_LOCAL_ZEROFILL:
3652                         return ld::Section::typeTLVZeroFill;
3653                 case S_THREAD_LOCAL_VARIABLES:
3654                         return ld::Section::typeTLVDefs;
3655                 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
3656                         return ld::Section::typeTLVInitializerPointers;
3657         }
3658         return ld::Section::typeUnclassified;
3659 }
3660
3661
3662 template <typename A>
3663 Atom<A>* Section<A>::findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end)
3664 {
3665         // do a binary search of atom array
3666         uint32_t atomCount = end - start;
3667         Atom<A>* base = start;
3668         for (uint32_t n = atomCount; n > 0; n /= 2) {
3669                 Atom<A>* pivot = &base[n/2];
3670                 pint_t atomStartAddr = pivot->_objAddress;
3671                 pint_t atomEndAddr = atomStartAddr + pivot->_size;
3672                 if ( atomStartAddr <= addr ) {
3673                         // address in normal atom
3674                         if (addr < atomEndAddr)
3675                                 return pivot;
3676                         // address in "end" label (but not in alias)
3677                         if ( (pivot->_size == 0) && (addr == atomEndAddr) && !pivot->isAlias() )
3678                                 return pivot;
3679                 }
3680                 if ( addr >= atomEndAddr ) {
3681                         // key > pivot
3682                         // move base to atom after pivot
3683                         base = &pivot[1];
3684                         --n;
3685                 }
3686                 else {
3687                         // key < pivot
3688                         // keep same base
3689                 }
3690         }
3691         return NULL;
3692 }
3693
3694 template <typename A>
3695 ld::Atom::Alignment Section<A>::alignmentForAddress(pint_t addr)
3696 {
3697         const uint32_t sectionAlignment = this->_machOSection->align();
3698         return ld::Atom::Alignment(sectionAlignment, (addr % (1 << sectionAlignment)));
3699 }
3700
3701 template <typename A>
3702 uint32_t Section<A>::sectionNum(class Parser<A>& parser) const
3703 {
3704         if ( _machOSection == NULL )
3705                 return 0;
3706         else
3707                 return 1 + (this->_machOSection - parser.firstMachOSection());
3708 }
3709
3710 // arm does not have zero cost exceptions
3711 template <> uint32_t CFISection<arm>::cfiCount() { return 0; }
3712
3713 template <typename A>
3714 uint32_t CFISection<A>::cfiCount()
3715 {
3716         // create ObjectAddressSpace object for use by libunwind
3717         OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
3718         return libunwind::CFI_Parser<OAS>::getCFICount(oas,
3719                                                                                 this->_machOSection->addr(), this->_machOSection->size());
3720 }
3721
3722 template <typename A>
3723 void CFISection<A>::warnFunc(void* ref, uint64_t funcAddr, const char* msg)
3724 {
3725         Parser<A>* parser = (Parser<A>*)ref;
3726         if ( ! parser->convertUnwindInfo() )
3727                 return;
3728         if ( funcAddr != CFI_INVALID_ADDRESS ) {
3729                 // atoms are not constructed yet, so scan symbol table for labels
3730                 const char* name = parser->scanSymbolTableForAddress(funcAddr);
3731                 warning("could not create compact unwind for %s: %s", name, msg);
3732         }
3733         else {
3734                 warning("could not create compact unwind: %s", msg);
3735         }
3736 }
3737
3738 template <>
3739 bool CFISection<x86_64>::needsRelocating()
3740 {
3741         return true;
3742 }
3743
3744 template <typename A>
3745 bool CFISection<A>::needsRelocating()
3746 {
3747         return false;
3748 }
3749
3750 template <>
3751 void CFISection<x86_64>::cfiParse(class Parser<x86_64>& parser, uint8_t* buffer,
3752                                                                         libunwind::CFI_Atom_Info<CFISection<x86_64>::OAS>::CFI_Atom_Info cfiArray[],
3753                                                                         uint32_t count)
3754 {
3755         // copy __eh_frame data to buffer
3756         memcpy(buffer, file().fileContent() + this->_machOSection->offset(), this->_machOSection->size());
3757
3758         // and apply relocations
3759         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + this->_machOSection->reloff());
3760         const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
3761         for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
3762                 uint64_t value = 0;
3763                 switch ( reloc->r_type() ) {
3764                         case X86_64_RELOC_SUBTRACTOR:
3765                                 value =  0 - parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3766                                 ++reloc;
3767                                 if ( reloc->r_extern() )
3768                                         value += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3769                                 break;
3770                         case X86_64_RELOC_UNSIGNED:
3771                                 value = parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3772                                 break;
3773                         case X86_64_RELOC_GOT:
3774                                 // this is used for the reference to the personality function in CIEs
3775                                 // store the symbol number of the personality function for later use as a Fixup
3776                                 value = reloc->r_symbolnum();
3777                                 break;
3778                         default:
3779                                 fprintf(stderr, "CFISection::cfiParse() unexpected relocation type at r_address=0x%08X\n", reloc->r_address());
3780                                 break;
3781                 }
3782                 uint64_t*       p64;
3783                 uint32_t*       p32;
3784                 switch ( reloc->r_length() ) {
3785                         case 3:
3786                                 p64 = (uint64_t*)&buffer[reloc->r_address()];
3787                                 E::set64(*p64, value + E::get64(*p64));
3788                                 break;
3789                         case 2:
3790                                 p32 = (uint32_t*)&buffer[reloc->r_address()];
3791                                 E::set32(*p32, value + E::get32(*p32));
3792                                 break;
3793                         default:
3794                                 fprintf(stderr, "CFISection::cfiParse() unexpected relocation size at r_address=0x%08X\n", reloc->r_address());
3795                                 break;
3796                 }
3797         }
3798
3799
3800         // create ObjectAddressSpace object for use by libunwind
3801         OAS oas(*this, buffer);
3802
3803         // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
3804         const char* msg;
3805         msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86_64>::parseCFIs(
3806                                                         oas, this->_machOSection->addr(), this->_machOSection->size(),
3807                                                         cfiArray, count, (void*)&parser, warnFunc);
3808         if ( msg != NULL )
3809                 throwf("malformed __eh_frame section: %s", msg);
3810 }
3811
3812 template <>
3813 void CFISection<x86>::cfiParse(class Parser<x86>& parser, uint8_t* buffer,
3814                                                                         libunwind::CFI_Atom_Info<CFISection<x86>::OAS>::CFI_Atom_Info cfiArray[],
3815                                                                         uint32_t count)
3816 {
3817         // create ObjectAddressSpace object for use by libunwind
3818         OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
3819
3820         // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
3821         const char* msg;
3822         msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86>::parseCFIs(
3823                                                         oas, this->_machOSection->addr(), this->_machOSection->size(),
3824                                                         cfiArray, count, (void*)&parser, warnFunc);
3825         if ( msg != NULL )
3826                 throwf("malformed __eh_frame section: %s", msg);
3827 }
3828
3829
3830
3831
3832 template <>
3833 void CFISection<arm>::cfiParse(class Parser<arm>& parser, uint8_t* buffer,
3834                                                                         libunwind::CFI_Atom_Info<CFISection<arm>::OAS>::CFI_Atom_Info cfiArray[],
3835                                                                         uint32_t count)
3836 {
3837         // arm does not use zero cost exceptions
3838         assert(count == 0);
3839 }
3840
3841
3842
3843 template <typename A>
3844 uint32_t CFISection<A>::computeAtomCount(class Parser<A>& parser,
3845                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
3846                                                                                         const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3847 {
3848         return cfis.cfiCount;
3849 }
3850
3851
3852
3853 template <typename A>
3854 uint32_t CFISection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
3855                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
3856                                                                         const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3857 {
3858         this->_beginAtoms = (Atom<A>*)p;
3859         // walk CFI_Atom_Info array and create atom for each entry
3860         const CFI_Atom_Info* start = &cfis.cfiArray[0];
3861         const CFI_Atom_Info* end   = &cfis.cfiArray[cfis.cfiCount];
3862         for(const CFI_Atom_Info* a=start; a < end; ++a) {
3863                 Atom<A>* space = (Atom<A>*)p;
3864                 new (space) Atom<A>(*this, (a->isCIE ? "CIE" : "FDE"), a->address, a->size,
3865                                                                                 ld::Atom::definitionRegular, ld::Atom::combineNever, ld::Atom::scopeTranslationUnit,
3866                                                                                 ld::Atom::typeCFI, ld::Atom::symbolTableNotInFinalLinkedImages,
3867                                                                                 false, false, false, ld::Atom::Alignment(0));
3868                 p += sizeof(Atom<A>);
3869         }
3870         this->_endAtoms = (Atom<A>*)p;
3871         return cfis.cfiCount;
3872 }
3873
3874
3875 template <> bool CFISection<x86_64>::bigEndian() { return false; }
3876 template <> bool CFISection<x86>::bigEndian() { return false; }
3877 template <> bool CFISection<arm>::bigEndian() { return false; }
3878
3879
3880 template <>
3881 void CFISection<x86_64>::addCiePersonalityFixups(class Parser<x86_64>& parser, const CFI_Atom_Info* cieInfo)
3882 {
3883         uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
3884         if ( personalityEncoding == 0x9B ) {
3885                 // compiler always produces X86_64_RELOC_GOT with addend of 4 to personality function
3886                 // CFISection<x86_64>::cfiParse() set targetAddress to be symbolIndex + 4 + addressInCIE
3887                 uint32_t symbolIndex = cieInfo->u.cieInfo.personality.targetAddress - 4
3888                                                                         - cieInfo->address - cieInfo->u.cieInfo.personality.offsetInCFI;
3889                 const macho_nlist<P>& sym = parser.symbolFromIndex(symbolIndex);
3890                 const char* personalityName = parser.nameFromSymbol(sym);
3891
3892                 Atom<x86_64>* cieAtom = this->findAtomByAddress(cieInfo->address);
3893                 Parser<x86_64>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
3894                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, personalityName);
3895                 parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, 4);
3896                 parser.addFixup(src, ld::Fixup::k3of3, ld::Fixup::kindStoreX86PCRel32GOT);
3897         }
3898         else if ( personalityEncoding != 0 ) {
3899                 throwf("unsupported address encoding (%02X) of personality function in CIE",
3900                                 personalityEncoding);
3901         }
3902 }
3903
3904 template <>
3905 void CFISection<x86>::addCiePersonalityFixups(class Parser<x86>& parser, const CFI_Atom_Info* cieInfo)
3906 {
3907         uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
3908         if ( (personalityEncoding == 0x9B) || (personalityEncoding == 0x90) ) {
3909                 uint32_t offsetInCFI = cieInfo->u.cieInfo.personality.offsetInCFI;
3910                 uint32_t nlpAddr = cieInfo->u.cieInfo.personality.targetAddress;
3911                 Atom<x86>* cieAtom = this->findAtomByAddress(cieInfo->address);
3912                 Atom<x86>* nlpAtom = parser.findAtomByAddress(nlpAddr);
3913                 assert(nlpAtom->contentType() == ld::Atom::typeNonLazyPointer);
3914                 Parser<x86>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
3915
3916                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, nlpAtom);
3917                 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
3918                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, offsetInCFI);
3919                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
3920         }
3921         else if ( personalityEncoding != 0 ) {
3922                 throwf("unsupported address encoding (%02X) of personality function in CIE", personalityEncoding);
3923         }
3924 }
3925
3926
3927 template <typename A>
3928 void CFISection<A>::addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo)
3929 {
3930         // FIX ME
3931         assert(0);
3932 }
3933
3934 template <typename A>
3935 void CFISection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3936 {
3937         ld::Fixup::Kind store32 = bigEndian() ? ld::Fixup::kindStoreBigEndian32 : ld::Fixup::kindStoreLittleEndian32;
3938         ld::Fixup::Kind store64 = bigEndian() ? ld::Fixup::kindStoreBigEndian64 : ld::Fixup::kindStoreLittleEndian64;
3939
3940         // add all references for FDEs, including implicit group references
3941         const CFI_Atom_Info* end = &cfis.cfiArray[cfis.cfiCount];
3942         for(const CFI_Atom_Info* p = &cfis.cfiArray[0]; p < end; ++p) {
3943                 if ( p->isCIE ) {
3944                         // add reference to personality function if used
3945                         if ( p->u.cieInfo.personality.targetAddress != CFI_INVALID_ADDRESS ) {
3946                                 this->addCiePersonalityFixups(parser, p);
3947                         }
3948                 }
3949                 else {
3950                         // find FDE Atom
3951                         Atom<A>* fdeAtom = this->findAtomByAddress(p->address);
3952                         // find function Atom
3953                         Atom<A>* functionAtom = parser.findAtomByAddress(p->u.fdeInfo.function.targetAddress);
3954                         // find CIE Atom
3955                         Atom<A>* cieAtom = this->findAtomByAddress(p->u.fdeInfo.cie.targetAddress);
3956                         // find LSDA Atom
3957                         Atom<A>* lsdaAtom = NULL;
3958                         if ( p->u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS ) {
3959                                 lsdaAtom = parser.findAtomByAddress(p->u.fdeInfo.lsda.targetAddress);
3960                         }
3961                         // add reference from FDE to CIE (always 32-bit pc-rel)
3962                         typename Parser<A>::SourceLocation fdeToCieSrc(fdeAtom, p->u.fdeInfo.cie.offsetInCFI);
3963                         parser.addFixup(fdeToCieSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, fdeAtom);
3964                         parser.addFixup(fdeToCieSrc, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, p->u.fdeInfo.cie.offsetInCFI);
3965                         parser.addFixup(fdeToCieSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
3966                         parser.addFixup(fdeToCieSrc, ld::Fixup::k4of4, store32, cieAtom);
3967
3968                         // add reference from FDE to function
3969                         typename Parser<A>::SourceLocation fdeToFuncSrc(fdeAtom, p->u.fdeInfo.function.offsetInCFI);
3970                         switch (p->u.fdeInfo.function.encodingOfTargetAddress) {
3971                                 case DW_EH_PE_pcrel|DW_EH_PE_ptr:
3972                                         if ( sizeof(typename A::P::uint_t) == 8 ) {
3973                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
3974                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3975                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
3976                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store64);
3977                                                 break;
3978                                         }
3979                                         // else fall into 32-bit case
3980                                 case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
3981                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
3982                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3983                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
3984                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store32);
3985                                         break;
3986                                 default:
3987                                         throw "unsupported encoding in FDE of pointer to function";
3988                         }
3989
3990                         // add reference from FDE to LSDA
3991                         typename Parser<A>::SourceLocation fdeToLsdaSrc(fdeAtom,  p->u.fdeInfo.lsda.offsetInCFI);
3992                         if ( lsdaAtom != NULL ) {
3993                                 switch (p->u.fdeInfo.lsda.encodingOfTargetAddress) {
3994                                         case DW_EH_PE_pcrel|DW_EH_PE_ptr:
3995                                                 if ( sizeof(typename A::P::uint_t) == 8 ) {
3996                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
3997                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3998                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
3999                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store64);
4000                                                         break;
4001                                                 }
4002                                                 // else fall into 32-bit case
4003                                         case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
4004                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
4005                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
4006                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
4007                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store32);
4008                                         break;
4009                                         default:
4010                                                 throw "unsupported encoding in FDE of pointer to LSDA";
4011                                 }
4012                         }
4013
4014                         // FDE is in group lead by function atom
4015                         typename Parser<A>::SourceLocation fdeSrc(functionAtom,0);
4016                         parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateFDE, fdeAtom);
4017
4018                         // LSDA is in group lead by function atom
4019                         if ( lsdaAtom != NULL ) {
4020                                 parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, lsdaAtom);
4021                         }
4022                 }
4023         }
4024 }
4025
4026
4027
4028
4029 template <typename A>
4030 const void*      CFISection<A>::OAS::mappedAddress(pint_t addr)
4031 {
4032         if ( (_ehFrameStartAddr <= addr) && (addr < _ehFrameEndAddr) )
4033                 return &_ehFrameContent[addr-_ehFrameStartAddr];
4034         else {
4035                 // requested bytes are not in __eh_frame section
4036                 // this can occur when examining the instruction bytes in the __text
4037                 File<A>& file = _ehFrameSection.file();
4038                 for (uint32_t i=0; i < file._sectionsArrayCount; ++i ) {
4039                         const macho_section<typename A::P>* sect = file._sectionsArray[i]->machoSection();
4040                         // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
4041                         if ( sect != NULL ) {
4042                                 if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
4043                                         return file.fileContent() + sect->offset() + addr - sect->addr();
4044                                 }
4045                         }
4046                 }
4047                 throwf("__eh_frame parsing problem.  Can't find target of reference to address 0x%08llX", (uint64_t)addr);
4048         }
4049 }
4050
4051
4052 template <typename A>
4053 uint64_t CFISection<A>::OAS::getULEB128(pint_t& logicalAddr, pint_t end)
4054 {
4055         uintptr_t size = (end - logicalAddr);
4056         libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4057         libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4058         uint64_t result = libunwind::LocalAddressSpace::getULEB128(laddr, laddr+size);
4059         logicalAddr += (laddr-sladdr);
4060         return result;
4061 }
4062
4063 template <typename A>
4064 int64_t CFISection<A>::OAS::getSLEB128(pint_t& logicalAddr, pint_t end)
4065 {
4066         uintptr_t size = (end - logicalAddr);
4067         libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4068         libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4069         int64_t result = libunwind::LocalAddressSpace::getSLEB128(laddr, laddr+size);
4070         logicalAddr += (laddr-sladdr);
4071         return result;
4072 }
4073
4074 template <typename A>
4075 typename A::P::uint_t CFISection<A>::OAS::getEncodedP(pint_t& addr, pint_t end, uint8_t encoding)
4076 {
4077         pint_t startAddr = addr;
4078         pint_t p = addr;
4079         pint_t result;
4080
4081         // first get value
4082         switch (encoding & 0x0F) {
4083                 case DW_EH_PE_ptr:
4084                         result = getP(addr);
4085                         p += sizeof(pint_t);
4086                         addr = (pint_t)p;
4087                         break;
4088                 case DW_EH_PE_uleb128:
4089                         result = getULEB128(addr, end);
4090                         break;
4091                 case DW_EH_PE_udata2:
4092                         result = get16(addr);
4093                         p += 2;
4094                         addr = (pint_t)p;
4095                         break;
4096                 case DW_EH_PE_udata4:
4097                         result = get32(addr);
4098                         p += 4;
4099                         addr = (pint_t)p;
4100                         break;
4101                 case DW_EH_PE_udata8:
4102                         result = get64(addr);
4103                         p += 8;
4104                         addr = (pint_t)p;
4105                         break;
4106                 case DW_EH_PE_sleb128:
4107                         result = getSLEB128(addr, end);
4108                         break;
4109                 case DW_EH_PE_sdata2:
4110                         result = (int16_t)get16(addr);
4111                         p += 2;
4112                         addr = (pint_t)p;
4113                         break;
4114                 case DW_EH_PE_sdata4:
4115                         result = (int32_t)get32(addr);
4116                         p += 4;
4117                         addr = (pint_t)p;
4118                         break;
4119                 case DW_EH_PE_sdata8:
4120                         result = get64(addr);
4121                         p += 8;
4122                         addr = (pint_t)p;
4123                         break;
4124                 default:
4125                         throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4126         }
4127
4128         // then add relative offset
4129         switch ( encoding & 0x70 ) {
4130                 case DW_EH_PE_absptr:
4131                         // do nothing
4132                         break;
4133                 case DW_EH_PE_pcrel:
4134                         result += startAddr;
4135                         break;
4136                 case DW_EH_PE_textrel:
4137                         throw "DW_EH_PE_textrel pointer encoding not supported";
4138                         break;
4139                 case DW_EH_PE_datarel:
4140                         throw "DW_EH_PE_datarel pointer encoding not supported";
4141                         break;
4142                 case DW_EH_PE_funcrel:
4143                         throw "DW_EH_PE_funcrel pointer encoding not supported";
4144                         break;
4145                 case DW_EH_PE_aligned:
4146                         throw "DW_EH_PE_aligned pointer encoding not supported";
4147                         break;
4148                 default:
4149                         throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4150                         break;
4151         }
4152
4153 //  Note: DW_EH_PE_indirect is only used in CIEs to refernce the personality pointer
4154 //  When parsing .o files that pointer contains zero, so we don't to return that.
4155 //  Instead we skip the dereference and return the address of the pointer.
4156 //      if ( encoding & DW_EH_PE_indirect )
4157 //              result = getP(result);
4158
4159         return result;
4160 }
4161
4162 template <>
4163 const char* CUSection<x86_64>::personalityName(class Parser<x86_64>& parser, const macho_relocation_info<x86_64::P>* reloc)
4164 {
4165         assert(reloc->r_extern() && "reloc not extern on personality column in __compact_unwind section");
4166         assert((reloc->r_type() == X86_64_RELOC_UNSIGNED) && "wrong reloc type on personality column in __compact_unwind section");
4167         const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4168         return parser.nameFromSymbol(sym);
4169 }
4170
4171 template <>
4172 const char* CUSection<x86>::personalityName(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
4173 {
4174         assert(reloc->r_extern() && "reloc not extern on personality column in __compact_unwind section");
4175         assert((reloc->r_type() == GENERIC_RELOC_VANILLA) && "wrong reloc type on personality column in __compact_unwind section");
4176         const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4177         return parser.nameFromSymbol(sym);
4178 }
4179
4180 template <typename A>
4181 const char* CUSection<A>::personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
4182 {
4183         return NULL;
4184 }
4185
4186
4187 template <typename A>
4188 int CUSection<A>::infoSorter(const void* l, const void* r)
4189 {
4190         // sort references by symbol index, then address
4191         const Info* left = (Info*)l;
4192         const Info* right = (Info*)r;
4193         if ( left->functionSymbolIndex == right->functionSymbolIndex )
4194                 return (left->functionStartAddress - right->functionStartAddress);
4195         else
4196                 return (left->functionSymbolIndex - right->functionSymbolIndex);
4197 }
4198
4199 template <typename A>
4200 void CUSection<A>::parse(class Parser<A>& parser, uint32_t cnt, Info array[])
4201 {
4202         // walk section content and copy to Info array
4203         const macho_compact_unwind_entry<P>* const entries = (macho_compact_unwind_entry<P>*)(this->file().fileContent() + this->_machOSection->offset());
4204         for (uint32_t i=0; i < cnt; ++i) {
4205                 Info* info = &array[i];
4206                 const macho_compact_unwind_entry<P>* entry = &entries[i];
4207                 info->functionStartAddress      = entry->codeStart();
4208                 info->functionSymbolIndex   = 0xFFFFFFFF;
4209                 info->rangeLength                       = entry->codeLen();
4210                 info->compactUnwindInfo         = entry->compactUnwindInfo();
4211                 info->personality                       = NULL;
4212                 info->lsdaAddress                       = entry->lsda();
4213                 info->function                          = NULL;
4214                 info->lsda                                      = NULL;
4215                 if ( (info->compactUnwindInfo & UNWIND_PERSONALITY_MASK) != 0 )
4216                         warning("no bits should be set in UNWIND_PERSONALITY_MASK of compact unwind encoding in __LD,__compact_unwind section");
4217                 if ( info->lsdaAddress != 0 ) {
4218                         info->compactUnwindInfo |= UNWIND_HAS_LSDA;
4219                 }
4220         }
4221
4222         // scan relocs, local relocs are useless - ignore them
4223         // extern relocs are needed for personality references (possibly for function/lsda refs??)
4224         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(this->file().fileContent() + this->_machOSection->reloff());
4225         const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4226         for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4227                 if ( reloc->r_extern() ) {
4228                         // only expect external relocs on some colummns
4229                         if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::personalityFieldOffset() ) {
4230                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4231                                 array[entryIndex].personality = this->personalityName(parser, reloc);
4232                         }
4233                         else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::lsdaFieldOffset() ) {
4234                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4235                                 const macho_nlist<P>& lsdaSym = parser.symbolFromIndex(reloc->r_symbolnum());
4236                                 if ( (lsdaSym.n_type() & N_TYPE) == N_SECT )
4237                                         array[entryIndex].lsdaAddress = lsdaSym.n_value();
4238                                 else
4239                                         warning("unexpected extern relocation to lsda in __compact_unwind section");
4240                         }
4241                         else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::codeStartFieldOffset() ) {
4242                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4243                                 array[entryIndex].functionSymbolIndex = reloc->r_symbolnum();
4244                         }
4245                         else {
4246                                 warning("unexpected extern relocation in __compact_unwind section");
4247                         }
4248                 }
4249         }
4250
4251         // sort array by function start address so unwind infos will be contiguous for a given function
4252         ::qsort(array, cnt, sizeof(Info), infoSorter);
4253 }
4254
4255 template <typename A>
4256 uint32_t CUSection<A>::count()
4257 {
4258         const macho_section<P>* machoSect =     this->machoSection();
4259         if ( (machoSect->size() % sizeof(macho_compact_unwind_entry<P>)) != 0 )
4260                 throw "malformed __LD,__compact_unwind section, bad length";
4261
4262         return machoSect->size() / sizeof(macho_compact_unwind_entry<P>);
4263 }
4264
4265 template <typename A>
4266 void CUSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cus)
4267 {
4268         Info* const arrayStart = cus.cuArray;
4269         Info* const arrayEnd = &cus.cuArray[cus.cuCount];
4270         for (Info* info=arrayStart; info < arrayEnd; ++info) {
4271                 // if external reloc was used, real address is symbol n_value + addend
4272                 if ( info->functionSymbolIndex != 0xFFFFFFFF )
4273                         info->functionStartAddress += parser.symbolFromIndex(info->functionSymbolIndex).n_value();
4274                 // find function atom from address
4275                 info->function = parser.findAtomByAddress(info->functionStartAddress);
4276                 // find lsda atom from address
4277                 if ( info->lsdaAddress != 0 ) {
4278                         info->lsda = parser.findAtomByAddress(info->lsdaAddress);
4279                         // add lsda subordinate
4280                         typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4281                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, info->lsda);
4282                 }
4283                 if ( info->personality != NULL ) {
4284                         // add personality subordinate
4285                         typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4286                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinatePersonality, false, info->personality);
4287                 }
4288         }
4289
4290 }
4291
4292 template <typename A>
4293 SymboledSection<A>::SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
4294         : Section<A>(f, s), _type(ld::Atom::typeUnclassified)
4295 {
4296         switch ( s->flags() & SECTION_TYPE ) {
4297                 case S_ZEROFILL:
4298                         _type = ld::Atom::typeZeroFill;
4299                         break;
4300                 case S_MOD_INIT_FUNC_POINTERS:
4301                         _type = ld::Atom::typeInitializerPointers;
4302                         break;
4303                 case S_MOD_TERM_FUNC_POINTERS:
4304                         _type = ld::Atom::typeTerminatorPointers;
4305                         break;
4306                 case S_THREAD_LOCAL_VARIABLES:
4307                         _type = ld::Atom::typeTLV;
4308                         break;
4309                 case S_THREAD_LOCAL_ZEROFILL:
4310                         _type = ld::Atom::typeTLVZeroFill;
4311                         break;
4312                 case S_THREAD_LOCAL_REGULAR:
4313                         _type = ld::Atom::typeTLVInitialValue;
4314                         break;
4315                 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
4316                         _type = ld::Atom::typeTLVInitializerPointers;
4317                         break;
4318                 case S_REGULAR:
4319                         if ( strncmp(s->sectname(), "__gcc_except_tab", 16) == 0 )
4320                                 _type = ld::Atom::typeLSDA;
4321                         else if ( this->type() == ld::Section::typeInitializerPointers )
4322                                 _type = ld::Atom::typeInitializerPointers;
4323                         break;
4324         }
4325 }
4326
4327
4328 template <typename A>
4329 bool SymboledSection<A>::dontDeadStrip()
4330 {
4331         switch ( _type ) {
4332                 case ld::Atom::typeInitializerPointers:
4333                 case ld::Atom::typeTerminatorPointers:
4334                         return true;
4335                 default:
4336                         // model an object file without MH_SUBSECTIONS_VIA_SYMBOLS as one in which nothing can be dead stripped
4337                         if ( ! this->_file.canScatterAtoms() )
4338                                 return true;
4339                         // call inherited
4340                         return Section<A>::dontDeadStrip();
4341         }
4342         return false;
4343 }
4344
4345
4346 template <typename A>
4347 uint32_t SymboledSection<A>::computeAtomCount(class Parser<A>& parser,
4348                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
4349                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
4350 {
4351         const pint_t startAddr = this->_machOSection->addr();
4352         const pint_t endAddr = startAddr + this->_machOSection->size();
4353         const uint32_t sectNum = this->sectionNum(parser);
4354
4355         uint32_t count = 0;
4356         pint_t  addr;
4357         pint_t  size;
4358         const macho_nlist<P>* sym;
4359         while ( it.next(parser, sectNum, startAddr, endAddr, &addr, &size, &sym) ) {
4360                 ++count;
4361         }
4362         //fprintf(stderr, "computeAtomCount(%s,%s) => %d\n", this->segmentName(), this->sectionName(), count);
4363         return count;
4364 }
4365
4366 template <typename A>
4367 uint32_t SymboledSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4368                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4369                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
4370 {
4371         this->_beginAtoms = (Atom<A>*)p;
4372
4373         //fprintf(stderr, "SymboledSection::appendAtoms() in section %s\n", this->_machOSection->sectname());
4374         const pint_t startAddr = this->_machOSection->addr();
4375         const pint_t endAddr = startAddr + this->_machOSection->size();
4376         const uint32_t sectNum = this->sectionNum(parser);
4377
4378         uint32_t count = 0;
4379         pint_t  addr;
4380         pint_t  size;
4381         const macho_nlist<P>* label;
4382         while ( it.next(parser, sectNum, startAddr, endAddr, &addr, &size, &label) ) {
4383                 Atom<A>* allocatedSpace = (Atom<A>*)p;
4384                 // is break because of label or CFI?
4385                 if ( label != NULL ) {
4386                         // The size is computed based on the address of the next label (or the end of the section for the last label)
4387                         // If there are two labels at the same address, we want them one to be an alias of the other.
4388                         // If the label is at the end of a section, it is has zero size, but is not an alias
4389                         const bool isAlias = ( (size == 0) && (addr <  endAddr) );
4390                         new (allocatedSpace) Atom<A>(*this, parser, *label, size, isAlias);
4391                         if ( isAlias )
4392                                 this->_hasAliases = true;
4393                 }
4394                 else {
4395                         ld::Atom::SymbolTableInclusion inclusion = ld::Atom::symbolTableNotIn;
4396                         ld::Atom::ContentType ctype = this->contentType();
4397                         if ( ctype == ld::Atom::typeLSDA )
4398                                 inclusion = ld::Atom::symbolTableInWithRandomAutoStripLabel;
4399                         new (allocatedSpace) Atom<A>(*this, "anon", addr, size, ld::Atom::definitionRegular, ld::Atom::combineNever,
4400                                                                                 ld::Atom::scopeTranslationUnit, ctype, inclusion,
4401                                                                                 this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
4402                 }
4403                 p += sizeof(Atom<A>);
4404                 ++count;
4405         }
4406
4407         this->_endAtoms = (Atom<A>*)p;
4408         return count;
4409 }
4410
4411
4412 template <typename A>
4413 uint32_t ImplicitSizeSection<A>::computeAtomCount(class Parser<A>& parser,
4414                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4415                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
4416 {
4417         uint32_t count = 0;
4418         const macho_section<P>* sect = this->machoSection();
4419         const pint_t startAddr = sect->addr();
4420         const pint_t endAddr = startAddr + sect->size();
4421         for (pint_t addr = startAddr; addr < endAddr; addr += elementSizeAtAddress(addr) ) {
4422                 if ( useElementAt(parser, it, addr) )
4423                         ++count;
4424         }
4425         if ( it.fileHasOverlappingSymbols && (sect->size() != 0) && (this->combine(parser, startAddr) == ld::Atom::combineByNameAndContent) ) {
4426                 // if there are multiple labels in this section for the same address, then clone them into multi atoms
4427                 pint_t  prevSymbolAddr = (pint_t)(-1);
4428                 uint8_t prevSymbolSectNum = 0;
4429                 for(uint32_t i=0; i < it.sortedSymbolCount; ++i) {
4430                         const macho_nlist<P>& sym = parser.symbolFromIndex(it.sortedSymbolIndexes[i]);
4431                         const pint_t symbolAddr = sym.n_value();
4432                         const pint_t symbolSectNum = sym.n_sect();
4433                         if ( (symbolAddr == prevSymbolAddr) && (prevSymbolSectNum == symbolSectNum) && (symbolSectNum == this->sectionNum(parser)) ) {
4434                                 ++count;
4435                         }
4436                         prevSymbolAddr = symbolAddr;
4437                         prevSymbolSectNum = symbolSectNum;
4438                 }
4439         }
4440         return count;
4441 }
4442
4443 template <typename A>
4444 uint32_t ImplicitSizeSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4445                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4446                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
4447 {
4448         this->_beginAtoms = (Atom<A>*)p;
4449
4450         const macho_section<P>* sect = this->machoSection();
4451         const pint_t startAddr = sect->addr();
4452         const pint_t endAddr = startAddr + sect->size();
4453         const uint32_t sectNum = this->sectionNum(parser);
4454         //fprintf(stderr, "ImplicitSizeSection::appendAtoms() in section %s\n", sect->sectname());
4455         uint32_t count = 0;
4456         pint_t  foundAddr;
4457         pint_t  size;
4458         const macho_nlist<P>* foundLabel;
4459         Atom<A>* allocatedSpace;
4460         while ( it.next(parser, sectNum, startAddr, endAddr, &foundAddr, &size, &foundLabel) ) {
4461                 if ( foundLabel != NULL ) {
4462                         pint_t labeledAtomSize = this->elementSizeAtAddress(foundAddr);
4463                         allocatedSpace = (Atom<A>*)p;
4464                         if ( this->ignoreLabel(parser.nameFromSymbol(*foundLabel)) ) {
4465                                 //fprintf(stderr, "  0x%08llX make annon\n", (uint64_t)foundAddr);
4466                                 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, foundAddr), foundAddr,
4467                                                                                         this->elementSizeAtAddress(foundAddr), this->definition(),
4468                                                                                         this->combine(parser, foundAddr), this->scopeAtAddress(parser, foundAddr),
4469                                                                                         this->contentType(), this->symbolTableInclusion(),
4470                                                                                         this->dontDeadStrip(), false, false, this->alignmentForAddress(foundAddr));
4471                         }
4472                         else {
4473                                 // make named atom for label
4474                                 //fprintf(stderr, "  0x%08llX make labeled\n", (uint64_t)foundAddr);
4475                                 new (allocatedSpace) Atom<A>(*this, parser, *foundLabel, labeledAtomSize);
4476                         }
4477                         ++count;
4478                         p += sizeof(Atom<A>);
4479                         foundAddr += labeledAtomSize;
4480                         size -= labeledAtomSize;
4481                 }
4482                 // some number of anonymous atoms
4483                 for (pint_t addr = foundAddr; addr < (foundAddr+size); addr += elementSizeAtAddress(addr) ) {
4484                         // make anon atoms for area before label
4485                         if ( this->useElementAt(parser, it, addr) ) {
4486                                 //fprintf(stderr, "  0x%08llX make annon\n", (uint64_t)addr);
4487                                 allocatedSpace = (Atom<A>*)p;
4488                                 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, addr), addr, this->elementSizeAtAddress(addr),
4489                                                                                         this->definition(), this->combine(parser, addr), this->scopeAtAddress(parser, addr),
4490                                                                                         this->contentType(), this->symbolTableInclusion(),
4491                                                                                         this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
4492                                 ++count;
4493                                 p += sizeof(Atom<A>);
4494                         }
4495                 }
4496         }
4497
4498         this->_endAtoms = (Atom<A>*)p;
4499
4500         return count;
4501 }
4502
4503
4504 template <typename A>
4505 unsigned long Literal4Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4506 {
4507         const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
4508         return *literalContent;
4509 }
4510
4511 template <typename A>
4512 bool Literal4Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4513                                                                                                         const ld::IndirectBindingTable& ind) const
4514 {
4515         assert(this->type() == rhs.section().type());
4516         const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
4517
4518         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4519         assert(rhsAtom != NULL);
4520         if ( rhsAtom != NULL ) {
4521                 const uint32_t* rhsLiteralContent = (uint32_t*)rhsAtom->contentPointer();
4522                 return (*literalContent == *rhsLiteralContent);
4523         }
4524         return false;
4525 }
4526
4527
4528 template <typename A>
4529 unsigned long Literal8Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4530 {
4531 #if __LP64__
4532         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4533         return *literalContent;
4534 #else
4535         unsigned long hash = 5381;
4536         const uint8_t* byteContent = atom->contentPointer();
4537         for (int i=0; i < 8; ++i) {
4538                 hash = hash * 33 + byteContent[i];
4539         }
4540         return hash;
4541 #endif
4542 }
4543
4544 template <typename A>
4545 bool Literal8Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4546                                                                                                         const ld::IndirectBindingTable& ind) const
4547 {
4548         if ( rhs.section().type() != ld::Section::typeLiteral8 )
4549                 return false;
4550         assert(this->type() == rhs.section().type());
4551         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4552
4553         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4554         assert(rhsAtom != NULL);
4555         if ( rhsAtom != NULL ) {
4556                 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
4557                 return (*literalContent == *rhsLiteralContent);
4558         }
4559         return false;
4560 }
4561
4562
4563 template <typename A>
4564 unsigned long Literal16Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4565 {
4566         unsigned long hash = 5381;
4567         const uint8_t* byteContent = atom->contentPointer();
4568         for (int i=0; i < 16; ++i) {
4569                 hash = hash * 33 + byteContent[i];
4570         }
4571         return hash;
4572 }
4573
4574 template <typename A>
4575 bool Literal16Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4576                                                                                                         const ld::IndirectBindingTable& ind) const
4577 {
4578         if ( rhs.section().type() != ld::Section::typeLiteral16 )
4579                 return false;
4580         assert(this->type() == rhs.section().type());
4581         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4582
4583         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4584         assert(rhsAtom != NULL);
4585         if ( rhsAtom != NULL ) {
4586                 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
4587                 return ((literalContent[0] == rhsLiteralContent[0]) && (literalContent[1] == rhsLiteralContent[1]));
4588         }
4589         return false;
4590 }
4591
4592
4593
4594 template <typename A>
4595 typename A::P::uint_t CStringSection<A>::elementSizeAtAddress(pint_t addr)
4596 {
4597         const macho_section<P>* sect = this->machoSection();
4598         const char* stringContent = (char*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
4599         return strlen(stringContent) + 1;
4600 }
4601
4602 template <typename A>
4603 bool CStringSection<A>::useElementAt(Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
4604 {
4605         return true;
4606 }
4607
4608 template <typename A>
4609 bool CStringSection<A>::ignoreLabel(const char* label)
4610 {
4611         return (label[0] == 'L') || (label[0] == 'l');
4612 }
4613
4614 template <typename A>
4615 Atom<A>* CStringSection<A>::findAtomByAddress(pint_t addr)
4616 {
4617         Atom<A>* result = this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms);
4618         return result;
4619 }
4620
4621 template <typename A>
4622 unsigned long CStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4623 {
4624         unsigned long hash = 5381;
4625         const char* stringContent = (char*)atom->contentPointer();
4626         for (const char* s = stringContent; *s != '\0'; ++s) {
4627                 hash = hash * 33 + *s;
4628         }
4629         return hash;
4630 }
4631
4632
4633 template <typename A>
4634 bool CStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4635                                                                                                         const ld::IndirectBindingTable& ind) const
4636 {
4637         if ( rhs.section().type() != ld::Section::typeCString )
4638                 return false;
4639         assert(this->type() == rhs.section().type());
4640         assert(strcmp(this->sectionName(), rhs.section().sectionName())== 0);
4641         assert(strcmp(this->segmentName(), rhs.section().segmentName())== 0);
4642         const char* stringContent = (char*)atom->contentPointer();
4643
4644         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4645         assert(rhsAtom != NULL);
4646         if ( rhsAtom != NULL ) {
4647                 if ( atom->_size != rhsAtom->_size )
4648                         return false;
4649                 const char* rhsStringContent = (char*)rhsAtom->contentPointer();
4650                 return (strcmp(stringContent, rhsStringContent) == 0);
4651         }
4652         return false;
4653 }
4654
4655
4656 template <>
4657 ld::Fixup::Kind NonLazyPointerSection<x86>::fixupKind()
4658 {
4659         return ld::Fixup::kindStoreLittleEndian32;
4660 }
4661
4662 template <>
4663 ld::Fixup::Kind NonLazyPointerSection<arm>::fixupKind()
4664 {
4665         return ld::Fixup::kindStoreLittleEndian32;
4666 }
4667
4668
4669 template <>
4670 void NonLazyPointerSection<x86_64>::makeFixups(class Parser<x86_64>& parser, const struct Parser<x86_64>::CFI_CU_InfoArrays&)
4671 {
4672         assert(0 && "x86_64 should not have non-lazy-pointer sections in .o files");
4673 }
4674
4675 template <typename A>
4676 void NonLazyPointerSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
4677 {
4678         // add references for each NLP atom based on indirect symbol table
4679         const macho_section<P>* sect = this->machoSection();
4680         const pint_t endAddr = sect->addr() + sect->size();
4681         for( pint_t addr = sect->addr(); addr < endAddr; addr += sizeof(pint_t)) {
4682                 typename Parser<A>::SourceLocation      src;
4683                 typename Parser<A>::TargetDesc          target;
4684                 src.atom = this->findAtomByAddress(addr);
4685                 src.offsetInAtom = 0;
4686                 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
4687                 target.atom = NULL;
4688                 target.name = NULL;
4689                 target.weakImport = false;
4690                 target.addend = 0;
4691                 if ( symIndex == INDIRECT_SYMBOL_LOCAL ) {
4692                         // use direct reference for local symbols
4693                         const pint_t* nlpContent = (pint_t*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
4694                         pint_t targetAddr = P::getP(*nlpContent);
4695                         target.atom = parser.findAtomByAddress(targetAddr);
4696                         target.weakImport = false;
4697                         target.addend = (targetAddr - target.atom->objectAddress());
4698                         // <rdar://problem/8385011> if pointer to thumb function, mask of thumb bit (not an addend of +1)
4699                         if ( target.atom->isThumb() )
4700                                 target.addend &= (-2);
4701                         assert(src.atom->combine() == ld::Atom::combineNever);
4702                 }
4703                 else {
4704                         const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
4705                         // use direct reference for local symbols
4706                         if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
4707                                 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
4708                                 assert(src.atom->combine() == ld::Atom::combineNever);
4709                         }
4710                         else {
4711                                 target.name = parser.nameFromSymbol(sym);
4712                                 target.weakImport = parser.weakImportFromSymbol(sym);
4713                                 assert(src.atom->combine() == ld::Atom::combineByNameAndReferences);
4714                         }
4715                 }
4716                 parser.addFixups(src, this->fixupKind(), target);
4717         }
4718 }
4719
4720 template <typename A>
4721 ld::Atom::Combine NonLazyPointerSection<A>::combine(Parser<A>& parser, pint_t addr)
4722 {
4723         const macho_section<P>* sect = this->machoSection();
4724         uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
4725         if ( symIndex == INDIRECT_SYMBOL_LOCAL)
4726                 return ld::Atom::combineNever;
4727
4728         // don't coalesce non-lazy-pointers to local symbols
4729         const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
4730         if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) )
4731                 return ld::Atom::combineNever;
4732
4733         return ld::Atom::combineByNameAndReferences;
4734 }
4735
4736 template <typename A>
4737 const char* NonLazyPointerSection<A>::targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind)
4738 {
4739         assert(atom->combine() == ld::Atom::combineByNameAndReferences);
4740         assert(atom->fixupCount() == 1);
4741         ld::Fixup::iterator fit = atom->fixupsBegin();
4742         const char* name = NULL;
4743         switch ( fit->binding ) {
4744                 case ld::Fixup::bindingByNameUnbound:
4745                         name = fit->u.name;
4746                         break;
4747                 case ld::Fixup::bindingByContentBound:
4748                         name = fit->u.target->name();
4749                         break;
4750                 case ld::Fixup::bindingsIndirectlyBound:
4751                         name = ind.indirectName(fit->u.bindingIndex);
4752                         break;
4753                 default:
4754                         assert(0);
4755         }
4756         assert(name != NULL);
4757         return name;
4758 }
4759
4760 template <typename A>
4761 unsigned long NonLazyPointerSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4762 {
4763         assert(atom->combine() == ld::Atom::combineByNameAndReferences);
4764         unsigned long hash = 9508;
4765         for (const char* s = this->targetName(atom, ind); *s != '\0'; ++s) {
4766                 hash = hash * 33 + *s;
4767         }
4768         return hash;
4769 }
4770
4771 template <typename A>
4772 bool NonLazyPointerSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4773                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
4774 {
4775         if ( rhs.section().type() != ld::Section::typeNonLazyPointer )
4776                 return false;
4777         assert(this->type() == rhs.section().type());
4778         // there can be many non-lazy pointer in different section names
4779         // we only want to coalesce in same section name
4780         if ( *this != rhs.section() )
4781                 return false;
4782         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4783         assert(rhsAtom !=  NULL);
4784         const char* thisName = this->targetName(atom, indirectBindingTable);
4785         const char* rhsName = this->targetName(rhsAtom, indirectBindingTable);
4786         return (strcmp(thisName, rhsName) == 0);
4787 }
4788
4789 template <typename A>
4790 ld::Atom::Scope NonLazyPointerSection<A>::scopeAtAddress(Parser<A>& parser, pint_t addr)
4791 {
4792         const macho_section<P>* sect = this->machoSection();
4793         uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
4794         if ( symIndex == INDIRECT_SYMBOL_LOCAL)
4795                 return ld::Atom::scopeTranslationUnit;
4796         else
4797                 return ld::Atom::scopeLinkageUnit;
4798 }
4799
4800
4801 template <typename A>
4802 const uint8_t* CFStringSection<A>::targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
4803                                                                                                         ContentType* ct, unsigned int* count)
4804 {
4805         *ct = contentUnknown;
4806         for (ld::Fixup::iterator fit=atom->fixupsBegin(), end=atom->fixupsEnd(); fit != end; ++fit) {
4807                 const ld::Atom* targetAtom = NULL;
4808                 switch ( fit->binding ) {
4809                         case ld::Fixup::bindingByNameUnbound:
4810                                 // ignore reference to ___CFConstantStringClassReference
4811                                 // we are just looking for reference to backing string data
4812                                 assert(fit->offsetInAtom == 0);
4813                                 assert(strcmp(fit->u.name, "___CFConstantStringClassReference") == 0);
4814                                 break;
4815                         case ld::Fixup::bindingDirectlyBound:
4816                         case ld::Fixup::bindingByContentBound:
4817                                 targetAtom = fit->u.target;
4818                                 break;
4819                         case ld::Fixup::bindingsIndirectlyBound:
4820                                 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
4821                                 break;
4822                         default:
4823                                 assert(0 && "bad binding type");
4824                 }
4825                 assert(targetAtom != NULL);
4826                 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
4827                 if ( targetAtom->section().type() == ld::Section::typeCString ) {
4828                         *ct = contentUTF8;
4829                         *count = targetAtom->size();
4830                 }
4831                 else if ( targetAtom->section().type() == ld::Section::typeUTF16Strings ) {
4832                         *ct = contentUTF16;
4833                         *count = (targetAtom->size()+1)/2; // round up incase of buggy compiler that has only one trailing zero byte
4834                 }
4835                 assert(target !=  NULL);
4836                 return target->contentPointer();
4837         }
4838         assert(0);
4839         return NULL;
4840 }
4841
4842 template <typename A>
4843 unsigned long CFStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4844 {
4845         // base hash of CFString on hash of cstring it wraps
4846         ContentType cType;
4847         unsigned long hash;
4848         unsigned int charCount;
4849         const uint8_t* content = this->targetContent(atom, ind, &cType, &charCount);
4850         switch ( cType ) {
4851                 case contentUTF8:
4852                         hash = 9408;
4853                         for (const char* s = (char*)content; *s != '\0'; ++s) {
4854                                 hash = hash * 33 + *s;
4855                         }
4856                         return hash;
4857                 case contentUTF16:
4858                         hash = 407955;
4859                         --charCount; // don't add last 0x0000 to hash because some buggy compilers only have trailing single byte
4860                         for (const uint16_t* s = (uint16_t*)content; charCount > 0; ++s, --charCount) {
4861                                 hash = hash * 1025 + *s;
4862                         }
4863                         return hash;
4864                 case contentUnknown:
4865                         return 0;
4866         }
4867         return 0;
4868 }
4869
4870
4871 template <typename A>
4872 bool CFStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4873                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
4874 {
4875         if ( atom == &rhs )
4876                 return true;
4877         if ( rhs.section().type() != ld::Section::typeCFString)
4878                 return false;
4879         assert(this->type() == rhs.section().type());
4880         assert(strcmp(this->sectionName(), "__cfstring") == 0);
4881
4882         ContentType thisType;
4883         unsigned int charCount;
4884         const uint8_t* cstringContent = this->targetContent(atom, indirectBindingTable, &thisType, &charCount);
4885         ContentType rhsType;
4886         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4887         assert(rhsAtom !=  NULL);
4888         unsigned int rhsCharCount;
4889         const uint8_t* rhsStringContent = this->targetContent(rhsAtom, indirectBindingTable, &rhsType, &rhsCharCount);
4890
4891         if ( thisType != rhsType )
4892                 return false;
4893
4894         // no need to compare content of pointers are already the same
4895         if ( cstringContent == rhsStringContent )
4896                 return true;
4897
4898         // no need to compare content if size is different
4899         if ( charCount != rhsCharCount )
4900                 return false;
4901
4902         switch ( thisType ) {
4903                 case contentUTF8:
4904                         return (strcmp((char*)cstringContent, (char*)rhsStringContent) == 0);
4905                 case contentUTF16:
4906                         {
4907                                 const uint16_t* cstringContent16 = (uint16_t*)cstringContent;
4908                                 const uint16_t* rhsStringContent16 = (uint16_t*)rhsStringContent;
4909                                 for (unsigned int i = 0; i < charCount; ++i) {
4910                                         if ( cstringContent16[i] != rhsStringContent16[i] )
4911                                                 return false;
4912                                 }
4913                                 return true;
4914                         }
4915                 case contentUnknown:
4916                         return false;
4917         }
4918         return false;
4919 }
4920
4921
4922 template <typename A>
4923 typename A::P::uint_t ObjC1ClassSection<A>::elementSizeAtAddress(pint_t addr)
4924 {
4925         // nominal size for each class is 48 bytes, but sometimes the compiler
4926         // over aligns and there is padding after class data
4927         const macho_section<P>* sct = this->machoSection();
4928         uint32_t align = 1 << sct->align();
4929         uint32_t size = ((12 * sizeof(pint_t)) + align-1) & (-align);
4930         return size;
4931 }
4932
4933 template <typename A>
4934 const char* ObjC1ClassSection<A>::unlabeledAtomName(Parser<A>& parser, pint_t addr)
4935 {
4936         // 8-bytes into class object is pointer to class name
4937         const macho_section<P>* sct = this->machoSection();
4938         uint32_t classObjcFileOffset = sct->offset() - sct->addr() + addr;
4939         const uint8_t* mappedFileContent = this->file().fileContent();
4940         pint_t nameAddr = P::getP(*((pint_t*)(mappedFileContent+classObjcFileOffset+2*sizeof(pint_t))));
4941
4942         // find section containing string address to get string bytes
4943         const macho_section<P>* const sections = parser.firstMachOSection();
4944         const uint32_t sectionCount = parser.machOSectionCount();
4945         for (uint32_t i=0; i < sectionCount; ++i) {
4946                 const macho_section<P>* aSect = &sections[i];
4947                 if ( (aSect->addr() <= nameAddr) && (nameAddr < (aSect->addr()+aSect->size())) ) {
4948                         assert((aSect->flags() & SECTION_TYPE) == S_CSTRING_LITERALS);
4949                         uint32_t nameFileOffset = aSect->offset() - aSect->addr() + nameAddr;
4950                         const char* name = (char*)mappedFileContent + nameFileOffset;
4951                         // spin through symbol table to find absolute symbol corresponding to this class
4952                         for (uint32_t s=0; s < parser.symbolCount(); ++s) {
4953                                 const macho_nlist<P>& sym =     parser.symbolFromIndex(s);
4954                                 if ( (sym.n_type() & N_TYPE) != N_ABS )
4955                                         continue;
4956                                 const char* absName = parser.nameFromSymbol(sym);
4957                                 if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
4958                                         if ( strcmp(&absName[17], name) == 0 )
4959                                                 return absName;
4960                                 }
4961                         }
4962                         assert(0 && "obj class name not found in symbol table");
4963                 }
4964         }
4965         assert(0 && "obj class name not found");
4966         return "unknown objc class";
4967 }
4968
4969
4970 template <typename A>
4971 const char* ObjC2ClassRefsSection<A>::targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4972 {
4973         assert(atom->fixupCount() == 1);
4974         ld::Fixup::iterator fit = atom->fixupsBegin();
4975         const char* className = NULL;
4976         switch ( fit->binding ) {
4977                 case ld::Fixup::bindingByNameUnbound:
4978                         className = fit->u.name;
4979                         break;
4980                 case ld::Fixup::bindingDirectlyBound:
4981                 case ld::Fixup::bindingByContentBound:
4982                         className = fit->u.target->name();
4983                         break;
4984                 case ld::Fixup::bindingsIndirectlyBound:
4985                         className = ind.indirectName(fit->u.bindingIndex);
4986                         break;
4987                 default:
4988                         assert(0 && "unsupported binding in objc2 class ref section");
4989         }
4990         assert(className != NULL);
4991         return className;
4992 }
4993
4994
4995 template <typename A>
4996 unsigned long ObjC2ClassRefsSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4997 {
4998         unsigned long hash = 978;
4999         for (const char* s = targetClassName(atom, ind); *s != '\0'; ++s) {
5000                 hash = hash * 33 + *s;
5001         }
5002         return hash;
5003 }
5004
5005 template <typename A>
5006 bool ObjC2ClassRefsSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5007                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5008 {
5009         assert(this->type() == rhs.section().type());
5010         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5011         assert(rhsAtom !=  NULL);
5012         const char* thisClassName = targetClassName(atom, indirectBindingTable);
5013         const char* rhsClassName = targetClassName(rhsAtom, indirectBindingTable);
5014         return (strcmp(thisClassName, rhsClassName) == 0);
5015 }
5016
5017
5018 template <typename A>
5019 const char* Objc1ClassReferences<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5020 {
5021         assert(atom->fixupCount() == 2);
5022         ld::Fixup::iterator fit = atom->fixupsBegin();
5023         if ( fit->kind == ld::Fixup::kindSetTargetAddress )
5024                 ++fit;
5025         const ld::Atom* targetAtom = NULL;
5026         switch ( fit->binding ) {
5027                 case ld::Fixup::bindingByContentBound:
5028                         targetAtom = fit->u.target;
5029                         break;
5030                 case ld::Fixup::bindingsIndirectlyBound:
5031                         targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5032                         if ( targetAtom == NULL ) {
5033                                 fprintf(stderr, "missing target named %s\n", ind.indirectName(fit->u.bindingIndex));
5034                         }
5035                         break;
5036                 default:
5037                         assert(0);
5038         }
5039         assert(targetAtom != NULL);
5040         const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5041         assert(target !=  NULL);
5042         return (char*)target->contentPointer();
5043 }
5044
5045
5046 template <typename A>
5047 const char* PointerToCStringSection<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5048 {
5049         assert(atom->fixupCount() == 1);
5050         ld::Fixup::iterator fit = atom->fixupsBegin();
5051         const ld::Atom* targetAtom = NULL;
5052         switch ( fit->binding ) {
5053                 case ld::Fixup::bindingByContentBound:
5054                         targetAtom = fit->u.target;
5055                         break;
5056                 case ld::Fixup::bindingsIndirectlyBound:
5057                         targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5058                         break;
5059                 default:
5060                         assert(0);
5061         }
5062         assert(targetAtom != NULL);
5063         const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5064         assert(target !=  NULL);
5065         return (char*)target->contentPointer();
5066 }
5067
5068 template <typename A>
5069 unsigned long PointerToCStringSection<A>::contentHash(const class Atom<A>* atom,
5070                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5071 {
5072         // make hash from section name and target cstring name
5073         unsigned long hash = 123;
5074         for (const char* s = this->sectionName(); *s != '\0'; ++s) {
5075                 hash = hash * 33 + *s;
5076         }
5077         for (const char* s = this->targetCString(atom, indirectBindingTable); *s != '\0'; ++s) {
5078                 hash = hash * 33 + *s;
5079         }
5080         return hash;
5081 }
5082
5083 template <typename A>
5084 bool PointerToCStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5085                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5086 {
5087         assert(this->type() == rhs.section().type());
5088         // there can be pointers-to-cstrings in different section names
5089         // we only want to coalesce in same section name
5090         if ( *this != rhs.section() )
5091                 return false;
5092
5093         // get string content for this
5094         const char* cstringContent = this->targetCString(atom, indirectBindingTable);
5095         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5096         assert(rhsAtom !=  NULL);
5097         const char* rhsCstringContent = this->targetCString(rhsAtom, indirectBindingTable);
5098
5099         assert(cstringContent != NULL);
5100         assert(rhsCstringContent != NULL);
5101         return (strcmp(cstringContent, rhsCstringContent) == 0);
5102 }
5103
5104
5105
5106 template <typename A>
5107 unsigned long UTF16StringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5108 {
5109         unsigned long hash = 5381;
5110         const uint16_t* stringContent = (uint16_t*)atom->contentPointer();
5111         // some buggy compilers end utf16 data with single byte, so don't use last word in hash computation
5112         unsigned int count = (atom->size()/2) - 1;
5113         for (const uint16_t* s = stringContent; count > 0; ++s, --count) {
5114                 hash = hash * 33 + *s;
5115         }
5116         return hash;
5117 }
5118
5119 template <typename A>
5120 bool UTF16StringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5121                                                                                                         const ld::IndirectBindingTable& ind) const
5122 {
5123         if ( rhs.section().type() != ld::Section::typeUTF16Strings )
5124                 return false;
5125         assert(0);
5126         return false;
5127 }
5128
5129
5130
5131
5132
5133
5134
5135 template <>
5136 uint32_t Section<x86_64>::x86_64PcRelOffset(uint8_t r_type)
5137 {
5138         switch ( r_type ) {
5139                 case X86_64_RELOC_SIGNED:
5140                         return 4;
5141                 case X86_64_RELOC_SIGNED_1:
5142                         return 5;
5143                 case X86_64_RELOC_SIGNED_2:
5144                         return 6;
5145                 case X86_64_RELOC_SIGNED_4:
5146                         return 8;
5147         }
5148         return 0;
5149 }
5150
5151
5152 template <>
5153 bool Section<x86_64>::addRelocFixup(class Parser<x86_64>& parser, const macho_relocation_info<P>* reloc)
5154 {
5155         const macho_section<P>* sect = this->machoSection();
5156         uint64_t srcAddr = sect->addr() + reloc->r_address();
5157         Parser<x86_64>::SourceLocation  src;
5158         Parser<x86_64>::TargetDesc              target;
5159         Parser<x86_64>::TargetDesc              toTarget;
5160         src.atom = this->findAtomByAddress(srcAddr);
5161         src.offsetInAtom = srcAddr - src.atom->_objAddress;
5162         const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5163         uint64_t contentValue = 0;
5164         const macho_relocation_info<x86_64::P>* nextReloc = &reloc[1];
5165         bool result = false;
5166         bool useDirectBinding;
5167         switch ( reloc->r_length() ) {
5168                 case 0:
5169                         contentValue = *fixUpPtr;
5170                         break;
5171                 case 1:
5172                         contentValue = (int64_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5173                         break;
5174                 case 2:
5175                         contentValue = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5176                         break;
5177                 case 3:
5178                         contentValue = E::get64(*((uint64_t*)fixUpPtr));
5179                         break;
5180         }
5181         target.atom = NULL;
5182         target.name = NULL;
5183         target.weakImport = false;
5184         target.addend = 0;
5185         if ( reloc->r_extern() ) {
5186                 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5187                 // use direct reference for local symbols
5188                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5189                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5190                         target.addend += contentValue;
5191                 }
5192                 else {
5193                         target.name = parser.nameFromSymbol(sym);
5194                         target.weakImport = parser.weakImportFromSymbol(sym);
5195                         target.addend = contentValue;
5196                 }
5197                 // cfstrings should always use direct reference to backing store
5198                 if ( (this->type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
5199                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5200                         target.addend = contentValue;
5201                 }
5202         }
5203         else {
5204                 if ( reloc->r_pcrel()  )
5205                         contentValue += srcAddr + x86_64PcRelOffset(reloc->r_type());
5206                 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5207         }
5208         switch ( reloc->r_type() ) {
5209                 case X86_64_RELOC_UNSIGNED:
5210                         if ( reloc->r_pcrel() )
5211                                 throw "pcrel and X86_64_RELOC_UNSIGNED not supported";
5212                         switch ( reloc->r_length() ) {
5213                                 case 0:
5214                                 case 1:
5215                                         throw "length < 2 and X86_64_RELOC_UNSIGNED not supported";
5216                                 case 2:
5217                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5218                                         break;
5219                                 case 3:
5220                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian64, target);
5221                                         break;
5222                         }
5223                         break;
5224                 case X86_64_RELOC_SIGNED:
5225                 case X86_64_RELOC_SIGNED_1:
5226                 case X86_64_RELOC_SIGNED_2:
5227                 case X86_64_RELOC_SIGNED_4:
5228                         if ( ! reloc->r_pcrel() )
5229                                 throw "not pcrel and X86_64_RELOC_SIGNED* not supported";
5230                         if ( reloc->r_length() != 2 )
5231                                 throw "length != 2 and X86_64_RELOC_SIGNED* not supported";
5232                         switch ( reloc->r_type() ) {
5233                                 case X86_64_RELOC_SIGNED:
5234                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5235                                         break;
5236                                 case X86_64_RELOC_SIGNED_1:
5237                                         if ( reloc->r_extern() )
5238                                                 target.addend += 1;
5239                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_1, target);
5240                                         break;
5241                                 case X86_64_RELOC_SIGNED_2:
5242                                         if ( reloc->r_extern() )
5243                                                 target.addend += 2;
5244                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_2, target);
5245                                         break;
5246                                 case X86_64_RELOC_SIGNED_4:
5247                                         if ( reloc->r_extern() )
5248                                                 target.addend += 4;
5249                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_4, target);
5250                                         break;
5251                         }
5252                         break;
5253                 case X86_64_RELOC_BRANCH:
5254                         if ( ! reloc->r_pcrel() )
5255                                 throw "not pcrel and X86_64_RELOC_BRANCH not supported";
5256                         switch ( reloc->r_length() ) {
5257                                 case 2:
5258                                         if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5259                                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5260                                                 parser.addDtraceExtraInfos(src, &target.name[16]);
5261                                         }
5262                                         else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5263                                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5264                                                 parser.addDtraceExtraInfos(src, &target.name[20]);
5265                                         }
5266                                         else {
5267                                                 parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel32, target);
5268                                         }
5269                                         break;
5270                                 case 0:
5271                                         parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel8, target);
5272                                         break;
5273                                 default:
5274                                         throwf("length=%d and X86_64_RELOC_BRANCH not supported", reloc->r_length());
5275                         }
5276                         break;
5277                 case X86_64_RELOC_GOT:
5278                         if ( ! reloc->r_extern() )
5279                                 throw "not extern and X86_64_RELOC_GOT not supported";
5280                         if ( ! reloc->r_pcrel() )
5281                                 throw "not pcrel and X86_64_RELOC_GOT not supported";
5282                         if ( reloc->r_length() != 2 )
5283                                 throw "length != 2 and X86_64_RELOC_GOT not supported";
5284                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOT, target);
5285                         break;
5286                 case X86_64_RELOC_GOT_LOAD:
5287                         if ( ! reloc->r_extern() )
5288                                 throw "not extern and X86_64_RELOC_GOT_LOAD not supported";
5289                         if ( ! reloc->r_pcrel() )
5290                                 throw "not pcrel and X86_64_RELOC_GOT_LOAD not supported";
5291                         if ( reloc->r_length() != 2 )
5292                                 throw "length != 2 and X86_64_RELOC_GOT_LOAD not supported";
5293                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOTLoad, target);
5294                         break;
5295                 case X86_64_RELOC_SUBTRACTOR:
5296                         if ( reloc->r_pcrel() )
5297                                 throw "X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5298                         if ( reloc->r_length() < 2 )
5299                                 throw "X86_64_RELOC_SUBTRACTOR must have r_length of 2 or 3";
5300                         if ( !reloc->r_extern() )
5301                                 throw "X86_64_RELOC_SUBTRACTOR must have r_extern=1";
5302                         if ( nextReloc->r_type() != X86_64_RELOC_UNSIGNED )
5303                                 throw "X86_64_RELOC_SUBTRACTOR must be followed by X86_64_RELOC_UNSIGNED";
5304                         result = true;
5305                         if ( nextReloc->r_pcrel() )
5306                                 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5307                         if ( nextReloc->r_length() != reloc->r_length() )
5308                                 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR must have same r_length";
5309                         if ( nextReloc->r_extern() ) {
5310                                 const macho_nlist<P>& sym = parser.symbolFromIndex(nextReloc->r_symbolnum());
5311                                 // use direct reference for local symbols
5312                                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5313                                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), toTarget);
5314                                         toTarget.addend = contentValue;
5315                                         useDirectBinding = true;
5316                                 }
5317                                 else {
5318                                         toTarget.name = parser.nameFromSymbol(sym);
5319                                         toTarget.weakImport = parser.weakImportFromSymbol(sym);
5320                                         toTarget.addend = contentValue;
5321                                         useDirectBinding = false;
5322                                 }
5323                         }
5324                         else {
5325                                 parser.findTargetFromAddressAndSectionNum(contentValue, nextReloc->r_symbolnum(), toTarget);
5326                                 useDirectBinding = (toTarget.atom->scope() == ld::Atom::scopeTranslationUnit);
5327                         }
5328                         if ( useDirectBinding )
5329                                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.atom);
5330                         else
5331                                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.weakImport, toTarget.name);
5332                         parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, toTarget.addend);
5333                         if ( target.atom == NULL )
5334                                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, false, target.name);
5335                         else
5336                                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, target.atom);
5337                         if ( reloc->r_length() == 2 )
5338                                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
5339                         else
5340                                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian64);
5341                         break;
5342                 case X86_64_RELOC_TLV:
5343                         if ( ! reloc->r_extern() )
5344                                 throw "not extern and X86_64_RELOC_TLV not supported";
5345                         if ( ! reloc->r_pcrel() )
5346                                 throw "not pcrel and X86_64_RELOC_TLV not supported";
5347                         if ( reloc->r_length() != 2 )
5348                                 throw "length != 2 and X86_64_RELOC_TLV not supported";
5349                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
5350                         break;
5351                 default:
5352                         throwf("unknown relocation type %d", reloc->r_type());
5353         }
5354         return result;
5355 }
5356
5357
5358
5359 template <>
5360 bool Section<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<P>* reloc)
5361 {
5362         const macho_section<P>* sect = this->machoSection();
5363         uint32_t srcAddr;
5364         const uint8_t* fixUpPtr;
5365         uint32_t contentValue = 0;
5366         ld::Fixup::Kind kind = ld::Fixup::kindNone;
5367         Parser<x86>::SourceLocation     src;
5368         Parser<x86>::TargetDesc         target;
5369
5370         if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
5371                 srcAddr = sect->addr() + reloc->r_address();
5372                 src.atom = this->findAtomByAddress(srcAddr);
5373                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5374                 fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5375                 switch ( reloc->r_type() ) {
5376                 case GENERIC_RELOC_VANILLA:
5377                         switch ( reloc->r_length() ) {
5378                                 case 0:
5379                                         contentValue = (int32_t)(int8_t)*fixUpPtr;
5380                                         if ( reloc->r_pcrel() ) {
5381                                                 kind = ld::Fixup::kindStoreX86BranchPCRel8;
5382                                                 contentValue += srcAddr + sizeof(uint8_t);
5383                                         }
5384                                         else
5385                                                 throw "r_length=0 and r_pcrel=0 not supported";
5386                                         break;
5387                                 case 1:
5388                                         contentValue = (int32_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5389                                         if ( reloc->r_pcrel() ) {
5390                                                 kind = ld::Fixup::kindStoreX86PCRel16;
5391                                                 contentValue += srcAddr + sizeof(uint16_t);
5392                                         }
5393                                         else
5394                                                 kind = ld::Fixup::kindStoreLittleEndian16;
5395                                         break;
5396                                 case 2:
5397                                         contentValue = E::get32(*((uint32_t*)fixUpPtr));
5398                                         if ( reloc->r_pcrel() ) {
5399                                                 kind = ld::Fixup::kindStoreX86BranchPCRel32;
5400                                                 contentValue += srcAddr + sizeof(uint32_t);
5401                                         }
5402                                         else
5403                                                 kind = ld::Fixup::kindStoreLittleEndian32;
5404                                         break;
5405                                 case 3:
5406                                         throw "r_length=3 not supported";
5407                         }
5408                         if ( reloc->r_extern() ) {
5409                                 target.atom = NULL;
5410                                 const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
5411                                 target.name = parser.nameFromSymbol(targetSymbol);
5412                                 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
5413                                 target.addend = (int32_t)contentValue;
5414                         }
5415                         else {
5416                                 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5417                         }
5418                         if ( (kind == ld::Fixup::kindStoreX86BranchPCRel32) && (target.name != NULL) ) {
5419                                 if ( strncmp(target.name, "___dtrace_probe$", 16) == 0 ) {
5420                                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5421                                         parser.addDtraceExtraInfos(src, &target.name[16]);
5422                                         return false;
5423                                 }
5424                                 else if ( strncmp(target.name, "___dtrace_isenabled$", 20) == 0 ) {
5425                                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5426                                         parser.addDtraceExtraInfos(src, &target.name[20]);
5427                                         return false;
5428                                 }
5429                         }
5430                         parser.addFixups(src, kind, target);
5431                         return false;
5432                         break;
5433                 case GENERIC_RLEOC_TLV:
5434                         {
5435                                 if ( !reloc->r_extern() )
5436                                         throw "r_extern=0 and r_type=GENERIC_RLEOC_TLV not supported";
5437                                 if ( reloc->r_length() != 2 )
5438                                         throw "r_length!=2 and r_type=GENERIC_RLEOC_TLV not supported";
5439                                 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5440                                 // use direct reference for local symbols
5441                                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
5442                                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5443                                 }
5444                                 else {
5445                                         target.atom = NULL;
5446                                         target.name = parser.nameFromSymbol(sym);
5447                                         target.weakImport = parser.weakImportFromSymbol(sym);
5448                                 }
5449                                 target.addend = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5450                                 if ( reloc->r_pcrel() ) {
5451                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
5452                                 }
5453                                 else {
5454                                         parser.addFixups(src, ld::Fixup::kindStoreX86Abs32TLVLoad, target);
5455                                 }
5456                                 return false;
5457                         }
5458                         break;
5459                 default:
5460                         throwf("unsupported i386 relocation type (%d)", reloc->r_type());
5461                 }
5462         }
5463         else {
5464                 // scattered relocation
5465                 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
5466                 srcAddr = sect->addr() + sreloc->r_address();
5467                 src.atom = this->findAtomByAddress(srcAddr);
5468                 assert(src.atom != NULL);
5469                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5470                 fixUpPtr = file().fileContent() + sect->offset() + sreloc->r_address();
5471                 uint32_t relocValue = sreloc->r_value();
5472                 bool result = false;
5473                 // file format allows pair to be scattered or not
5474                 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
5475                 const macho_relocation_info<P>* nextReloc = &reloc[1];
5476                 bool nextRelocIsPair = false;
5477                 uint32_t nextRelocAddress = 0;
5478                 uint32_t nextRelocValue = 0;
5479                 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
5480                         if ( nextReloc->r_type() == GENERIC_RELOC_PAIR ) {
5481                                 nextRelocIsPair = true;
5482                                 nextRelocAddress = nextReloc->r_address();
5483                                 result = true;  // iterator should skip next reloc, since we've consumed it here
5484                         }
5485                 }
5486                 else {
5487                         if ( nextSReloc->r_type() == GENERIC_RELOC_PAIR ) {
5488                                 nextRelocIsPair = true;
5489                                 nextRelocAddress = nextSReloc->r_address();
5490                                 nextRelocValue = nextSReloc->r_value();
5491                         }
5492                 }
5493                 switch (sreloc->r_type()) {
5494                         case GENERIC_RELOC_VANILLA:
5495                                 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
5496                                 target.atom = parser.findAtomByAddress(relocValue);
5497                                 if ( sreloc->r_pcrel() ) {
5498                                         switch ( sreloc->r_length() ) {
5499                                                 case 0:
5500                                                         contentValue = srcAddr + 1 + *fixUpPtr;
5501                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
5502                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel8, target);
5503                                                         break;
5504                                                 case 1:
5505                                                         contentValue = srcAddr + 2 + LittleEndian::get16(*((uint16_t*)fixUpPtr));
5506                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
5507                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel16, target);
5508                                                         break;
5509                                                 case 2:
5510                                                         contentValue = srcAddr + 4 + LittleEndian::get32(*((uint32_t*)fixUpPtr));
5511                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
5512                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5513                                                         break;
5514                                                 case 3:
5515                                                         throw "unsupported r_length=3 for scattered pc-rel vanilla reloc";
5516                                                         break;
5517                                         }
5518                                 }
5519                                 else {
5520                                         if ( sreloc->r_length() != 2 )
5521                                                 throwf("unsupported r_length=%d for scattered vanilla reloc", sreloc->r_length());
5522                                         contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
5523                                         target.addend = (int32_t)contentValue - (int32_t)(target.atom->objectAddress());
5524                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5525                                 }
5526                                 break;
5527                         case GENERIC_RELOC_SECTDIFF:
5528                         case GENERIC_RELOC_LOCAL_SECTDIFF:
5529                                 {
5530                                         if ( !nextRelocIsPair )
5531                                                 throw "GENERIC_RELOC_SECTDIFF missing following pair";
5532                                         switch ( sreloc->r_length() ) {
5533                                                 case 0:
5534                                                 case 3:
5535                                                         throw "bad length for GENERIC_RELOC_SECTDIFF";
5536                                                 case 1:
5537                                                         contentValue = (int32_t)(int16_t)LittleEndian::get16(*((uint16_t*)fixUpPtr));
5538                                                         kind = ld::Fixup::kindStoreLittleEndian16;
5539                                                         break;
5540                                                 case 2:
5541                                                         contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
5542                                                         kind = ld::Fixup::kindStoreLittleEndian32;
5543                                                         break;
5544                                         }
5545                                         Atom<x86>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
5546                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5547                                         parser.findTargetFromAddress(sreloc->r_value(), target);
5548                                         // check for addend encoded in the section content
5549                                         int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
5550                                         if ( addend < 0 ) {
5551                                                 // switch binding base on coalescing
5552                                                 if ( target.atom == NULL ) {
5553                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
5554                                                 }
5555                                                 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
5556                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
5557                                                 }
5558                                                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
5559                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
5560                                                 }
5561                                                 else {
5562                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
5563                                                 }
5564                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
5565                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5566                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
5567                                                 parser.addFixup(src, ld::Fixup::k5of5, kind);
5568                                         }
5569                                         else {
5570                                                 // switch binding base on coalescing
5571                                                 if ( target.atom == NULL ) {
5572                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
5573                                                 }
5574                                                 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
5575                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
5576                                                 }
5577                                                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
5578                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
5579                                                 }
5580                                                 else {
5581                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
5582                                                 }
5583                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend+addend);
5584                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5585                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5586                                                 parser.addFixup(src, ld::Fixup::k5of5, kind);
5587                                         }
5588                                 }
5589                                 break;
5590                 }
5591                 return result;
5592         }
5593 }
5594
5595
5596
5597
5598
5599 #if SUPPORT_ARCH_arm_any
5600 template <>
5601 bool Section<arm>::addRelocFixup(class Parser<arm>& parser, const macho_relocation_info<P>* reloc)
5602 {
5603         const macho_section<P>* sect = this->machoSection();
5604         bool result = false;
5605         uint32_t srcAddr;
5606         uint32_t dstAddr;
5607         uint32_t* fixUpPtr;
5608         int32_t displacement = 0;
5609         uint32_t instruction = 0;
5610         pint_t contentValue = 0;
5611         Parser<arm>::SourceLocation     src;
5612         Parser<arm>::TargetDesc         target;
5613         const macho_relocation_info<P>* nextReloc;
5614
5615         if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
5616                 bool externSymbolIsThumbDef = false;
5617                 srcAddr = sect->addr() + reloc->r_address();
5618                 src.atom = this->findAtomByAddress(srcAddr);
5619                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5620                 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + reloc->r_address());
5621                 if ( reloc->r_type() != ARM_RELOC_PAIR )
5622                         instruction = LittleEndian::get32(*fixUpPtr);
5623                 if ( reloc->r_extern() ) {
5624                         const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
5625                         // use direct reference for local symbols
5626                         if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) && (((targetSymbol.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(targetSymbol)[0] == 'L')) ) {
5627                                 parser.findTargetFromAddressAndSectionNum(targetSymbol.n_value(), targetSymbol.n_sect(), target);
5628                         }
5629                         else {
5630                                 target.atom = NULL;
5631                                 target.name = parser.nameFromSymbol(targetSymbol);
5632                                 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
5633                                 if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) &&  (targetSymbol.n_desc() & N_ARM_THUMB_DEF) )
5634                                         externSymbolIsThumbDef = true;
5635                         }
5636                 }
5637                 switch ( reloc->r_type() ) {
5638                         case ARM_RELOC_BR24:
5639                                 // Sign-extend displacement
5640                                 displacement = (instruction & 0x00FFFFFF) << 2;
5641                                 if ( (displacement & 0x02000000) != 0 )
5642                                         displacement |= 0xFC000000;
5643                                 // The pc added will be +8 from the pc
5644                                 displacement += 8;
5645                                 // If this is BLX add H << 1
5646                                 if ((instruction & 0xFE000000) == 0xFA000000)
5647                                         displacement += ((instruction & 0x01000000) >> 23);
5648                                 if ( reloc->r_extern() ) {
5649                                         target.addend = srcAddr + displacement;
5650                                         if ( externSymbolIsThumbDef )
5651                                                 target.addend &= -2; // remove thumb bit
5652                                 }
5653                                 else {
5654                                         dstAddr = srcAddr + displacement;
5655                                         parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
5656                                 }
5657                                 // special case "calls" for dtrace
5658                                 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5659                                         parser.addFixup(src, ld::Fixup::k1of1,
5660                                                                                                                         ld::Fixup::kindStoreARMDtraceCallSiteNop, false, target.name);
5661                                         parser.addDtraceExtraInfos(src, &target.name[16]);
5662                                 }
5663                                 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5664                                         parser.addFixup(src, ld::Fixup::k1of1,
5665                                                                                                                         ld::Fixup::kindStoreARMDtraceIsEnableSiteClear, false, target.name);
5666                                         parser.addDtraceExtraInfos(src, &target.name[20]);
5667                                 }
5668                                 else {
5669                                         parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
5670                                 }
5671                                 break;
5672                         case ARM_THUMB_RELOC_BR22:
5673                                 // thumb2 added two more bits to displacement, complicating the displacement decoding
5674                                 {
5675                                         uint32_t s = (instruction >> 10) & 0x1;
5676                                         uint32_t j1 = (instruction >> 29) & 0x1;
5677                                         uint32_t j2 = (instruction >> 27) & 0x1;
5678                                         uint32_t imm10 = instruction & 0x3FF;
5679                                         uint32_t imm11 = (instruction >> 16) & 0x7FF;
5680                                         uint32_t i1 = (j1 == s);
5681                                         uint32_t i2 = (j2 == s);
5682                                         uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
5683                                         int32_t sdis = dis;
5684                                         if ( s )
5685                                                 sdis |= 0xFE000000;
5686                                         displacement = sdis;
5687                                 }
5688                                 // The pc added will be +4 from the pc
5689                                 displacement += 4;
5690                                 // If the instruction was blx, force the low 2 bits to be clear
5691                                 dstAddr = srcAddr + displacement;
5692                                 if ((instruction & 0xD0000000) == 0xC0000000)
5693                                         dstAddr &= 0xFFFFFFFC;
5694
5695                                 if ( reloc->r_extern() ) {
5696                                         target.addend = dstAddr;
5697                                 }
5698                                 else {
5699                                         parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
5700                                 }
5701                                 // special case "calls" for dtrace
5702                                 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5703                                         parser.addFixup(src, ld::Fixup::k1of1,
5704                                                                                                                         ld::Fixup::kindStoreThumbDtraceCallSiteNop, false, target.name);
5705                                         parser.addDtraceExtraInfos(src, &target.name[16]);
5706                                 }
5707                                 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5708                                         parser.addFixup(src, ld::Fixup::k1of1,
5709                                                                                                                         ld::Fixup::kindStoreThumbDtraceIsEnableSiteClear, false, target.name);
5710                                         parser.addDtraceExtraInfos(src, &target.name[20]);
5711                                 }
5712                                 else {
5713                                         parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
5714                                 }
5715                                 break;
5716                         case ARM_RELOC_VANILLA:
5717                                 if ( reloc->r_length() != 2 )
5718                                         throw "bad length for ARM_RELOC_VANILLA";
5719                                 contentValue = LittleEndian::get32(*fixUpPtr);
5720                                 if ( reloc->r_extern() ) {
5721                                         target.addend = (int32_t)contentValue;
5722                                         if ( externSymbolIsThumbDef )
5723                                                 target.addend &= -2; // remove thumb bit
5724                                 }
5725                                 else {
5726                                         parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5727                                         // possible non-extern relocation turned into by-name ref because target is a weak-def
5728                                         if ( target.atom != NULL ) {
5729                                                 if ( target.atom->isThumb() )
5730                                                         target.addend &= -2; // remove thumb bit
5731                                                 // if reference to LSDA, add group subordinate fixup
5732                                                 if ( target.atom->contentType() == ld::Atom::typeLSDA ) {
5733                                                         Parser<arm>::SourceLocation     src2;
5734                                                         src2.atom = src.atom;
5735                                                         src2.offsetInAtom = 0;
5736                                                         parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, target.atom);
5737                                                 }
5738                                         }
5739                                 }
5740                                 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5741                                 break;
5742                         case ARM_THUMB_32BIT_BRANCH:
5743                                 // silently ignore old unnecessary reloc
5744                                 break;
5745                         case ARM_RELOC_HALF:
5746                                 nextReloc = &reloc[1];
5747                                 if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
5748                                         uint32_t instruction16;
5749                                         uint32_t other16 = (nextReloc->r_address() & 0xFFFF);
5750                                         bool isThumb;
5751                                         if ( reloc->r_length() & 2 ) {
5752                                                 isThumb = true;
5753                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
5754                                                 uint32_t imm4 =  (instruction & 0x0000000F);
5755                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
5756                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
5757                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
5758                                         }
5759                                         else {
5760                                                 isThumb = false;
5761                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
5762                                                 uint32_t imm12 = (instruction & 0x00000FFF);
5763                                                 instruction16 = (imm4 << 12) | imm12;
5764                                         }
5765                                         if ( reloc->r_length() & 1 ) {
5766                                                 // high 16
5767                                                 dstAddr = ((instruction16 << 16) | other16);
5768                         if ( reloc->r_extern() ) {
5769                             target.addend = dstAddr;
5770                                                         if ( externSymbolIsThumbDef )
5771                                                                 target.addend &= -2; // remove thumb bit
5772                                                 }
5773                         else {
5774                             parser.findTargetFromAddress(dstAddr, target);
5775                             if ( target.atom->isThumb() )
5776                                 target.addend &= (-2); // remove thumb bit
5777                         }
5778                                                 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16), target);
5779                                         }
5780                                         else {
5781                                                 // low 16
5782                                                 dstAddr = (other16 << 16) | instruction16;
5783                         if ( reloc->r_extern() ) {
5784                             target.addend = dstAddr;
5785                                                         if ( externSymbolIsThumbDef )
5786                                                                 target.addend &= -2; // remove thumb bit
5787                         }
5788                         else {
5789                             parser.findTargetFromAddress(dstAddr, target);
5790                             if ( target.atom->isThumb() )
5791                                 target.addend &= (-2); // remove thumb bit
5792                         }
5793                                                 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16), target);
5794                                         }
5795                                         result = true;
5796                                 }
5797                                 else
5798                                         throw "for ARM_RELOC_HALF, next reloc is not ARM_RELOC_PAIR";
5799                                 break;
5800                         default:
5801                                 throwf("unknown relocation type %d", reloc->r_type());
5802                                 break;
5803                 }
5804         }
5805         else {
5806                 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
5807                 // file format allows pair to be scattered or not
5808                 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
5809                 nextReloc = &reloc[1];
5810                 srcAddr = sect->addr() + sreloc->r_address();
5811                 dstAddr = sreloc->r_value();
5812                 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + sreloc->r_address());
5813                 instruction = LittleEndian::get32(*fixUpPtr);
5814                 src.atom = this->findAtomByAddress(srcAddr);
5815                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5816                 bool nextRelocIsPair = false;
5817                 uint32_t nextRelocAddress = 0;
5818                 uint32_t nextRelocValue = 0;
5819                 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
5820                         if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
5821                                 nextRelocIsPair = true;
5822                                 nextRelocAddress = nextReloc->r_address();
5823                                 result = true;
5824                         }
5825                 }
5826                 else {
5827                         if ( nextSReloc->r_type() == ARM_RELOC_PAIR ) {
5828                                 nextRelocIsPair = true;
5829                                 nextRelocAddress = nextSReloc->r_address();
5830                                 nextRelocValue = nextSReloc->r_value();
5831                                 result = true;
5832                         }
5833                 }
5834                 switch ( sreloc->r_type() ) {
5835                         case ARM_RELOC_VANILLA:
5836                                 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
5837                                 if ( sreloc->r_length() != 2 )
5838                                         throw "bad length for ARM_RELOC_VANILLA";
5839                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
5840                                 if ( target.atom == NULL )
5841                                         throwf("bad r_value (0x%08X) for ARM_RELOC_VANILLA\n", sreloc->r_value());
5842                                 contentValue = LittleEndian::get32(*fixUpPtr);
5843                                 target.addend = contentValue - target.atom->_objAddress;
5844                                 if ( target.atom->isThumb() )
5845                                         target.addend &= -2; // remove thumb bit
5846                                 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5847                                 break;
5848                         case ARM_RELOC_BR24:
5849                                 // Sign-extend displacement
5850                                 displacement = (instruction & 0x00FFFFFF) << 2;
5851                                 if ( (displacement & 0x02000000) != 0 )
5852                                         displacement |= 0xFC000000;
5853                                 // The pc added will be +8 from the pc
5854                                 displacement += 8;
5855                                 // If this is BLX add H << 1
5856                                 if ((instruction & 0xFE000000) == 0xFA000000)
5857                                         displacement += ((instruction & 0x01000000) >> 23);
5858                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
5859                                 target.addend = (int64_t)(srcAddr + displacement) - (int64_t)(target.atom->_objAddress);
5860                                 parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
5861                                 break;
5862                         case ARM_THUMB_RELOC_BR22:
5863                                 // thumb2 added two more bits to displacement, complicating the displacement decoding
5864                                 {
5865                                         uint32_t s = (instruction >> 10) & 0x1;
5866                                         uint32_t j1 = (instruction >> 29) & 0x1;
5867                                         uint32_t j2 = (instruction >> 27) & 0x1;
5868                                         uint32_t imm10 = instruction & 0x3FF;
5869                                         uint32_t imm11 = (instruction >> 16) & 0x7FF;
5870                                         uint32_t i1 = (j1 == s);
5871                                         uint32_t i2 = (j2 == s);
5872                                         uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
5873                                         int32_t sdis = dis;
5874                                         if ( s )
5875                                                 sdis |= 0xFE000000;
5876                                         displacement = sdis;
5877                                 }
5878                                 // The pc added will be +4 from the pc
5879                                 displacement += 4;
5880                                 dstAddr = srcAddr+displacement;
5881                                 // If the instruction was blx, force the low 2 bits to be clear
5882                                 if ((instruction & 0xF8000000) == 0xE8000000)
5883                                         dstAddr &= 0xFFFFFFFC;
5884                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
5885                                 target.addend = dstAddr - target.atom->_objAddress;
5886                                 parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
5887                                 break;
5888                         case ARM_RELOC_SECTDIFF:
5889                         case ARM_RELOC_LOCAL_SECTDIFF:
5890                                 {
5891                                         if ( ! nextRelocIsPair )
5892                                                 throw "ARM_RELOC_SECTDIFF missing following pair";
5893                                         if ( sreloc->r_length() != 2 )
5894                                                 throw "bad length for ARM_RELOC_SECTDIFF";
5895                                         contentValue = LittleEndian::get32(*fixUpPtr);
5896                                         Atom<arm>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
5897                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5898                                         uint32_t offsetInTarget;
5899                                         Atom<arm>* targetAtom = parser.findAtomByAddressOrLocalTargetOfStub(sreloc->r_value(), &offsetInTarget);
5900                                         // check for addend encoded in the section content
5901                     int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
5902                                         if ( targetAtom->isThumb() )
5903                                                 addend &= -2; // remove thumb bit
5904                                         // if reference to LSDA, add group subordinate fixup
5905                                         if ( targetAtom->contentType() == ld::Atom::typeLSDA ) {
5906                                                 Parser<arm>::SourceLocation     src2;
5907                                                 src2.atom = src.atom;
5908                                                 src2.offsetInAtom = 0;
5909                                                 parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, targetAtom);
5910                                         }
5911                                         if ( addend < 0 ) {
5912                                                 // switch binding base on coalescing
5913                                                 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5914                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5915                                                 }
5916                                                 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5917                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5918                                                 }
5919                                                 else {
5920                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5921                                                 }
5922                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, offsetInTarget);
5923                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5924                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
5925                                                 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
5926                                         }
5927                                         else {
5928                                                 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5929                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5930                                                 }
5931                                                 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5932                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5933                                                 }
5934                                                 else {
5935                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5936                                                 }
5937                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)(offsetInTarget+addend));
5938                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5939                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5940                                                 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
5941                                         }
5942                                 }
5943                                 break;
5944                         case ARM_RELOC_HALF_SECTDIFF:
5945                                 if ( nextRelocIsPair ) {
5946                                         instruction = LittleEndian::get32(*fixUpPtr);
5947                                         Atom<arm>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
5948                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5949                                         Atom<arm>* targetAtom  = parser.findAtomByAddress(sreloc->r_value());
5950                                         uint32_t offsetInTarget = sreloc->r_value() - targetAtom->_objAddress;
5951                                         uint32_t instruction16;
5952                                         uint32_t other16 = (nextRelocAddress & 0xFFFF);
5953                                         bool isThumb;
5954                                         if ( sreloc->r_length() & 2 ) {
5955                                                 isThumb = true;
5956                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
5957                                                 uint32_t imm4 =  (instruction & 0x0000000F);
5958                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
5959                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
5960                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
5961                                         }
5962                                         else {
5963                                                 isThumb = false;
5964                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
5965                                                 uint32_t imm12 = (instruction & 0x00000FFF);
5966                                                 instruction16 = (imm4 << 12) | imm12;
5967                                         }
5968                                         if ( sreloc->r_length() & 1 )
5969                                                 dstAddr = ((instruction16 << 16) | other16);
5970                                         else
5971                                                 dstAddr = (other16 << 16) | instruction16;
5972                                         if ( targetAtom->isThumb() )
5973                                                 dstAddr &= (-2); // remove thumb bit
5974                     int32_t addend = dstAddr - (sreloc->r_value() - nextRelocValue);
5975                                         if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5976                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5977                                         }
5978                                         else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5979                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5980                                         }
5981                                         else {
5982                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5983                                         }
5984                                         parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)offsetInTarget+addend);
5985                                         parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5986                                         parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5987                                         if ( sreloc->r_length() & 1 ) {
5988                                                 // high 16
5989                                                 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
5990                                         }
5991                                         else {
5992                                                 // low 16
5993                                                 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
5994                                         }
5995                                         result = true;
5996                                 }
5997                                 else
5998                                         throw "ARM_RELOC_HALF_SECTDIFF reloc missing following pair";
5999                                 break;
6000                         case ARM_RELOC_HALF:
6001                                 if ( nextRelocIsPair ) {
6002                                         instruction = LittleEndian::get32(*fixUpPtr);
6003                                         Atom<arm>* targetAtom  = parser.findAtomByAddress(sreloc->r_value());
6004                                         uint32_t instruction16;
6005                                         uint32_t other16 = (nextRelocAddress & 0xFFFF);
6006                                         bool isThumb;
6007                                         if ( sreloc->r_length() & 2 ) {
6008                                                 isThumb = true;
6009                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
6010                                                 uint32_t imm4 =  (instruction & 0x0000000F);
6011                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
6012                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
6013                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
6014                                         }
6015                                         else {
6016                                                 isThumb = false;
6017                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
6018                                                 uint32_t imm12 = (instruction & 0x00000FFF);
6019                                                 instruction16 = (imm4 << 12) | imm12;
6020                                         }
6021                                         if ( sreloc->r_length() & 1 )
6022                                                 dstAddr = ((instruction16 << 16) | other16);
6023                                         else
6024                                                 dstAddr = (other16 << 16) | instruction16;
6025                                         if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
6026                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, targetAtom);
6027                                         }
6028                                         else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
6029                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
6030                                         }
6031                                         else {
6032                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
6033                                         }
6034                                         parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, dstAddr - targetAtom->_objAddress);
6035                                         if ( sreloc->r_length() & 1 ) {
6036                                                 // high 16
6037                                                 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
6038                                         }
6039                                         else {
6040                                                 // low 16
6041                                                 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
6042                                         }
6043                                         result = true;
6044                                 }
6045                                 else
6046                                         throw "scattered ARM_RELOC_HALF reloc missing following pair";
6047                                 break;
6048                         default:
6049                                 throwf("unknown ARM scattered relocation type %d", sreloc->r_type());
6050                 }
6051         }
6052         return result;
6053 }
6054 #endif
6055
6056
6057
6058
6059
6060 template <typename A>
6061 bool ObjC1ClassSection<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
6062 {
6063         // inherited
6064         FixedSizeSection<A>::addRelocFixup(parser, reloc);
6065
6066         assert(0 && "needs template specialization");
6067         return false;
6068 }
6069
6070 template <>
6071 bool ObjC1ClassSection<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
6072 {
6073         // if this is the reloc for the super class name string, add implicit reference to super class
6074         if ( ((reloc->r_address() & R_SCATTERED) == 0) && (reloc->r_type() == GENERIC_RELOC_VANILLA) ) {
6075                 assert( reloc->r_length() == 2 );
6076                 assert( ! reloc->r_pcrel() );
6077
6078                 const macho_section<P>* sect = this->machoSection();
6079                 Parser<x86>::SourceLocation     src;
6080                 uint32_t srcAddr = sect->addr() + reloc->r_address();
6081                 src.atom = this->findAtomByAddress(srcAddr);
6082                 src.offsetInAtom = srcAddr - src.atom->objectAddress();
6083                 if ( src.offsetInAtom == 4 ) {
6084                         Parser<x86>::TargetDesc         stringTarget;
6085                         const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6086                         uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
6087                         parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
6088
6089                         assert(stringTarget.atom != NULL);
6090                         assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
6091                         const char* superClassBaseName = (char*)stringTarget.atom->rawContentPointer();
6092                         char* superClassName = new char[strlen(superClassBaseName) + 20];
6093                         strcpy(superClassName, ".objc_class_name_");
6094                         strcat(superClassName, superClassBaseName);
6095
6096                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, superClassName);
6097                 }
6098         }
6099         // inherited
6100         return FixedSizeSection<x86>::addRelocFixup(parser, reloc);
6101 }
6102
6103
6104
6105 template <typename A>
6106 bool Objc1ClassReferences<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
6107 {
6108         // inherited
6109         PointerToCStringSection<A>::addRelocFixup(parser, reloc);
6110
6111         assert(0 && "needs template specialization");
6112         return false;
6113 }
6114
6115
6116
6117 template <>
6118 bool Objc1ClassReferences<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
6119 {
6120         // add implict class refs, fixups not usable yet, so look at relocations
6121         assert( (reloc->r_address() & R_SCATTERED) == 0 );
6122         assert( reloc->r_type() == GENERIC_RELOC_VANILLA );
6123         assert( reloc->r_length() == 2 );
6124         assert( ! reloc->r_pcrel() );
6125
6126         const macho_section<P>* sect = this->machoSection();
6127         Parser<x86>::SourceLocation     src;
6128         uint32_t srcAddr = sect->addr() + reloc->r_address();
6129         src.atom = this->findAtomByAddress(srcAddr);
6130         src.offsetInAtom = srcAddr - src.atom->objectAddress();
6131         Parser<x86>::TargetDesc         stringTarget;
6132         const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6133         uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
6134         parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
6135
6136         assert(stringTarget.atom != NULL);
6137         assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
6138         const char* baseClassName = (char*)stringTarget.atom->rawContentPointer();
6139         char* objcClassName = new char[strlen(baseClassName) + 20];
6140         strcpy(objcClassName, ".objc_class_name_");
6141         strcat(objcClassName, baseClassName);
6142
6143         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, objcClassName);
6144
6145         // inherited
6146         return PointerToCStringSection<x86>::addRelocFixup(parser, reloc);
6147 }
6148
6149
6150 template <typename A>
6151 void Section<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
6152 {
6153         const macho_section<P>* sect = this->machoSection();
6154         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + sect->reloff());
6155         const uint32_t relocCount = sect->nreloc();
6156         for (uint32_t r = 0; r < relocCount; ++r) {
6157                 try {
6158                         if ( this->addRelocFixup(parser, &relocs[r]) )
6159                                 ++r; // skip next
6160                 }
6161                 catch (const char* msg) {
6162                         throwf("in section %s,%s reloc %u: %s", sect->segname(), Section<A>::makeSectionName(sect), r, msg);
6163                 }
6164         }
6165
6166         // add follow-on fixups if .o file is missing .subsections_via_symbols
6167         if ( this->addFollowOnFixups() ) {
6168                 Atom<A>* end = &_endAtoms[-1];
6169                 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
6170                         typename Parser<A>::SourceLocation src(p, 0);
6171                         Atom<A>* nextAtom = &p[1];
6172                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
6173                 }
6174         }
6175         else if ( this->type() == ld::Section::typeCode ) {
6176                 // if FDE broke text not at a symbol, use followOn to keep code together
6177                 Atom<A>* end = &_endAtoms[-1];
6178                 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
6179                         typename Parser<A>::SourceLocation src(p, 0);
6180                         Atom<A>* nextAtom = &p[1];
6181                         if ( (p->symbolTableInclusion() == ld::Atom::symbolTableIn) && (nextAtom->symbolTableInclusion() == ld::Atom::symbolTableNotIn) ) {
6182                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
6183                         }
6184                 }
6185         }
6186
6187         // <rdar://problem/9218847> track data-in-code
6188         if ( parser.hasDataInCodeLabels() && (this->type() == ld::Section::typeCode) ) {
6189                 for (uint32_t i=0; i < parser.symbolCount(); ++i) {
6190                         const macho_nlist<P>& sym =     parser.symbolFromIndex(i);
6191                         // ignore stabs
6192                         if ( (sym.n_type() & N_STAB) != 0 )
6193                                 continue;
6194                         // ignore non-definitions
6195                         if ( (sym.n_type() & N_TYPE) != N_SECT )
6196                                 continue;
6197
6198                         // 'L' labels do not denote atom breaks
6199                         const char* symbolName = parser.nameFromSymbol(sym);
6200                         if ( symbolName[0] == 'L' ) {
6201                                 if ( strncmp(symbolName, "L$start$", 8) == 0 ) {
6202                                         ld::Fixup::Kind kind = ld::Fixup::kindNone;
6203                                         if ( strncmp(&symbolName[8], "data$", 5) == 0 )
6204                                                 kind = ld::Fixup::kindDataInCodeStartData;
6205                                         else if ( strncmp(&symbolName[8], "code$", 5) == 0 )
6206                                                 kind = ld::Fixup::kindDataInCodeEnd;
6207                                         else if ( strncmp(&symbolName[8], "jt8$", 4) == 0 )
6208                                                 kind = ld::Fixup::kindDataInCodeStartJT8;
6209                                         else if ( strncmp(&symbolName[8], "jt16$", 4) == 0 )
6210                                                 kind = ld::Fixup::kindDataInCodeStartJT16;
6211                                         else if ( strncmp(&symbolName[8], "jt32$", 4) == 0 )
6212                                                 kind = ld::Fixup::kindDataInCodeStartJT32;
6213                                         else if ( strncmp(&symbolName[8], "jta32$", 4) == 0 )
6214                                                 kind = ld::Fixup::kindDataInCodeStartJTA32;
6215                                         else
6216                                                 warning("unknown L$start$ label %s in file %s", symbolName, this->file().path());
6217                                         if ( kind != ld::Fixup::kindNone ) {
6218                                                 Atom<A>* inAtom = parser.findAtomByAddress(sym.n_value());
6219                                                 typename Parser<A>::SourceLocation src(inAtom, sym.n_value() - inAtom->objectAddress());
6220                                                 parser.addFixup(src, ld::Fixup::k1of1, kind);
6221                                         }
6222                                 }
6223                         }
6224                 }
6225         }
6226
6227         // <rdar://problem/11150575> Handle LC_DATA_IN_CODE in object files
6228         if ( this->type() == ld::Section::typeCode ) {
6229                 const pint_t startAddr = this->_machOSection->addr();
6230                 const pint_t endAddr = startAddr + this->_machOSection->size();
6231                 for ( const macho_data_in_code_entry<P>* p = parser.dataInCodeStart(); p != parser.dataInCodeEnd(); ++p ) {
6232                         if ( (p->offset() >= startAddr) && (p->offset() < endAddr) ) {
6233                                 ld::Fixup::Kind kind = ld::Fixup::kindNone;
6234                                 switch ( p->kind() ) {
6235                                         case DICE_KIND_DATA:
6236                                                 kind = ld::Fixup::kindDataInCodeStartData;
6237                                                 break;
6238                                         case DICE_KIND_JUMP_TABLE8:
6239                                                 kind = ld::Fixup::kindDataInCodeStartJT8;
6240                                                 break;
6241                                         case DICE_KIND_JUMP_TABLE16:
6242                                                 kind = ld::Fixup::kindDataInCodeStartJT16;
6243                                                 break;
6244                                         case DICE_KIND_JUMP_TABLE32:
6245                                                 kind = ld::Fixup::kindDataInCodeStartJT32;
6246                                                 break;
6247                                         case DICE_KIND_ABS_JUMP_TABLE32:
6248                                                 kind = ld::Fixup::kindDataInCodeStartJTA32;
6249                                                 break;
6250                                         default:
6251                                                 kind = ld::Fixup::kindDataInCodeStartData;
6252                                                 warning("uknown LC_DATA_IN_CODE kind (%d) at offset 0x%08X", p->kind(), p->offset());
6253                                                 break;
6254                                 }
6255                                 Atom<A>* inAtom = parser.findAtomByAddress(p->offset());
6256                                 typename Parser<A>::SourceLocation srcStart(inAtom, p->offset() - inAtom->objectAddress());
6257                                 parser.addFixup(srcStart, ld::Fixup::k1of1, kind);
6258                                 typename Parser<A>::SourceLocation srcEnd(inAtom, p->offset() + p->length() - inAtom->objectAddress());
6259                                 parser.addFixup(srcEnd, ld::Fixup::k1of1, ld::Fixup::kindDataInCodeEnd);
6260                         }
6261                 }
6262         }
6263
6264
6265         // add follow-on fixups for aliases
6266         if ( _hasAliases ) {
6267                 for(Atom<A>* p = _beginAtoms; p < _endAtoms; ++p) {
6268                         if ( p->isAlias() && ! this->addFollowOnFixups() ) {
6269                                 Atom<A>* targetOfAlias = &p[1];
6270                                 assert(p < &_endAtoms[-1]);
6271                                 assert(p->_objAddress == targetOfAlias->_objAddress);
6272                                 typename Parser<A>::SourceLocation src(p, 0);
6273                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, targetOfAlias);
6274                         }
6275                 }
6276         }
6277 }
6278
6279
6280
6281 //
6282 // main function used by linker to instantiate ld::Files
6283 //
6284 ld::relocatable::File* parse(const uint8_t* fileContent, uint64_t fileLength,
6285                                                          const char* path, time_t modTime, ld::File::Ordinal ordinal, const ParserOptions& opts)
6286 {
6287         switch ( opts.architecture ) {
6288 #if SUPPORT_ARCH_x86_64
6289                 case CPU_TYPE_X86_64:
6290                         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) )
6291                                 return mach_o::relocatable::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6292                         break;
6293 #endif
6294 #if SUPPORT_ARCH_i386
6295                 case CPU_TYPE_I386:
6296                         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) )
6297                                 return mach_o::relocatable::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6298                         break;
6299 #endif
6300 #if SUPPORT_ARCH_arm_any
6301                 case CPU_TYPE_ARM:
6302                         if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) )
6303                                 return mach_o::relocatable::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6304                         break;
6305 #endif
6306         }
6307         return NULL;
6308 }
6309
6310 //
6311 // used by archive reader to validate member object file
6312 //
6313 bool isObjectFile(const uint8_t* fileContent, uint64_t fileLength, const ParserOptions& opts)
6314 {
6315         switch ( opts.architecture ) {
6316                 case CPU_TYPE_X86_64:
6317                         return ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) );
6318                 case CPU_TYPE_I386:
6319                         return ( mach_o::relocatable::Parser<x86>::validFile(fileContent) );
6320                 case CPU_TYPE_ARM:
6321                         return ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) );
6322         }
6323         return false;
6324 }
6325
6326 //
6327 // used by linker to infer architecture when no -arch is on command line
6328 //
6329 bool isObjectFile(const uint8_t* fileContent, cpu_type_t* result, cpu_subtype_t* subResult)
6330 {
6331         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6332                 *result = CPU_TYPE_X86_64;
6333                 *subResult = CPU_SUBTYPE_X86_64_ALL;
6334                 return true;
6335         }
6336         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
6337                 *result = CPU_TYPE_I386;
6338                 *subResult = CPU_SUBTYPE_X86_ALL;
6339                 return true;
6340         }
6341         if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6342                 *result = CPU_TYPE_ARM;
6343                 const macho_header<Pointer32<LittleEndian> >* header = (const macho_header<Pointer32<LittleEndian> >*)fileContent;
6344                 *subResult = header->cpusubtype();
6345                 return true;
6346         }
6347         return false;
6348 }
6349
6350 //
6351 // used by linker is error messages to describe bad .o file
6352 //
6353 const char* archName(const uint8_t* fileContent)
6354 {
6355         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6356                 return mach_o::relocatable::Parser<x86_64>::fileKind(fileContent);
6357         }
6358         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
6359                 return mach_o::relocatable::Parser<x86>::fileKind(fileContent);
6360         }
6361         if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6362                 return mach_o::relocatable::Parser<arm>::fileKind(fileContent);
6363         }
6364         return NULL;
6365 }
6366
6367 //
6368 // Used by archive reader when -ObjC option is specified
6369 //
6370 bool hasObjC2Categories(const uint8_t* fileContent)
6371 {
6372         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6373                 return mach_o::relocatable::Parser<x86_64>::hasObjC2Categories(fileContent);
6374         }
6375         else if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6376                 return mach_o::relocatable::Parser<arm>::hasObjC2Categories(fileContent);
6377         }
6378         else if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
6379                 return mach_o::relocatable::Parser<x86>::hasObjC2Categories(fileContent);
6380         }
6381         return false;
6382 }
6383
6384 //
6385 // Used by archive reader when -ObjC option is specified
6386 //
6387 bool hasObjC1Categories(const uint8_t* fileContent)
6388 {
6389         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
6390                 return mach_o::relocatable::Parser<x86>::hasObjC1Categories(fileContent);
6391         }
6392         return false;
6393 }
6394
6395
6396
6397 } // namespace relocatable
6398 } // namespace mach_o
6399
6400