src/ld/parsers/macho_relocatable_file.cpp

   1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
   2  *
   3  * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
   4  *
   5  * @APPLE_LICENSE_HEADER_START@
   6  *
   7  * This file contains Original Code and/or Modifications of Original Code
   8  * as defined in and that are subject to the Apple Public Source License
   9  * Version 2.0 (the 'License'). You may not use this file except in
  10  * compliance with the License. Please obtain a copy of the License at
  11  * http://www.opensource.apple.com/apsl/ and read it before using this
  12  * file.
  13  *
  14  * The Original Code and all software distributed under the License are
  15  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  19  * Please see the License for the specific language governing rights and
  20  * limitations under the License.
  21  *
  22  * @APPLE_LICENSE_HEADER_END@
  23  */
  24
  25
  26 #include <stdint.h>
  27 #include <stdlib.h>
  28 #include <math.h>
  29 #include <unistd.h>
  30 #include <fcntl.h>
  31 #include <sys/param.h>
  32 #include <sys/stat.h>
  33 #include <sys/mman.h>
  34
  35 #include "MachOFileAbstraction.hpp"
  36
  37 #include "libunwind/DwarfInstructions.hpp"
  38 #include "libunwind/AddressSpace.hpp"
  39 #include "libunwind/Registers.hpp"
  40
  41 #include <vector>
  42 #include <set>
  43 #include <map>
  44 #include <algorithm>
  45
  46 #include "dwarf2.h"
  47 #include "debugline.h"
  48
  49 #include "Architectures.hpp"
  50 #include "ld.hpp"
  51 #include "macho_relocatable_file.h"
  52
  53
  54
  55 extern void throwf(const char* format, ...) __attribute__ ((noreturn,format(printf, 1, 2)));
  56 extern void warning(const char* format, ...) __attribute__((format(printf, 1, 2)));
  57
  58 namespace mach_o {
  59 namespace relocatable {
  60
  61
  62 // forward reference
  63 template <typename A> class Parser;
  64 template <typename A> class Atom;
  65 template <typename A> class Section;
  66 template <typename A> class CFISection;
  67 template <typename A> class CUSection;
  68
  69 template <typename A>
  70 class File : public ld::relocatable::File
  71 {
  72 public:
  73                                                                                         File(const char* p, time_t mTime, const uint8_t* content, ld::File::Ordinal ord) :
  74                                                                                                 ld::relocatable::File(p,mTime,ord), _fileContent(content),
  75                                                                                                 _sectionsArray(NULL), _atomsArray(NULL),
  76                                                                                                 _sectionsArrayCount(0), _atomsArrayCount(0),
  77                                                                                                 _debugInfoKind(ld::relocatable::File::kDebugInfoNone),
  78                                                                                                 _dwarfTranslationUnitDir(NULL), _dwarfTranslationUnitFile(NULL),
  79                                                                                                 _dwarfDebugInfoSect(NULL), _dwarfDebugAbbrevSect(NULL),
  80                                                                                                 _dwarfDebugLineSect(NULL), _dwarfDebugStringSect(NULL),
  81                                                                                                 _objConstraint(ld::File::objcConstraintNone),
  82                                                                                                 _cpuSubType(0),
  83                                                                                                 _canScatterAtoms(false) {}
  84         virtual                                                                 ~File();
  85
  86         // overrides of ld::File
  87         virtual bool                                                                            forEachAtom(ld::File::AtomHandler&) const;
  88         virtual bool                                                                            justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const
  89                                                                                                                                                                         { return false; }
  90
  91         // overrides of ld::relocatable::File
  92         virtual ObjcConstraint                                                          objCConstraint() const                  { return _objConstraint; }
  93         virtual uint32_t                                                                        cpuSubType() const                              { return _cpuSubType; }
  94         virtual DebugInfoKind                                                           debugInfo() const                               { return _debugInfoKind; }
  95         virtual const std::vector<ld::relocatable::File::Stab>* stabs() const                                   { return &_stabs; }
  96         virtual bool                                                                            canScatterAtoms() const                 { return _canScatterAtoms; }
  97         bool                                                                                            translationUnitSource(const char** dir, const char** name) const;
  98
  99         const uint8_t*                                                                          fileContent()                                   { return _fileContent; }
 100 private:
 101         friend class Atom<A>;
 102         friend class Section<A>;
 103         friend class Parser<A>;
 104         friend class CFISection<A>::OAS;
 105
 106         typedef typename A::P                                   P;
 107
 108         const uint8_t*                                                  _fileContent;
 109         Section<A>**                                                    _sectionsArray;
 110         uint8_t*                                                                _atomsArray;
 111         uint32_t                                                                _sectionsArrayCount;
 112         uint32_t                                                                _atomsArrayCount;
 113         std::vector<ld::Fixup>                                  _fixups;
 114         std::vector<ld::Atom::UnwindInfo>               _unwindInfos;
 115         std::vector<ld::Atom::LineInfo>                 _lineInfos;
 116         std::vector<ld::relocatable::File::Stab>_stabs;
 117         ld::relocatable::File::DebugInfoKind    _debugInfoKind;
 118         const char*                                                             _dwarfTranslationUnitDir;
 119         const char*                                                             _dwarfTranslationUnitFile;
 120         const macho_section<P>*                                 _dwarfDebugInfoSect;
 121         const macho_section<P>*                                 _dwarfDebugAbbrevSect;
 122         const macho_section<P>*                                 _dwarfDebugLineSect;
 123         const macho_section<P>*                                 _dwarfDebugStringSect;
 124         ld::File::ObjcConstraint                                _objConstraint;
 125         uint32_t                                                                _cpuSubType;
 126         bool                                                                    _canScatterAtoms;
 127 };
 128
 129
 130 template <typename A>
 131 class Section : public ld::Section
 132 {
 133 public:
 134         typedef typename A::P::uint_t   pint_t;
 135         typedef typename A::P                   P;
 136         typedef typename A::P::E                E;
 137
 138         virtual                                                 ~Section()                                      { }
 139         class File<A>&                                  file() const                            { return _file; }
 140         const macho_section<P>*                 machoSection() const            { return _machOSection; }
 141         uint32_t                                                sectionNum(class Parser<A>&) const;
 142         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr);
 143         virtual ld::Atom::ContentType   contentType()                           { return ld::Atom::typeUnclassified; }
 144         virtual bool                                    dontDeadStrip()                         { return (this->_machOSection->flags() & S_ATTR_NO_DEAD_STRIP); }
 145         virtual Atom<A>*                                findAtomByAddress(pint_t addr) { return this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms); }
 146         virtual bool                                    addFollowOnFixups() const       { return ! _file.canScatterAtoms(); }
 147         virtual uint32_t                                appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 148                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 149                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
 150         virtual uint32_t                                computeAtomCount(class Parser<A>& parser,
 151                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 152                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
 153         virtual void                                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 154         virtual bool                                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
 155         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const { return 0; }
 156         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 157                                                                                                         const ld::IndirectBindingTable& ind) const { return false; }
 158         static const char*                              makeSectionName(const macho_section<typename A::P>* s);
 159
 160 protected:
 161                                                 Section(File<A>& f, const macho_section<typename A::P>* s)
 162                                                         : ld::Section(makeSegmentName(s), makeSectionName(s), sectionType(s)),
 163                                                                 _file(f), _machOSection(s), _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
 164                                                 Section(File<A>& f, const char* segName, const char* sectName, ld::Section::Type t, bool hidden=false)
 165                                                         : ld::Section(segName, sectName, t, hidden), _file(f), _machOSection(NULL),
 166                                                                 _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
 167
 168
 169         Atom<A>*                                                findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end);
 170         uint32_t                                                x86_64PcRelOffset(uint8_t r_type);
 171         static const char*                              makeSegmentName(const macho_section<typename A::P>* s);
 172         static bool                                             readable(const macho_section<typename A::P>* s);
 173         static bool                                             writable(const macho_section<typename A::P>* s);
 174         static bool                                             exectuable(const macho_section<typename A::P>* s);
 175         static ld::Section::Type                sectionType(const macho_section<typename A::P>* s);
 176
 177         File<A>&                                                _file;
 178         const macho_section<P>*                 _machOSection;
 179         class Atom<A>*                                  _beginAtoms;
 180         class Atom<A>*                                  _endAtoms;
 181         bool                                                    _hasAliases;
 182 };
 183
 184
 185 template <typename A>
 186 class CFISection : public Section<A>
 187 {
 188 public:
 189                                                 CFISection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 190                                                         : Section<A>(f, s) { }
 191         uint32_t                        cfiCount();
 192
 193         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeCFI; }
 194         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 195         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 196         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 197         virtual bool            addFollowOnFixups() const       { return false; }
 198
 199
 200         ///
 201         /// ObjectFileAddressSpace is used as a template parameter to UnwindCursor for parsing
 202         /// dwarf CFI information in an object file.
 203         ///
 204         class OAS
 205         {
 206         public:
 207                         typedef typename A::P::uint_t   pint_t;
 208                         typedef typename A::P                   P;
 209                         typedef typename A::P::E                E;
 210                         typedef typename A::P::uint_t   sint_t;
 211
 212                                                         OAS(CFISection<A>& ehFrameSection, const uint8_t* ehFrameBuffer) :
 213                                                                 _ehFrameSection(ehFrameSection),
 214                                                                 _ehFrameContent(ehFrameBuffer),
 215                                                                 _ehFrameStartAddr(ehFrameSection.machoSection()->addr()),
 216                                                                 _ehFrameEndAddr(ehFrameSection.machoSection()->addr()+ehFrameSection.machoSection()->size()) {}
 217
 218                         uint8_t                 get8(pint_t addr) { return *((uint8_t*)mappedAddress(addr)); }
 219                         uint16_t                get16(pint_t addr)      { return E::get16(*((uint16_t*)mappedAddress(addr))); }
 220                         uint32_t                get32(pint_t addr)      { return E::get32(*((uint32_t*)mappedAddress(addr))); }
 221                         uint64_t                get64(pint_t addr)      { return E::get64(*((uint64_t*)mappedAddress(addr))); }
 222                         pint_t                  getP(pint_t addr)       { return P::getP(*((pint_t*)mappedAddress(addr))); }
 223                         uint64_t                getULEB128(pint_t& addr, pint_t end);
 224                         int64_t                 getSLEB128(pint_t& addr, pint_t end);
 225                         pint_t                  getEncodedP(pint_t& addr, pint_t end, uint8_t encoding);
 226         private:
 227                 const void*                     mappedAddress(pint_t addr);
 228
 229                 CFISection<A>&                          _ehFrameSection;
 230                 const uint8_t*                          _ehFrameContent;
 231                 pint_t                                          _ehFrameStartAddr;
 232                 pint_t                                          _ehFrameEndAddr;
 233         };
 234
 235
 236         typedef typename A::P::uint_t                   pint_t;
 237         typedef libunwind::CFI_Atom_Info<OAS>   CFI_Atom_Info;
 238
 239         void                            cfiParse(class Parser<A>& parser, uint8_t* buffer, CFI_Atom_Info cfiArray[], uint32_t cfiCount);
 240         bool                            needsRelocating();
 241
 242         static bool                     bigEndian();
 243 private:
 244         void                            addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo);
 245         static void                     warnFunc(void* ref, uint64_t funcAddr, const char* msg);
 246 };
 247
 248
 249 template <typename A>
 250 class CUSection : public Section<A>
 251 {
 252 public:
 253                                                 CUSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 254                                                         : Section<A>(f, s) { }
 255
 256         typedef typename A::P::uint_t   pint_t;
 257         typedef typename A::P                   P;
 258         typedef typename A::P::E                E;
 259
 260         virtual uint32_t                computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
 261         virtual uint32_t                appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
 262         virtual void                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 263         virtual bool                    addFollowOnFixups() const       { return false; }
 264
 265         struct Info {
 266                 pint_t          functionStartAddress;
 267                 uint32_t        functionSymbolIndex;
 268                 uint32_t        rangeLength;
 269                 uint32_t        compactUnwindInfo;
 270                 const char*     personality;
 271                 pint_t          lsdaAddress;
 272                 Atom<A>*        function;
 273                 Atom<A>*        lsda;
 274         };
 275
 276         uint32_t                                count();
 277         void                                    parse(class Parser<A>& parser, uint32_t cnt, Info array[]);
 278
 279
 280 private:
 281
 282         const char*                             personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc);
 283
 284         static int                              infoSorter(const void* l, const void* r);
 285
 286 };
 287
 288
 289 template <typename A>
 290 class TentativeDefinitionSection : public Section<A>
 291 {
 292 public:
 293                                                 TentativeDefinitionSection(Parser<A>& parser, File<A>& f)
 294                                                         : Section<A>(f, "__DATA", "__comm/tent", ld::Section::typeTentativeDefs)  {}
 295
 296         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeZeroFill; }
 297         virtual bool            addFollowOnFixups() const       { return false; }
 298         virtual Atom<A>*        findAtomByAddress(typename A::P::uint_t addr) { throw "TentativeDefinitionSection::findAtomByAddress() should never be called"; }
 299         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 300                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 301         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 302                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 303                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&);
 304         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
 305 private:
 306         typedef typename A::P::uint_t   pint_t;
 307         typedef typename A::P                   P;
 308 };
 309
 310
 311 template <typename A>
 312 class AbsoluteSymbolSection : public Section<A>
 313 {
 314 public:
 315                                                 AbsoluteSymbolSection(Parser<A>& parser, File<A>& f)
 316                                                         : Section<A>(f, "__DATA", "__abs", ld::Section::typeAbsoluteSymbols, true)  {}
 317
 318         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeUnclassified; }
 319         virtual bool                                    dontDeadStrip()         { return false; }
 320         virtual ld::Atom::Alignment             alignmentForAddress(typename A::P::uint_t addr) { return ld::Atom::Alignment(0); }
 321         virtual bool            addFollowOnFixups() const       { return false; }
 322         virtual Atom<A>*        findAtomByAddress(typename A::P::uint_t addr) { throw "AbsoluteSymbolSection::findAtomByAddress() should never be called"; }
 323         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 324                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 325         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 326                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 327                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&);
 328         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
 329         virtual Atom<A>*        findAbsAtomForValue(typename A::P::uint_t);
 330
 331 private:
 332         typedef typename A::P::uint_t   pint_t;
 333         typedef typename A::P                   P;
 334 };
 335
 336
 337 template <typename A>
 338 class SymboledSection : public Section<A>
 339 {
 340 public:
 341                                                 SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s);
 342         virtual ld::Atom::ContentType   contentType() { return _type; }
 343         virtual bool                                    dontDeadStrip();
 344         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 345                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 346         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 347                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
 348                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 349 protected:
 350         typedef typename A::P::uint_t   pint_t;
 351         typedef typename A::P                   P;
 352
 353         ld::Atom::ContentType                   _type;
 354 };
 355
 356
 357 template <typename A>
 358 class TLVDefsSection : public SymboledSection<A>
 359 {
 360 public:
 361                                                 TLVDefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s) :
 362                                                         SymboledSection<A>(parser, f, s) { }
 363
 364 private:
 365
 366 };
 367
 368
 369 template <typename A>
 370 class ImplicitSizeSection : public Section<A>
 371 {
 372 public:
 373                                                 ImplicitSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 374                                                         : Section<A>(f, s) { }
 375         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 376         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 377 protected:
 378         typedef typename A::P::uint_t   pint_t;
 379         typedef typename A::P                   P;
 380
 381         virtual bool                                            addFollowOnFixups() const               { return false; }
 382         virtual const char*                                     unlabeledAtomName(Parser<A>& parser, pint_t addr) = 0;
 383         virtual ld::Atom::SymbolTableInclusion  symbolTableInclusion()          { return ld::Atom::symbolTableNotIn; }
 384         virtual pint_t                                          elementSizeAtAddress(pint_t addr) = 0;
 385         virtual ld::Atom::Scope                         scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeLinkageUnit; }
 386         virtual bool                                            useElementAt(Parser<A>& parser,
 387                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr) = 0;
 388         virtual ld::Atom::Definition            definition()                                    { return ld::Atom::definitionRegular; }
 389         virtual ld::Atom::Combine                       combine(Parser<A>& parser, pint_t addr) = 0;
 390         virtual bool                                            ignoreLabel(const char* label)  { return (label[0] == 'L'); }
 391 };
 392
 393 template <typename A>
 394 class FixedSizeSection : public ImplicitSizeSection<A>
 395 {
 396 public:
 397                                                 FixedSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 398                                                         : ImplicitSizeSection<A>(parser, f, s) { }
 399 protected:
 400         typedef typename A::P::uint_t   pint_t;
 401         typedef typename A::P                   P;
 402         typedef typename A::P::E                E;
 403
 404         virtual bool                                    useElementAt(Parser<A>& parser,
 405                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
 406                                                                                                                 { return true; }
 407 };
 408
 409
 410 template <typename A>
 411 class Literal4Section : public FixedSizeSection<A>
 412 {
 413 public:
 414                                                 Literal4Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 415                                                         : FixedSizeSection<A>(parser, f, s) {}
 416 protected:
 417         typedef typename A::P::uint_t   pint_t;
 418         typedef typename A::P                   P;
 419
 420         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(2); }
 421         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "4-byte-literal"; }
 422         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 4; }
 423         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 424         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 425         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 426                                                                                                         const ld::IndirectBindingTable& ind) const;
 427 };
 428
 429 template <typename A>
 430 class Literal8Section : public FixedSizeSection<A>
 431 {
 432 public:
 433                                                 Literal8Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 434                                                         : FixedSizeSection<A>(parser, f, s) {}
 435 protected:
 436         typedef typename A::P::uint_t   pint_t;
 437         typedef typename A::P                   P;
 438
 439         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(3); }
 440         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "8-byte-literal"; }
 441         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 8; }
 442         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 443         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 444         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 445                                                                                                         const ld::IndirectBindingTable& ind) const;
 446 };
 447
 448 template <typename A>
 449 class Literal16Section : public FixedSizeSection<A>
 450 {
 451 public:
 452                                                 Literal16Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 453                                                         : FixedSizeSection<A>(parser, f, s) {}
 454 protected:
 455         typedef typename A::P::uint_t   pint_t;
 456         typedef typename A::P                   P;
 457
 458         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(4); }
 459         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "16-byte-literal"; }
 460         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 16; }
 461         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 462         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 463         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 464                                                                                                         const ld::IndirectBindingTable& ind) const;
 465 };
 466
 467 // Section of pointer-sized elements typed as non-lazy pointers; labels are ignored (ignoreLabel() is always true).
 468 template <typename A>
 469 class NonLazyPointerSection : public FixedSizeSection<A>
 470 {
 471 public:
 472         NonLazyPointerSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 473                 : FixedSizeSection<A>(parser, f, s) {}
 474 protected:
 475         typedef typename A::P           P;
 476         typedef typename A::P::uint_t   pint_t;
 477         // one pint_t per element, aligned to log2 of its own size
 478         virtual pint_t                  elementSizeAtAddress(pint_t addr)       { return sizeof(pint_t); }
 479         virtual ld::Atom::Alignment     alignmentForAddress(pint_t addr)        { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 480         virtual ld::Atom::ContentType   contentType()                           { return ld::Atom::typeNonLazyPointer; }
 481         virtual const char*             unlabeledAtomName(Parser<A>&, pint_t)   { return "non_lazy_ptr"; }
 482         virtual bool                    ignoreLabel(const char* label)          { return true; }
 483         virtual void                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 484         virtual ld::Atom::Scope         scopeAtAddress(Parser<A>& parser, pint_t addr);
 485         virtual ld::Atom::Combine       combine(Parser<A>&, pint_t);
 486         virtual unsigned long           contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 487         virtual bool                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 488                                                         const ld::IndirectBindingTable& ind) const;
 489
 490 private:
 491         static ld::Fixup::Kind          fixupKind();
 492         static const char*              targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind);
 493 };
 494
 495 // Section of fixed-size elements named "CFString", each four pointers long; combined by name and references.
 496 template <typename A>
 497 class CFStringSection : public FixedSizeSection<A>
 498 {
 499 public:
 500         CFStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 501                 : FixedSizeSection<A>(parser, f, s) {}
 502 protected:
 503         typedef typename A::P::uint_t   pint_t;
 504         // element geometry: 4*sizeof(pint_t) bytes, aligned to log2(sizeof(pint_t))
 505         virtual pint_t                  elementSizeAtAddress(pint_t addr)       { return 4*sizeof(pint_t); }
 506         virtual ld::Atom::Alignment     alignmentForAddress(pint_t addr)        { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 507         virtual const char*             unlabeledAtomName(Parser<A>&, pint_t)   { return "CFString"; }
 508         virtual ld::Atom::Combine       combine(Parser<A>&, pint_t)             { return ld::Atom::combineByNameAndReferences; }
 509         virtual bool                    ignoreLabel(const char* label)          { return true; }
 510         virtual unsigned long           contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 511         virtual bool                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 512                                                         const ld::IndirectBindingTable& ind) const;
 513 private:
 514         enum ContentType { contentUTF8, contentUTF16, contentUnknown };
 515         static const uint8_t*           targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
 516                                                         ContentType* ct, unsigned int* count);
 517 };
 518
 519 // Fixed-size section whose atoms are global, kept in the symbol table and never combined or coalesced.
 520 template <typename A>
 521 class ObjC1ClassSection : public FixedSizeSection<A>
 522 {
 523 public:
 524         ObjC1ClassSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 525                 : FixedSizeSection<A>(parser, f, s) {}
 526 protected:
 527         typedef typename A::P           P;
 528         typedef typename A::P::E        E;
 529         typedef typename A::P::uint_t   pint_t;
 530         // constant per-atom attributes
 531         virtual ld::Atom::Scope         scopeAtAddress(Parser<A>& , pint_t )    { return ld::Atom::scopeGlobal; }
 532         virtual ld::Atom::SymbolTableInclusion symbolTableInclusion()           { return ld::Atom::symbolTableIn; }
 533         virtual ld::Atom::Alignment     alignmentForAddress(pint_t addr)        { return ld::Atom::Alignment(2); }
 534         virtual ld::Atom::Combine       combine(Parser<A>&, pint_t)             { return ld::Atom::combineNever; }
 535         virtual bool                    ignoreLabel(const char* label)          { return true; }
 536         virtual const char*             unlabeledAtomName(Parser<A>&, pint_t);
 537         virtual pint_t                  elementSizeAtAddress(pint_t addr);
 538         virtual bool                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
 539         // coalescing is disabled: the hash is always 0 and nothing is reported as coalescable
 540         virtual unsigned long           contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const { return 0; }
 541         virtual bool                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 542                                                         const ld::IndirectBindingTable& ind) const { return false; }
 543 };
 544
 545 // Section of pointer-sized elements named "objc-class-ref"; combined by name and references, labels ignored.
 546 template <typename A>
 547 class ObjC2ClassRefsSection : public FixedSizeSection<A>
 548 {
 549 public:
 550         ObjC2ClassRefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 551                 : FixedSizeSection<A>(parser, f, s) {}
 552 protected:
 553         typedef typename A::P::uint_t   pint_t;
 554         // one pint_t per element, aligned to log2 of its own size
 555         virtual pint_t                  elementSizeAtAddress(pint_t addr)       { return sizeof(pint_t); }
 556         virtual ld::Atom::Alignment     alignmentForAddress(pint_t addr)        { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 557         virtual const char*             unlabeledAtomName(Parser<A>&, pint_t)   { return "objc-class-ref"; }
 558         virtual ld::Atom::Combine       combine(Parser<A>&, pint_t)             { return ld::Atom::combineByNameAndReferences; }
 559         virtual bool                    ignoreLabel(const char* label)          { return true; }
 560         virtual unsigned long           contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 561         virtual bool                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 562                                                         const ld::IndirectBindingTable& ind) const;
 563 private:
 564         const char*                     targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 565 };
 566
 567 // Section of pointer-sized elements named "objc-cat-list"; translation-unit scope, never combined.
 568 template <typename A>
 569 class ObjC2CategoryListSection : public FixedSizeSection<A>
 570 {
 571 public:
 572         ObjC2CategoryListSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 573                 : FixedSizeSection<A>(parser, f, s) {}
 574 protected:
 575         typedef typename A::P::uint_t   pint_t;
 576         // one pint_t per element, aligned to log2 of its own size
 577         virtual pint_t                  elementSizeAtAddress(pint_t addr)       { return sizeof(pint_t); }
 578         virtual ld::Atom::Alignment     alignmentForAddress(pint_t addr)        { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 579         virtual ld::Atom::Scope         scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeTranslationUnit; }
 580         virtual const char*             unlabeledAtomName(Parser<A>&, pint_t)   { return "objc-cat-list"; }
 581         virtual ld::Atom::Combine       combine(Parser<A>&, pint_t)             { return ld::Atom::combineNever; }
 582         virtual bool                    ignoreLabel(const char* label)          { return true; }
 583 private:
 584         const char*                     targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 585 };
 586
 587 // Section of pointer-sized elements named "pointer-to-literal-cstring"; combined by name and references.
 588 template <typename A>
 589 class PointerToCStringSection : public FixedSizeSection<A>
 590 {
 591 public:
 592         PointerToCStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 593                 : FixedSizeSection<A>(parser, f, s) {}
 594 protected:
 595         typedef typename A::P::uint_t   pint_t;
 596         // one pint_t per element, aligned to log2 of its own size
 597         virtual pint_t                  elementSizeAtAddress(pint_t addr)       { return sizeof(pint_t); }
 598         virtual ld::Atom::Alignment     alignmentForAddress(pint_t addr)        { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 599         virtual const char*             unlabeledAtomName(Parser<A>&, pint_t)   { return "pointer-to-literal-cstring"; }
 600         virtual ld::Atom::Combine       combine(Parser<A>&, pint_t)             { return ld::Atom::combineByNameAndReferences; }
 601         virtual bool                    ignoreLabel(const char* label)          { return true; }
 602         virtual unsigned long           contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 603         virtual bool                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 604                                                         const ld::IndirectBindingTable& ind) const;
 605         virtual const char*             targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;   // virtual: Objc1ClassReferences redeclares it
 606 };
 607
 608 // PointerToCStringSection variant whose unlabeled atoms are named "pointer-to-literal-objc-class-name".
 609 template <typename A>
 610 class Objc1ClassReferences : public PointerToCStringSection<A>
 611 {
 612 public:
 613         Objc1ClassReferences(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 614                 : PointerToCStringSection<A>(parser, f, s) {}
 615
 616         typedef typename A::P           P;
 617         typedef typename A::P::uint_t   pint_t;
 618         // members redeclared here on top of PointerToCStringSection (all public in this class)
 619         virtual const char*             unlabeledAtomName(Parser<A>&, pint_t)   { return "pointer-to-literal-objc-class-name"; }
 620         virtual bool                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
 621         virtual const char*             targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 622 };
 623
 624 // ImplicitSizeSection whose atoms have content type typeCString and are combined by name and content.
 625 template <typename A>
 626 class CStringSection : public ImplicitSizeSection<A>
 627 {
 628 public:
 629         CStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 630                 : ImplicitSizeSection<A>(parser, f, s) {}
 631 protected:
 632         typedef typename A::P           P;
 633         typedef typename A::P::uint_t   pint_t;
 634         // constant attributes first, then the hooks that are defined out of line
 635         virtual ld::Atom::ContentType   contentType()                           { return ld::Atom::typeCString; }
 636         virtual const char*             unlabeledAtomName(Parser<A>&, pint_t)   { return "cstring"; }
 637         virtual ld::Atom::Combine       combine(Parser<A>&, pint_t)             { return ld::Atom::combineByNameAndContent; }
 638         virtual Atom<A>*                findAtomByAddress(pint_t addr);
 639         virtual pint_t                  elementSizeAtAddress(pint_t addr);
 640         virtual bool                    ignoreLabel(const char* label);
 641         virtual bool                    useElementAt(Parser<A>& parser,
 642                                                         struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr);
 643         virtual unsigned long           contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 644         virtual bool                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 645                                                         const ld::IndirectBindingTable& ind) const;
 646
 647 };
 648
 649 // SymboledSection whose atoms are combined by name and content.
 650 template <typename A>
 651 class UTF16StringSection : public SymboledSection<A>
 652 {
 653 public:
 654         UTF16StringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 655                 : SymboledSection<A>(parser, f, s) {}
 656 protected:
 657         typedef typename A::P           P;
 658         typedef typename A::P::uint_t   pint_t;
 659         // coalescing policy; the hash and comparison are defined out of line
 660         virtual ld::Atom::Combine       combine(Parser<A>&, pint_t)             { return ld::Atom::combineByNameAndContent; }
 661         virtual unsigned long           contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 662         virtual bool                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 663                                                         const ld::IndirectBindingTable& ind) const;
 664 };
 665
 666
 667 //
 668 // Atoms in mach-o files: Atom<A> is the ld::Atom subclass used for content parsed from a mach-o object file
 669 //
 670 template <typename A>
 671 class Atom : public ld::Atom
 672 {
 673 public:
 674         // overrides of ld::Atom; most forward to the owning Section<A> or to its File<A>
 675         virtual ld::File*                       file() const            { return &sect().file(); }
 676         virtual bool                            translationUnitSource(const char** dir, const char** nm) const
 677                                                                         { return sect().file().translationUnitSource(dir, nm); }
 678         virtual const char*                     name() const            { return _name; }
 679         virtual uint64_t                        size() const            { return _size; }
 680         virtual uint64_t                        objectAddress() const   { return _objAddress; }
 681         virtual void                            copyRawContent(uint8_t buffer[]) const;
 682         virtual const uint8_t*                  rawContentPointer() const { return contentPointer(); }
 683         virtual unsigned long                   contentHash(const ld::IndirectBindingTable& ind) const  // cached in _hash; 0 means "not computed yet"
 684                                                                         { if ( _hash == 0 ) _hash = sect().contentHash(this, ind); return _hash; }
 685         virtual bool                            canCoalesceWith(const ld::Atom& rhs, const ld::IndirectBindingTable& ind) const
 686                                                                         { return sect().canCoalesceWith(this, rhs, ind); }
 687         virtual ld::Fixup::iterator             fixupsBegin() const     { return &machofile()._fixups[_fixupsStartIndex]; }
 688         virtual ld::Fixup::iterator             fixupsEnd() const       { return &machofile()._fixups[_fixupsStartIndex+_fixupsCount]; }
 689         virtual ld::Atom::UnwindInfo::iterator  beginUnwind() const     { return &machofile()._unwindInfos[_unwindInfoStartIndex]; }
 690         virtual ld::Atom::UnwindInfo::iterator  endUnwind() const       { return &machofile()._unwindInfos[_unwindInfoStartIndex+_unwindInfoCount]; }
 691         virtual ld::Atom::LineInfo::iterator    beginLineInfo() const   { return &machofile()._lineInfos[_lineInfoStartIndex]; }
 692         virtual ld::Atom::LineInfo::iterator    endLineInfo() const     { return &machofile()._lineInfos[_lineInfoStartIndex+_lineInfoCount]; }
 693
 694 private:
 695         // widths of the start-index/count bit-fields declared at the end of the class
 696         enum {  kFixupStartIndexBits = 32,
 697                 kLineInfoStartIndexBits = 32,
 698                 kUnwindInfoStartIndexBits = 24,
 699                 kFixupCountBits = 24,
 700                 kLineInfoCountBits = 12,
 701                 kUnwindInfoCountBits = 4
 702         }; // must sum to 128
 703
 704 public:
 705         // methods for all atoms from mach-o object file
 706         Section<A>&             sect() const                    { return (Section<A>&)section(); }
 707         File<A>&                machofile() const               { return ((Section<A>*)(this->_section))->file(); }
 708         void                    setFixupsRange(uint32_t s, uint32_t c);
 709         void                    setUnwindInfoRange(uint32_t s, uint32_t c);
 710         void                    extendUnwindInfoRange();
 711         void                    setLineInfoRange(uint32_t s, uint32_t c);
 712         bool                    roomForMoreLineInfoCount()      { return (_lineInfoCount < ((1<<kLineInfoCountBits)-1)); }
 713         void                    incrementLineInfoCount()        { assert(roomForMoreLineInfoCount()); ++_lineInfoCount; }
 714         void                    incrementFixupCount()           { if (_fixupsCount == ((1 << kFixupCountBits)-1))       // throwf() is declared noreturn
 715                                                                         throwf("too many fixups in %s", name()); ++_fixupsCount; }
 716         const uint8_t*          contentPointer() const;
 717         uint32_t                fixupCount() const              { return _fixupsCount; }
 718         void                    verifyAlignment() const;
 719
 720         typedef typename A::P                   P;
 721         typedef typename A::P::E                E;
 722         typedef typename A::P::uint_t           pint_t;
 723         // construct via all attributes; every start index and count begins at 0
 724         Atom(Section<A>& sct, const char* nm, pint_t addr, uint64_t sz,
 725                 ld::Atom::Definition d, ld::Atom::Combine c, ld::Atom::Scope s,
 726                 ld::Atom::ContentType ct, ld::Atom::SymbolTableInclusion i,
 727                 bool dds, bool thumb, bool al, ld::Atom::Alignment a)
 728                         : ld::Atom((ld::Section&)sct, d, c, s, ct, i, dds, thumb, al, a),
 729                                 _size(sz), _objAddress(addr), _name(nm), _hash(0),
 730                                 _fixupsStartIndex(0), _lineInfoStartIndex(0),
 731                                 _unwindInfoStartIndex(0), _fixupsCount(0),
 732                                 _lineInfoCount(0), _unwindInfoCount(0) { }
 733         // construct via symbol table entry; attributes are derived from sym by the parser and the section
 734         Atom(Section<A>& sct, Parser<A>& parser, const macho_nlist<P>& sym,
 735                 uint64_t sz, bool alias=false)
 736                         : ld::Atom((ld::Section&)sct, parser.definitionFromSymbol(sym),
 737                                 parser.combineFromSymbol(sym), parser.scopeFromSymbol(sym),
 738                                 parser.resolverFromSymbol(sym) ? ld::Atom::typeResolver : sct.contentType(),
 739                                 parser.inclusionFromSymbol(sym),
 740                                 parser.dontDeadStripFromSymbol(sym) || sct.dontDeadStrip(),
 741                                 parser.isThumbFromSymbol(sym), alias,
 742                                 sct.alignmentForAddress(sym.n_value())),
 743                           _size(sz), _objAddress(sym.n_value()),
 744                           _name(parser.nameFromSymbol(sym)), _hash(0),
 745                           _fixupsStartIndex(0), _lineInfoStartIndex(0),
 746                           _unwindInfoStartIndex(0), _fixupsCount(0),
 747                           _lineInfoCount(0), _unwindInfoCount(0) {
 748                 // <rdar://problem/6783167> support auto-hidden weak symbols
 749                 if ( _scope == ld::Atom::scopeGlobal &&
 750                                 (sym.n_desc() & (N_WEAK_DEF|N_WEAK_REF)) == (N_WEAK_DEF|N_WEAK_REF) )
 751                         this->setAutoHide();
 752                 this->verifyAlignment();        // runs for every atom, not only for auto-hidden ones
 753         }
 754
 755 private:
 756         friend class Parser<A>;
 757         friend class Section<A>;
 758         friend class CStringSection<A>;
 759         friend class AbsoluteSymbolSection<A>;
 760
 761         pint_t                  _size;
 762         pint_t                  _objAddress;
 763         const char*             _name;
 764         mutable unsigned long   _hash;          // filled lazily by contentHash()
 765         // start index + count of this atom's entries in the file's _fixups, _lineInfos and _unwindInfos
 766         uint64_t                _fixupsStartIndex       : kFixupStartIndexBits,
 767                                 _lineInfoStartIndex     : kLineInfoStartIndexBits,
 768                                 _unwindInfoStartIndex   : kUnwindInfoStartIndexBits,
 769                                 _fixupsCount            : kFixupCountBits,
 770                                 _lineInfoCount          : kLineInfoCountBits,
 771                                 _unwindInfoCount        : kUnwindInfoCountBits;
 772
 773 };
 774 // Record that _fixups[startIndex, startIndex+count) of the owning file belongs to this atom.
 775 // Calls throwf() if count or startIndex cannot be stored in its bit-field.  The start-index limit and the
 776 // range sum are computed in 64 bits: the start-index width is 32, and (1 << 32) on an int is undefined.
 777 template <typename A>
 778 void Atom<A>::setFixupsRange(uint32_t startIndex, uint32_t count)
 779 {
 780         if ( count >= (1 << kFixupCountBits) )
 781                 throwf("too many fixups in function %s", this->name());
 782         if ( (uint64_t)startIndex >= (1ULL << kFixupStartIndexBits) )
 783                 throwf("too many fixups in file");
 784         assert((((uint64_t)startIndex+count) <= sect().file()._fixups.size()) && "fixup index out of range");
 785         _fixupsStartIndex = startIndex;
 786         _fixupsCount = count;
 787 }
 788 // Record that _unwindInfos[startIndex, startIndex+count) of the owning file describes this atom; throwf() if a value overflows its bit-field.
 789 template <typename A>
 790 void Atom<A>::setUnwindInfoRange(uint32_t startIndex, uint32_t count)
 791 {
 792         if ( (1 << kUnwindInfoCountBits) <= count )
 793                 throwf("too many compact unwind infos in function %s", this->name());
 794         else if ( (1 << kUnwindInfoStartIndexBits) <= startIndex )
 795                 throwf("too many compact unwind infos (%d) in file", startIndex);
 796         assert((startIndex+count) <= sect().file()._unwindInfos.size() && "unwindinfo index out of range");
 797         _unwindInfoCount = count;
 798         _unwindInfoStartIndex = startIndex;
 799 }
 800 // Grow this atom's unwind-info count by one; throwf() if the new count would not fit the count bit-field.
 801 template <typename A>
 802 void Atom<A>::extendUnwindInfoRange()
 803 {
 804         if ( (1 << kUnwindInfoCountBits) <= _unwindInfoCount+1 )
 805                 throwf("too many compact unwind infos in function %s", this->name());
 806         ++_unwindInfoCount;
 807 }
 808 // Record that _lineInfos[startIndex, startIndex+count) of the owning file belongs to this atom (limits are assert-checked only).
 809 template <typename A>
 810 void Atom<A>::setLineInfoRange(uint32_t startIndex, uint32_t count)
 811 {
 812         assert((count < (1 << kLineInfoCountBits)) && "too many line infos");
 813         assert((startIndex+count) <= sect().file()._lineInfos.size() && "line info index out of range");       // <= : a range may end exactly at the end of the vector, as in the sibling setters
 814         _lineInfoStartIndex = startIndex;
 815         _lineInfoCount = count;
 816 }
 817 // Pointer to this atom's bytes inside the file content: section file offset plus (_objAddress - section address).
 818 template <typename A>
 819 const uint8_t* Atom<A>::contentPointer() const
 820 {
 821         const macho_section<P>* const hdr = sect().machoSection();
 822         const uint32_t offsetInFile = hdr->offset() - hdr->addr() + _objAddress;
 823         return &sect().file().fileContent()[offsetInFile];
 824 }
 825
 826 // Fill buffer with this atom's _size bytes: zeros for zero-fill atoms, otherwise the bytes at contentPointer().
 827 template <typename A>
 828 void Atom<A>::copyRawContent(uint8_t buffer[]) const
 829 {
 830         if ( this->contentType() == ld::Atom::typeZeroFill ) {
 831                 // zero-fill: just clear the buffer, nothing is copied
 832                 bzero(buffer, _size);
 833                 return;
 834         }
 835         if ( _size != 0 )
 836                 memcpy(buffer, this->contentPointer(), _size);
 837 }
 838 // arm only: warn when a non-thumb atom in a code section is not at a 4-byte address or has alignment().powerOf2 < 2.
 839 template <>
 840 void Atom<arm>::verifyAlignment() const
 841 {
 842         if ( (this->section().type() != ld::Section::typeCode) || isThumb() )
 843                 return;
 844         if ( ((_objAddress % 4) != 0) || (this->alignment().powerOf2 < 2) )
 845                 warning("ARM function not 4-byte aligned: %s from %s", this->name(), this->file()->path());
 846 }
 847 // Generic version: performs no alignment check (Atom<arm> has its own explicit specialization).
 848 template <typename A>
 849 void Atom<A>::verifyAlignment() const
 850 {
 851 }
 852
 853
 854 template <typename A>
     //
     // Parser<A> reads one mach-o object file for architecture A.  The public entry
     // point is the static parse() below: it constructs a Parser on the stack and
     // returns the ld::relocatable::File* produced by the private parse(opts).
     // P, E and pint_t are shorthands for A::P, A::P::E and A::P::uint_t.
     //
 855 class Parser
 856 {
 857 public:
             // static queries that work directly on the raw file bytes, plus parse()
 858         static bool                                                                             validFile(const uint8_t* fileContent, bool subtypeMustMatch=false,
 859                                                                                                                                 cpu_subtype_t subtype=0);
 860         static const char*                                                              fileKind(const uint8_t* fileContent);
 861         static bool                                                                             hasObjC2Categories(const uint8_t* fileContent);
 862         static bool                                                                             hasObjC1Categories(const uint8_t* fileContent);
 863         static ld::relocatable::File*                                   parse(const uint8_t* fileContent, uint64_t fileLength,
 864                                                                                                                         const char* path, time_t modTime, ld::File::Ordinal ordinal,
 865                                                                                                                          const ParserOptions& opts) {
 866                                                                                                                                 Parser p(fileContent, fileLength, path, modTime,
 867                                                                                                                                                 ordinal, opts.convertUnwindInfo);
 868                                                                                                                                 return p.parse(opts);
 869                                                                                                                 }
 870
 871         typedef typename A::P                                           P;
 872         typedef typename A::P::E                                        E;
 873         typedef typename A::P::uint_t                           pint_t;
 874
             // Where a fixup applies: an atom plus an offset within that atom.
             // NOTE: the default constructor leaves both members uninitialized.
 875         struct SourceLocation {
 876                                                                 SourceLocation() {}
 877                                                                 SourceLocation(Atom<A>* a, uint32_t o) : atom(a), offsetInAtom(o) {}
 878                 Atom<A>*        atom;
 879                 uint32_t        offsetInAtom;
 880         };
 881
             // Target of a reference.  NOTE(review): the member comments below mention
             // "targetAtom"; presumably they mean the 'atom' member -- confirm.
 882         struct TargetDesc {
 883                 Atom<A>*        atom;
 884                 const char*     name;           // only used if targetAtom is NULL
 885                 int64_t         addend;
 886                 bool            weakImport;     // only used if targetAtom is NULL
 887         };
 888
             // An ld::Fixup paired with the atom it belongs to.  Every constructor also
             // calls incrementFixupCount() on the source atom.
 889         struct FixupInAtom {
 890                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) :
 891                         fixup(src.offsetInAtom, c, k, target), atom(src.atom) { src.atom->incrementFixupCount(); }
 892
 893                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) :
 894                         fixup(src.offsetInAtom, c, k, b, target), atom(src.atom) { src.atom->incrementFixupCount(); }
 895
 896                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) :
 897                         fixup(src.offsetInAtom, c, k, wi, name), atom(src.atom) { src.atom->incrementFixupCount(); }
 898
 899                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) :
 900                         fixup(src.offsetInAtom, c, k, b, name), atom(src.atom) { src.atom->incrementFixupCount(); }
 901
 902                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) :
 903                         fixup(src.offsetInAtom, c, k, addend), atom(src.atom) { src.atom->incrementFixupCount(); }
 904
                     // no explicit payload: forwards a zero uint64_t as the last Fixup argument
 905                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) :
 906                         fixup(src.offsetInAtom, c, k, (uint64_t)0), atom(src.atom) { src.atom->incrementFixupCount(); }
 907
 908                 ld::Fixup               fixup;
 909                 Atom<A>*                atom;
 910         };
 911
             // addFixup() overloads mirror the FixupInAtom constructors; each one builds
             // a FixupInAtom from its arguments and appends it to _allFixups.
 912         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) {
 913                 _allFixups.push_back(FixupInAtom(src, c, k, target));
 914         }
 915
 916         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) {
 917                 _allFixups.push_back(FixupInAtom(src, c, k, b, target));
 918         }
 919
 920         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) {
 921                 _allFixups.push_back(FixupInAtom(src, c, k, wi, name));
 922         }
 923
 924         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) {
 925                 _allFixups.push_back(FixupInAtom(src, c, k, b, name));
 926         }
 927
 928         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) {
 929                 _allFixups.push_back(FixupInAtom(src, c, k, addend));
 930         }
 931
 932         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) {
 933                 _allFixups.push_back(FixupInAtom(src, c, k));
 934         }
 935
 936
             // accessors and lookup helpers; only the trivial ones are defined inline here
 937         uint32_t                                                                                symbolCount() { return _symbolCount; }
 938         uint32_t                                                                                indirectSymbol(uint32_t indirectIndex);
 939         const macho_nlist<P>&                                                   symbolFromIndex(uint32_t index);
 940         const char*                                                                             nameFromSymbol(const macho_nlist<P>& sym);
 941         ld::Atom::Scope                                                                 scopeFromSymbol(const macho_nlist<P>& sym);
 942         static ld::Atom::Definition                                             definitionFromSymbol(const macho_nlist<P>& sym);
 943         static ld::Atom::Combine                                                combineFromSymbol(const macho_nlist<P>& sym);
 944                         ld::Atom::SymbolTableInclusion                  inclusionFromSymbol(const macho_nlist<P>& sym);
 945         static bool                                                                             dontDeadStripFromSymbol(const macho_nlist<P>& sym);
 946         static bool                                                                             isThumbFromSymbol(const macho_nlist<P>& sym);
 947         static bool                                                                             weakImportFromSymbol(const macho_nlist<P>& sym);
 948         static bool                                                                             resolverFromSymbol(const macho_nlist<P>& sym);
 949         uint32_t                                                                                symbolIndexFromIndirectSectionAddress(pint_t,const macho_section<P>*);
 950         const macho_section<P>*                                                 firstMachOSection() { return _sectionsStart; }
 951         const macho_section<P>*                                                 machOSectionFromSectionIndex(uint32_t index);
 952         uint32_t                                                                                machOSectionCount() { return _machOSectionsCount; }
 953         uint32_t                                                                                undefinedStartIndex() { return _undefinedStartIndex; }
 954         uint32_t                                                                                undefinedEndIndex() { return _undefinedEndIndex; }
             // appends an already-built FixupInAtom (taken by value) to _allFixups
 955         void                                                                                    addFixup(FixupInAtom f) { _allFixups.push_back(f); }
 956         Section<A>*                                                                             sectionForNum(unsigned int sectNum);
 957         Section<A>*                                                                             sectionForAddress(pint_t addr);
 958         Atom<A>*                                                                                findAtomByAddress(pint_t addr);
 959         Atom<A>*                                                                                findAtomByAddressOrNullIfStub(pint_t addr);
 960         Atom<A>*                                                                                findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom);
 961         Atom<A>*                                                                                findAtomByName(const char* name);       // slow!
 962         void                                                                                    findTargetFromAddress(pint_t addr, TargetDesc& target);
 963         void                                                                                    findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target);
 964         void                                                                                    findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum,
 965                                                                                                                                                                                 TargetDesc& target);
 966         uint32_t                                                                                tentativeDefinitionCount() { return _tentativeDefinitionCount; }
 967         uint32_t                                                                                absoluteSymbolCount() { return _absoluteSymbolCount; }
 968
             // a stubs section number of 0 means "no stubs section"
 969         bool                                                                                    hasStubsSection() { return (_stubsSectionNum != 0); }
 970         unsigned int                                                                    stubsSectionNum() { return _stubsSectionNum; }
 971         void                                                                                    addDtraceExtraInfos(const SourceLocation& src, const char* provider);
 972         const char*                                                                             scanSymbolTableForAddress(uint64_t addr);
 973         bool                                                                                    convertUnwindInfo() { return _convertUnwindInfo; }
 974         bool                                                                                    hasDataInCodeLabels() { return _hasDataInCodeLabels; }
 975
 976
 977         void                                                    addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target);
 978         void                                                    addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase);
 979
 980
 981
             // Iteration state over two inputs: an array of sorted symbol indexes and an
             // array of CFI start addresses.  symIndex / cfiIndex are the current positions
             // in those arrays.  beginSection() flags a new section and rewinds symIndex
             // only; cfiIndex is left where it is.
 982         struct LabelAndCFIBreakIterator {
 983                 typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
 984                                                                 LabelAndCFIBreakIterator(const uint32_t* ssa, uint32_t ssc, const pint_t* cfisa,
 985                                                                                                                 uint32_t cfisc, bool ols)
 986                                                                         : sortedSymbolIndexes(ssa), sortedSymbolCount(ssc), cfiStartsArray(cfisa),
 987                                                                                 cfiStartsCount(cfisc), fileHasOverlappingSymbols(ols),
 988                                                                                 newSection(false), cfiIndex(0), symIndex(0) {}
 989                 bool                                    next(Parser<A>& parser, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
 990                                                                                 pint_t* addr, pint_t* size, const macho_nlist<P>** sym);
 991                 pint_t                                  peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr);
 992                 void                                    beginSection() { newSection = true; symIndex = 0; }
 993
 994                 const uint32_t* const           sortedSymbolIndexes;
 995                 const uint32_t                          sortedSymbolCount;
 996                 const pint_t*                           cfiStartsArray;
 997                 const uint32_t                          cfiStartsCount;
 998                 const bool                                      fileHasOverlappingSymbols;
 999                 bool                                            newSection;
1000                 uint32_t                                        cfiIndex;
1001                 uint32_t                                        symIndex;
1002         };
1003
             // Bundles a CFI info array and a CU info array with their element counts.
             // Constructor argument order is (cfiArray, cfiCount, cuArray, cuCount), which
             // differs from the member declaration order below.
1004         struct CFI_CU_InfoArrays {
1005                         typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
1006                         typedef typename CUSection<A>::Info CU_Info;
1007                                                 CFI_CU_InfoArrays(const CFI_Atom_Info* cfiAr, uint32_t cfiC, CU_Info* cuAr, uint32_t cuC)
1008                                                         : cfiArray(cfiAr), cuArray(cuAr), cfiCount(cfiC), cuCount(cuC) {}
1009                 const CFI_Atom_Info* const      cfiArray;
1010                         CU_Info* const                  cuArray;
1011                 const uint32_t                          cfiCount;
1012                 const uint32_t                          cuCount;
1013         };
1014
1015
1016
1017 private:
1018         friend class Section<A>;
1019
             // NOTE: typeObjC2CategoryList is the one enumerator without the "sectionType" prefix.
1020         enum SectionType { sectionTypeIgnore, sectionTypeLiteral4, sectionTypeLiteral8, sectionTypeLiteral16,
1021                                                 sectionTypeNonLazy, sectionTypeCFI, sectionTypeCString, sectionTypeCStringPointer,
1022                                                 sectionTypeUTF16Strings, sectionTypeCFString, sectionTypeObjC2ClassRefs, typeObjC2CategoryList,
1023                                                 sectionTypeObjC1Classes, sectionTypeSymboled, sectionTypeObjC1ClassRefs,
1024                                                 sectionTypeTentativeDefinitions, sectionTypeAbsoluteSymbols, sectionTypeTLVDefs,
1025                                                 sectionTypeCompactUnwind };
1026
             // A mach-o section record together with the SectionType assigned to it.
1027         template <typename P>
1028         struct MachOSectionAndSectionClass
1029         {
1030                 const macho_section<P>* sect;
1031                 SectionType                             type;
1032
                     // Three-way comparator (-1/0/1) ordering entries by section address.
                     // It takes untyped pointers -- presumably so it can be handed to
                     // qsort(); confirm at the call site.
1033                 static int sorter(const void* l, const void* r) {
1034                         const MachOSectionAndSectionClass<P>* left = (MachOSectionAndSectionClass<P>*)l;
1035                         const MachOSectionAndSectionClass<P>* right = (MachOSectionAndSectionClass<P>*)r;
1036                         int64_t diff = left->sect->addr() - right->sect->addr();
1037                         if ( diff == 0 )
1038                                 return 0;
1039                         if ( diff < 0 )
1040                                 return -1;
1041                         else
1042                                 return 1;
1043                 }
1044         };
1045
1046         struct ParserAndSectionsArray { Parser* parser; const uint32_t* sortedSectionsArray; };
1047
1048
             // private: instances are only created by the static parse() above
1049                                                                                                         Parser(const uint8_t* fileContent, uint64_t fileLength,
1050                                                                                                                         const char* path, time_t modTime,
1051                                                                                                                         ld::File::Ordinal ordinal, bool convertUnwindInfo);
1052         ld::relocatable::File*                                                  parse(const ParserOptions& opts);
1053         uint8_t                                                                                 loadCommandSizeMask();
1054         bool                                                                                    parseLoadCommands();
1055         void                                                                                    makeSections();
1056         void                                                                                    prescanSymbolTable();
1057         void                                                                                    makeSortedSymbolsArray(uint32_t symArray[], const uint32_t sectionArray[]);
1058         void                                                                                    makeSortedSectionsArray(uint32_t array[]);
1059         static int                                                                              pointerSorter(const void* l, const void* r);
1060         static int                                                                              symbolIndexSorter(void* extra, const void* l, const void* r);
1061         static int                                                                              sectionIndexSorter(void* extra, const void* l, const void* r);
1062
1063         void                                                                                    parseDebugInfo();
1064         void                                                                                    parseStabs();
1065         static bool                                                                             isConstFunStabs(const char *stabStr);
1066         bool                                                                                    read_comp_unit(const char ** name, const char ** comp_dir,
1067                                                                                                                                                                                                 uint64_t *stmt_list);
1068         const char*                                                                             getDwarfString(uint64_t form, const uint8_t* p);
1069         bool                                                                                    skip_form(const uint8_t ** offset, const uint8_t * end,
1070                                                                                                                                 uint64_t form, uint8_t addr_size, bool dwarf64);
1071
1072
1073         // filled in by constructor
1074         const uint8_t*                                                          _fileContent;
             // NOTE(review): the constructor receives fileLength as uint64_t; storing it
             // in this 32-bit member narrows the value.
1075         uint32_t                                                                        _fileLength;
1076         const char*                                                                     _path;
1077         time_t                                                                          _modTime;
1078         ld::File::Ordinal                                                       _ordinal;
1079
1080         // filled in by parseLoadCommands()
1081         File<A>*                                                                        _file;
1082         const macho_nlist<P>*                                           _symbols;
1083         uint32_t                                                                        _symbolCount;
1084         const char*                                                                     _strings;
1085         uint32_t                                                                        _stringsSize;
1086         const uint32_t*                                                         _indirectTable;
1087         uint32_t                                                                        _indirectTableCount;
1088         uint32_t                                                                        _undefinedStartIndex;
1089         uint32_t                                                                        _undefinedEndIndex;
1090         const macho_section<P>*                                         _sectionsStart;
1091         uint32_t                                                                        _machOSectionsCount;
1092         bool                                                                            _hasUUID;
1093
1094         // filled in by parse()
1095         CFISection<A>*                                                          _EHFrameSection;
1096         CUSection<A>*                                                           _compactUnwindSection;
1097         AbsoluteSymbolSection<A>*                                       _absoluteSection;
1098         uint32_t                                                                        _tentativeDefinitionCount;
1099         uint32_t                                                                        _absoluteSymbolCount;
1100         uint32_t                                                                        _symbolsInSections;
1101         bool                                                                            _hasLongBranchStubs;
1102         bool                                                                            _AppleObjc; // FSF has objc that uses different data layout
1103         bool                                                                            _overlappingSymbols;
1104         bool                                                                            _convertUnwindInfo;
1105         bool                                                                            _hasDataInCodeLabels;
1106         unsigned int                                                            _stubsSectionNum;
1107         const macho_section<P>*                                         _stubsMachOSection;
1108         std::vector<const char*>                                        _dtraceProviderInfo;
             // every fixup recorded through the addFixup() overloads ends up here
1109         std::vector<FixupInAtom>                                        _allFixups;
1110 };
1111
1112
1113
1114 template <typename A>
1115 Parser<A>::Parser(const uint8_t* fileContent, uint64_t fileLength, const char* path, time_t modTime,
1116                                         ld::File::Ordinal ordinal, bool convertDUI)
1117                 : _fileContent(fileContent), _fileLength(fileLength), _path(path), _modTime(modTime),
1118                         _ordinal(ordinal), _file(NULL),
1119                         _symbols(NULL), _symbolCount(0), _strings(NULL), _stringsSize(0),
1120                         _indirectTable(NULL), _indirectTableCount(0),
1121                         _undefinedStartIndex(0), _undefinedEndIndex(0),
1122                         _sectionsStart(NULL), _machOSectionsCount(0), _hasUUID(false),
1123                         _EHFrameSection(NULL), _compactUnwindSection(NULL), _absoluteSection(NULL),
1124                         _tentativeDefinitionCount(0), _absoluteSymbolCount(0),
1125                         _symbolsInSections(0), _hasLongBranchStubs(false),  _AppleObjc(false),
1126                         _overlappingSymbols(false), _convertUnwindInfo(convertDUI), _hasDataInCodeLabels(false),
1127                         _stubsSectionNum(0), _stubsMachOSection(NULL)
1128 {
1129 }
1130
1131
1132 template <>
1133 bool Parser<x86>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1134 {
1135         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1136         if ( header->magic() != MH_MAGIC )
1137                 return false;
1138         if ( header->cputype() != CPU_TYPE_I386 )
1139                 return false;
1140         if ( header->filetype() != MH_OBJECT )
1141                 return false;
1142         return true;
1143 }
1144
1145 template <>
1146 bool Parser<x86_64>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1147 {
1148         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1149         if ( header->magic() != MH_MAGIC_64 )
1150                 return false;
1151         if ( header->cputype() != CPU_TYPE_X86_64 )
1152                 return false;
1153         if ( header->filetype() != MH_OBJECT )
1154                 return false;
1155         return true;
1156 }
1157
1158 template <>
1159 bool Parser<arm>::validFile(const uint8_t* fileContent, bool subtypeMustMatch, cpu_subtype_t subtype)
1160 {
1161         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1162         if ( header->magic() != MH_MAGIC )
1163                 return false;
1164         if ( header->cputype() != CPU_TYPE_ARM )
1165                 return false;
1166         if ( header->filetype() != MH_OBJECT )
1167                 return false;
1168         if ( subtypeMustMatch ) {
1169                 if ( (cpu_subtype_t)header->cpusubtype() == subtype )
1170                         return true;
1171                 // hack until libcc_kext.a is made fat
1172                 if ( header->cpusubtype() == CPU_SUBTYPE_ARM_ALL )
1173                         return true;
1174                 return false;
1175         }
1176         return true;
1177 }
1178
1179
1180
1181 template <>
1182 const char* Parser<x86>::fileKind(const uint8_t* fileContent)
1183 {
1184         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1185         if ( header->magic() != MH_MAGIC )
1186                 return NULL;
1187         if ( header->cputype() != CPU_TYPE_I386 )
1188                 return NULL;
1189         return "i386";
1190 }
1191
1192 template <>
1193 const char* Parser<x86_64>::fileKind(const uint8_t* fileContent)
1194 {
1195         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1196         if ( header->magic() != MH_MAGIC )
1197                 return NULL;
1198         if ( header->cputype() != CPU_TYPE_X86_64 )
1199                 return NULL;
1200         return "x86_64";
1201 }
1202
1203 template <>
1204 const char* Parser<arm>::fileKind(const uint8_t* fileContent)
1205 {
1206         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1207         if ( header->magic() != MH_MAGIC )
1208                 return NULL;
1209         if ( header->cputype() != CPU_TYPE_ARM )
1210                 return NULL;
1211         for (const ArchInfo* t=archInfoArray; t->archName != NULL; ++t) {
1212                 if ( (t->cpuType == CPU_TYPE_ARM) && ((cpu_subtype_t)header->cpusubtype() == t->cpuSubType) ) {
1213                         return t->archName;
1214                 }
1215         }
1216         return "arm???";
1217 }
1218
1219
1220 template <typename A>
1221 bool Parser<A>::hasObjC2Categories(const uint8_t* fileContent)
1222 {
1223         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1224         const uint32_t cmd_count = header->ncmds();
1225         const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1226         const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1227         const macho_load_command<P>* cmd = cmds;
1228         for (uint32_t i = 0; i < cmd_count; ++i) {
1229                 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1230                         const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1231                         const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1232                         for (uint32_t si=0; si < segment->nsects(); ++si) {
1233                                 const macho_section<P>* sect = &sectionsStart[si];
1234                                 if ( (sect->size() > 0)
1235                                         && (strcmp(sect->sectname(), "__objc_catlist") == 0)
1236                                         && (strcmp(sect->segname(), "__DATA") == 0) ) {
1237                                                 return true;
1238                                 }
1239                         }
1240                 }
1241                 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1242                 if ( cmd > cmdsEnd )
1243                         throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1244         }
1245         return false;
1246 }
1247
1248
1249 template <typename A>
1250 bool Parser<A>::hasObjC1Categories(const uint8_t* fileContent)
1251 {
1252         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1253         const uint32_t cmd_count = header->ncmds();
1254         const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1255         const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1256         const macho_load_command<P>* cmd = cmds;
1257         for (uint32_t i = 0; i < cmd_count; ++i) {
1258                 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1259                         const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1260                         const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1261                         for (uint32_t si=0; si < segment->nsects(); ++si) {
1262                                 const macho_section<P>* sect = &sectionsStart[si];
1263                                 if ( (sect->size() > 0)
1264                                         && (strcmp(sect->sectname(), "__category") == 0)
1265                                         && (strcmp(sect->segname(), "__OBJC") == 0) ) {
1266                                                 return true;
1267                                 }
1268                         }
1269                 }
1270                 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1271                 if ( cmd > cmdsEnd )
1272                         throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1273         }
1274         return false;
1275 }
1276
1277 template <typename A>
1278 int Parser<A>::pointerSorter(const void* l, const void* r)
1279 {
1280         // sort references by address
1281         const pint_t* left = (pint_t*)l;
1282         const pint_t* right = (pint_t*)r;
1283         return (*left - *right);
1284 }
1285
1286 template <typename A>
1287 typename A::P::uint_t Parser<A>::LabelAndCFIBreakIterator::peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr)
1288 {
1289         pint_t symbolAddr;
1290         if ( symIndex < sortedSymbolCount )
1291                 symbolAddr = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]).n_value();
1292         else
1293                 symbolAddr = endAddr;
1294         pint_t cfiAddr;
1295         if ( cfiIndex < cfiStartsCount )
1296                 cfiAddr = cfiStartsArray[cfiIndex];
1297         else
1298                 cfiAddr = endAddr;
1299         if ( (cfiAddr < symbolAddr) && (cfiAddr >= startAddr) ) {
1300                 if ( cfiAddr <  endAddr )
1301                         return cfiAddr;
1302                 else
1303                         return endAddr;
1304         }
1305         else  {
1306                 if ( symbolAddr <  endAddr )
1307                         return symbolAddr;
1308                 else
1309                         return endAddr;
1310         }
1311 }
1312
1313 //
1314 // Parses up a section into chunks based on labels and CFI information.
1315 // Each call returns the next chunk address and size, and (if the break
1316 // was because of a label) the symbol. Returns false when no more chunks.
1317 //
1318 template <typename A>
1319 bool Parser<A>::LabelAndCFIBreakIterator::next(Parser<A>& parser, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
1320                                                                                                 pint_t* addr, pint_t* size, const macho_nlist<P>** symbol)
1321 {
1322         // may not be a label on start of section, but need atom demarcation there
1323         if ( newSection ) {
1324                 newSection = false;
1325                 // advance symIndex until we get to the first label at or past the start of this section
1326                 while ( symIndex < sortedSymbolCount ) {
1327                         const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1328                         pint_t nextSymbolAddr = sym.n_value();
1329                         //fprintf(stderr, "sectNum=%d, nextSymbolAddr=0x%08llX, name=%s\n", sectNum, (uint64_t)nextSymbolAddr, parser.nameFromSymbol(sym));
1330                         if ( (nextSymbolAddr > startAddr) || ((nextSymbolAddr == startAddr) && (sym.n_sect() == sectNum)) )
1331                                 break;
1332                         ++symIndex;
1333                 }
1334                 if ( symIndex < sortedSymbolCount ) {
1335                         const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1336                         pint_t nextSymbolAddr = sym.n_value();
1337                         // if next symbol found is not in this section
1338                         if ( sym.n_sect() != sectNum ) {
1339                                 // check for CFI break instead of symbol break
1340                                 if ( cfiIndex < cfiStartsCount ) {
1341                                         pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1342                                         if ( nextCfiAddr < endAddr ) {
1343                                                 // use cfi
1344                                                 ++cfiIndex;
1345                                                 *addr = nextCfiAddr;
1346                                                 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1347                                                 *symbol = NULL;
1348                                                 return true;
1349                                         }
1350                                 }
1351                                 *addr = startAddr;
1352                                 *size = endAddr - startAddr;
1353                                 *symbol = NULL;
1354                                 if ( startAddr == endAddr )
1355                                         return false;  // zero size section
1356                                 else
1357                                         return true;  // whole section is one atom with no label
1358                         }
1359                         // if also CFI break here, eat it
1360                         if ( cfiIndex < cfiStartsCount ) {
1361                                 if ( cfiStartsArray[cfiIndex] == nextSymbolAddr )
1362                                         ++cfiIndex;
1363                         }
1364                         if ( nextSymbolAddr == startAddr ) {
1365                                 // label at start of section, return it as chunk
1366                                 ++symIndex;
1367                                 *addr = startAddr;
1368                                 *size = peek(parser, startAddr, endAddr) - startAddr;
1369                                 *symbol = &sym;
1370                                 return true;
1371                         }
1372                         // return chunk before first symbol
1373                         *addr = startAddr;
1374                         *size = nextSymbolAddr - startAddr;
1375                         *symbol = NULL;
1376                         return true;
1377                 }
1378                 // no symbols left in whole file, so entire section is one chunk
1379                 *addr = startAddr;
1380                 *size = endAddr - startAddr;
1381                 *symbol = NULL;
1382                 if ( startAddr == endAddr )
1383                         return false;  // zero size section
1384                 else
1385                         return true;  // whole section is one atom with no label
1386         }
1387
1388         while ( (symIndex < sortedSymbolCount) && (cfiIndex < cfiStartsCount) ) {
1389                 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1390                 pint_t nextSymbolAddr = sym.n_value();
1391                 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1392                 if ( nextSymbolAddr <  nextCfiAddr ) {
1393                         if ( nextSymbolAddr >= endAddr )
1394                                 return false;
1395                         ++symIndex;
1396                         if ( nextSymbolAddr < startAddr )
1397                                 continue;
1398                         *addr = nextSymbolAddr;
1399                         *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1400                         *symbol = &sym;
1401                         return true;
1402                 }
1403                 else if ( nextCfiAddr < nextSymbolAddr ) {
1404                         if ( nextCfiAddr >= endAddr )
1405                                 return false;
1406                         ++cfiIndex;
1407                         if ( nextCfiAddr < startAddr )
1408                                 continue;
1409                         *addr = nextCfiAddr;
1410                         *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1411                         *symbol = NULL;
1412                         return true;
1413                 }
1414                 else {
1415                         if ( nextCfiAddr >= endAddr )
1416                                 return false;
1417                         ++symIndex;
1418                         ++cfiIndex;
1419                         if ( nextCfiAddr < startAddr )
1420                                 continue;
1421                         *addr = nextCfiAddr;
1422                         *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1423                         *symbol = &sym;
1424                         return true;
1425                 }
1426         }
1427         while ( symIndex < sortedSymbolCount ) {
1428                 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1429                 pint_t nextSymbolAddr = sym.n_value();
1430                 // if next symbol found is not in this section, then done with iteration
1431                 if ( sym.n_sect() != sectNum )
1432                         return false;
1433                 ++symIndex;
1434                 if ( nextSymbolAddr < startAddr )
1435                         continue;
1436                 *addr = nextSymbolAddr;
1437                 *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1438                 *symbol = &sym;
1439                 return true;
1440         }
1441         while ( cfiIndex < cfiStartsCount ) {
1442                 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1443                 if ( nextCfiAddr >= endAddr )
1444                         return false;
1445                 ++cfiIndex;
1446                 if ( nextCfiAddr < startAddr )
1447                         continue;
1448                 *addr = nextCfiAddr;
1449                 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1450                 *symbol = NULL;
1451                 return true;
1452         }
1453         return false;
1454 }
1455
1456
1457
1458 template <typename A>
1459 ld::relocatable::File* Parser<A>::parse(const ParserOptions& opts)
1460 {
             // Builds the File<A> for this object file and returns it. Steps, in order:
             // read the load commands, sort sections and symbols by address, pre-parse
             // __compact_unwind and __eh_frame, split every section into atoms (one
             // counting pass, one constructing pass), gather all fixups into the file's
             // fixup array, attach unwind info to function atoms, then parse debug info.
             // If parseLoadCommands() returns false the freshly created (empty) file is
             // returned without any further parsing.
             // opts - only opts.logAllFiles is read here (prints the path when set).
1461         // create file object
1462         _file = new File<A>(_path, _modTime, _fileContent, _ordinal);
1463
1464         // respond to -t option
1465         if ( opts.logAllFiles )
1466                 printf("%s\n", _path);
1467
1468         // parse start of mach-o file
1469         if ( ! parseLoadCommands() )
1470                 return _file;
1471
1472         // make array of section indexes sorted by section address
1473         uint32_t sortedSectionIndexes[_machOSectionsCount];
1474         this->makeSortedSectionsArray(sortedSectionIndexes);
1475
1476         // make symbol table sorted by address
             // (prescanSymbolTable() computes _symbolsInSections, which sizes the array)
1477         this->prescanSymbolTable();
1478         uint32_t sortedSymbolIndexes[_symbolsInSections];
1479         this->makeSortedSymbolsArray(sortedSymbolIndexes, sortedSectionIndexes);
1480
1481         // allocate Section<A> object for each mach-o section
1482         makeSections();
1483
1484         // if it exists, do special early parsing of __compact_unwind section
1485         uint32_t countOfCUs = 0;
1486         if ( _compactUnwindSection != NULL )
1487                 countOfCUs = _compactUnwindSection->count();
             // raw byte buffer on the stack, viewed as an array of CUSection<A>::Info
1488         uint8_t cuInfoBuffer[sizeof(typename CUSection<A>::Info) * countOfCUs];
1489         typename CUSection<A>::Info*  cuInfoArray = (typename CUSection<A>::Info*)cuInfoBuffer;
1490         if ( countOfCUs != 0 )
1491                 _compactUnwindSection->parse(*this, countOfCUs, cuInfoArray);
1492
1493         // if it exists, do special early parsing of __eh_frame section
1494         // stack allocate array of CFI_Atom_Info
1495         uint32_t countOfCFIs = 0;
1496         if ( _EHFrameSection != NULL )
1497                 countOfCFIs = _EHFrameSection->cfiCount();
1498         typename CFISection<A>::CFI_Atom_Info  cfiArray[countOfCFIs];
1499         // stack allocate (if not too large) a copy of __eh_frame to apply relocations to
1500         uint8_t* ehBuffer = NULL;
1501         uint32_t stackAllocSize = 0;
1502         if ( (countOfCFIs != 0) && _EHFrameSection->needsRelocating() ) {
1503                 uint32_t sectSize = _EHFrameSection->machoSection()->size();
                     // sections larger than 50KB go to the heap instead of the stack
                     // NOTE(review): the malloc'd buffer is never freed in this function --
                     // confirm nothing keeps pointers into it before adding a free()
1504                 if ( sectSize > 50*1024 )
1505                         ehBuffer = (uint8_t*)malloc(sectSize);
1506                 else
1507                         stackAllocSize = sectSize;
1508         }
1509         uint32_t ehStackBuffer[1+stackAllocSize/4]; // make 4-byte aligned stack buffer
1510         if ( ehBuffer == NULL )
1511                 ehBuffer = (uint8_t*)&ehStackBuffer;
1512         uint32_t cfiStartsCount = 0;
1513         if ( countOfCFIs != 0 ) {
1514                 _EHFrameSection->cfiParse(*this, ehBuffer, cfiArray, countOfCFIs);
1515                 // count functions and lsdas
1516                 for(uint32_t i=0; i < countOfCFIs; ++i) {
1517                         if ( cfiArray[i].isCIE )
1518                                 continue;
1519                         //fprintf(stderr, "cfiArray[i].func = 0x%08llX, cfiArray[i].lsda = 0x%08llX, encoding=0x%08X\n",
1520                         //                      (uint64_t)cfiArray[i].u.fdeInfo.function.targetAddress,
1521                         //                      (uint64_t)cfiArray[i].u.fdeInfo.lsda.targetAddress,
1522                         //                      cfiArray[i].u.fdeInfo.compactUnwindInfo);
1523                         if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1524                                 ++cfiStartsCount;
1525                         if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1526                                 ++cfiStartsCount;
1527                 }
1528         }
1529         CFI_CU_InfoArrays cfis(cfiArray, countOfCFIs, cuInfoArray, countOfCUs);
1530
1531         // create sorted array of function starts and lsda starts
             // (second walk over cfiArray; must apply the same tests as the counting loop above)
1532         pint_t cfiStartsArray[cfiStartsCount];
1533         uint32_t countOfFDEs = 0;
1534         if ( countOfCFIs != 0 ) {
1535                 int index = 0;
1536                 for(uint32_t i=0; i < countOfCFIs; ++i) {
1537                         if ( cfiArray[i].isCIE )
1538                                 continue;
1539                         if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1540                                 cfiStartsArray[index++] = cfiArray[i].u.fdeInfo.function.targetAddress;
1541                         if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1542                                 cfiStartsArray[index++] = cfiArray[i].u.fdeInfo.lsda.targetAddress;
1543                         ++countOfFDEs;
1544                 }
1545                 ::qsort(cfiStartsArray, cfiStartsCount, sizeof(pint_t), pointerSorter);
1546         #ifndef NDEBUG
1547                 // scan for FDEs claiming the same function
1548                 for(int i=1; i < index; ++i) {
1549                         assert( cfiStartsArray[i] != cfiStartsArray[i-1] );
1550                 }
1551         #endif
1552         }
1553
1554         Section<A>** sections = _file->_sectionsArray;
1555         uint32_t        sectionsCount = _file->_sectionsArrayCount;
1556
1557         // figure out how many atoms will be allocated and allocate
1558         LabelAndCFIBreakIterator breakIterator(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1559                                                                                         cfiStartsCount, _overlappingSymbols);
1560         uint32_t computedAtomCount = 0;
1561         for (uint32_t i=0; i < sectionsCount; ++i ) {
1562                 breakIterator.beginSection();
1563                 uint32_t count = sections[i]->computeAtomCount(*this, breakIterator, cfis);
1564                 //const macho_section<P>* sect = sections[i]->machoSection();
1565                 //fprintf(stderr, "computed count=%u for section %s size=%llu\n", count, sect->sectname(), (sect != NULL) ? sect->size() : 0);
1566                 computedAtomCount += count;
1567         }
1568         //fprintf(stderr, "allocating %d atoms * sizeof(Atom<A>)=%ld, sizeof(ld::Atom)=%ld\n", computedAtomCount, sizeof(Atom<A>), sizeof(ld::Atom));
             // one raw byte block holds all atoms; sections fill it in during appendAtoms()
1569         _file->_atomsArray = new uint8_t[computedAtomCount*sizeof(Atom<A>)];
1570         _file->_atomsArrayCount = 0;
1571
1572         // have each section append atoms to _atomsArray
             // (a second iterator built from the same inputs, since the first was used up by the counting pass)
1573         LabelAndCFIBreakIterator breakIterator2(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1574                                                                                                 cfiStartsCount, _overlappingSymbols);
1575         for (uint32_t i=0; i < sectionsCount; ++i ) {
1576                 uint8_t* atoms = _file->_atomsArray + _file->_atomsArrayCount*sizeof(Atom<A>);
1577                 breakIterator2.beginSection();
1578                 uint32_t count = sections[i]->appendAtoms(*this, atoms, breakIterator2, cfis);
1579                 //fprintf(stderr, "append count=%u for section %s/%s\n", count, sections[i]->machoSection()->segname(), sections[i]->machoSection()->sectname());
1580                 _file->_atomsArrayCount += count;
1581         }
1582         assert( _file->_atomsArrayCount == computedAtomCount && "more atoms allocated than expected");
1583
1584
1585         // have each section add all fix-ups for its atoms
1586         _allFixups.reserve(computedAtomCount*5);
1587         for (uint32_t i=0; i < sectionsCount; ++i )
1588                 sections[i]->makeFixups(*this, cfis);
1589
1590         // assign fixups start offset for each atom
             // _fixupsCount is zeroed here and rebuilt by the copy loop below, where it
             // doubles as each atom's write cursor
1591         uint8_t* p = _file->_atomsArray;
1592         uint32_t fixupOffset = 0;
1593         for(int i=_file->_atomsArrayCount; i > 0; --i) {
1594                 Atom<A>* atom = (Atom<A>*)p;
1595                 atom->_fixupsStartIndex = fixupOffset;
1596                 fixupOffset += atom->_fixupsCount;
1597                 atom->_fixupsCount = 0;
1598                 p += sizeof(Atom<A>);
1599         }
1600         assert(fixupOffset == _allFixups.size());
             // The copy loop stores through operator[], which is only valid for indexes
             // below size(); reserve() alone leaves size() at zero. Grow the array for
             // real, using the first collected fixup purely as a filler value -- every
             // slot is overwritten by the loop below (fixupOffset == _allFixups.size()).
1601         if ( !_allFixups.empty() )
                     _file->_fixups.resize(fixupOffset, _allFixups.front().fixup);
1602
1603         // copy each fixup for each atom
1604         for(typename std::vector<FixupInAtom>::iterator it=_allFixups.begin(); it != _allFixups.end(); ++it) {
1605                 uint32_t slot = it->atom->_fixupsStartIndex + it->atom->_fixupsCount;
1606                 _file->_fixups[slot] = it->fixup;
1607                 it->atom->_fixupsCount++;
1608         }
1609
1610         // done with temp vector
1611         _allFixups.clear();
1612
1613         // add unwind info
             // first from the FDEs: one entry per FDE with a valid function address
1614         _file->_unwindInfos.reserve(countOfFDEs+countOfCUs);
1615         for(uint32_t i=0; i < countOfCFIs; ++i) {
1616                 if ( cfiArray[i].isCIE )
1617                         continue;
1618                 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS ) {
1619                         ld::Atom::UnwindInfo info;
1620                         info.startOffset = 0;
1621                         info.unwindInfo = cfiArray[i].u.fdeInfo.compactUnwindInfo;
1622                         _file->_unwindInfos.push_back(info);
1623                         Atom<A>* func = findAtomByAddress(cfiArray[i].u.fdeInfo.function.targetAddress);
1624                         func->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1625                 }
1626         }
1627         // apply compact infos in __LD,__compact_unwind section to each function
1628         // if function also has dwarf unwind, CU will override it
1629         Atom<A>* lastFunc = NULL;
1630         uint32_t lastEnd = 0;
1631         for(uint32_t i=0; i < countOfCUs; ++i) {
1632                 typename CUSection<A>::Info* info = &cuInfoArray[i];
1633                 assert(info->function != NULL);
1634                 ld::Atom::UnwindInfo ui;
1635                 ui.startOffset = info->functionStartAddress - info->function->objectAddress();
1636                 ui.unwindInfo = info->compactUnwindInfo;
1637                 _file->_unwindInfos.push_back(ui);
1638                 // if previous is for same function, extend range
1639                 if ( info->function == lastFunc ) {
                             // consecutive entries for one function are expected to abut; warn otherwise
1640                         if ( lastEnd != ui.startOffset ) {
1641                                 if ( lastEnd < ui.startOffset )
1642                                         warning("__LD,__compact_unwind entries for %s have a gap at offset 0x%0X", info->function->name(), lastEnd);
1643                                 else
1644                                         warning("__LD,__compact_unwind entries for %s overlap at offset 0x%0X", info->function->name(), lastEnd);
1645                         }
1646                         lastFunc->extendUnwindInfoRange();
1647                 }
1648                 else
1649                         info->function->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1650                 lastFunc = info->function;
1651                 lastEnd = ui.startOffset + info->rangeLength;
1652         }
1653
1654         // parse dwarf debug info to get line info
1655         this->parseDebugInfo();
1656
1657         return _file;
1658 }
1659
1660
1661
1662 template <> uint8_t Parser<x86>::loadCommandSizeMask()          { return 0x03; }  // cmdsize must be a multiple of 4 (checked in parseLoadCommands)
1663 template <> uint8_t Parser<x86_64>::loadCommandSizeMask()       { return 0x07; }  // cmdsize must be a multiple of 8
1664 template <> uint8_t Parser<arm>::loadCommandSizeMask()          { return 0x03; }  // cmdsize must be a multiple of 4
1665
1666 template <typename A>
1667 bool Parser<A>::parseLoadCommands()
1668 {
             // Walks the mach-o load commands at the start of _fileContent and records
             // the symbol table, string pool, indirect symbol table, UUID presence and
             // the single segment command, plus two File attributes from the header.
             // Returns false only when the header declares zero load commands, true
             // otherwise. Malformed input is reported by throwing (throwf() or a string
             // literal).
1669         const macho_header<P>* header = (const macho_header<P>*)_fileContent;
1670
1671         // set File attributes
1672         _file->_canScatterAtoms = (header->flags() & MH_SUBSECTIONS_VIA_SYMBOLS);
1673         _file->_cpuSubType = header->cpusubtype();
1674
1675         const macho_segment_command<P>* segment = NULL;
1676         const uint8_t* const endOfFile = _fileContent + _fileLength;
1677         const uint32_t cmd_count = header->ncmds();
1678         // <rdar://problem/5394172> an empty .o file with zero load commands will crash linker
1679         if ( cmd_count == 0 )
1680                 return false;
             // load commands start right after the header and span sizeofcmds() bytes
1681         const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1682         const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1683         const macho_load_command<P>* cmd = cmds;
1684         for (uint32_t i = 0; i < cmd_count; ++i) {
                     // validate this command's size before looking at its contents:
                     // aligned per loadCommandSizeMask(), inside the load-command area, inside the file
1685                 uint32_t size = cmd->cmdsize();
1686                 if ( (size & this->loadCommandSizeMask()) != 0 )
1687                         throwf("load command #%d has a unaligned size", i);
1688                 const uint8_t* endOfCmd = ((uint8_t*)cmd)+cmd->cmdsize();
1689                 if ( endOfCmd > (uint8_t*)cmdsEnd )
1690                         throwf("load command #%d extends beyond the end of the load commands", i);
1691                 if ( endOfCmd > endOfFile )
1692                         throwf("load command #%d extends beyond the end of the file", i);
1693                 switch (cmd->cmd()) {
1694                     case LC_SYMTAB:
1695                                 {
1696                                         const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
1697                                         _symbolCount = symtab->nsyms();
1698                                         _symbols = (const macho_nlist<P>*)(_fileContent + symtab->symoff());
1699                                         _strings = (char*)_fileContent + symtab->stroff();
1700                                         _stringsSize = symtab->strsize();
1701                                         if ( (symtab->symoff() + _symbolCount*sizeof(macho_nlist<P>)) > _fileLength )
1702                                                 throw "mach-o symbol table extends beyond end of file";
1703                                         if ( (_strings + _stringsSize) > (char*)endOfFile )
1704                                                 throw "mach-o string pool extends beyond end of file";
                                             // when no indirect table has been recorded and the undefined range is
                                             // still unset, default the undefined range to the whole symbol table
                                             // (presumably the case of a file without LC_DYSYMTAB -- confirm)
1705                                         if ( _indirectTable == NULL ) {
1706                                                 if ( _undefinedEndIndex == 0 ) {
1707                                                         _undefinedStartIndex = 0;
1708                                                         _undefinedEndIndex = symtab->nsyms();
1709                                                 }
1710                                         }
1711                                 }
1712                                 break;
1713                         case LC_DYSYMTAB:
1714                                 {
1715                                         const macho_dysymtab_command<P>* dsymtab = (macho_dysymtab_command<P>*)cmd;
1716                                         _indirectTable = (uint32_t*)(_fileContent + dsymtab->indirectsymoff());
1717                                         _indirectTableCount = dsymtab->nindirectsyms();
1718                                         if ( &_indirectTable[_indirectTableCount] > (uint32_t*)endOfFile )
1719                                                 throw "indirect symbol table extends beyond end of file";
1720                                         _undefinedStartIndex = dsymtab->iundefsym();
1721                                         _undefinedEndIndex = _undefinedStartIndex + dsymtab->nundefsym();
1722                                 }
1723                                 break;
1724                     case LC_UUID:
1725                                 _hasUUID = true;
1726                                 break;
1727
1728                         default:
                                     // the segment command id is taken from macho_segment_command<P>::CMD;
                                     // exactly one such command is accepted per object file
1729                                 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1730                                         if ( segment != NULL )
1731                                                 throw "more than one LC_SEGMENT found in object file";
1732                                         segment = (macho_segment_command<P>*)cmd;
1733                                 }
1734                                 break;
1735                 }
                     // step to the next command using this command's own size
1736                 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1737                 if ( cmd > cmdsEnd )
1738                         throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1739         }
1740
1741         // record range of sections
             // (section headers follow the segment command directly)
             // NOTE(review): nsects() is not checked against the segment command's
             // cmdsize here -- confirm whether a later step validates it
1742         if ( segment == NULL )
1743                 throw "missing LC_SEGMENT";
1744         _sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1745         _machOSectionsCount = segment->nsects();
1746
1747         return true;
1748 }
1749
1750
1751 template <typename A>
1752 void Parser<A>::prescanSymbolTable()
1753 {
             // First pass over the symbol table. Resets and recomputes
             // _tentativeDefinitionCount, _absoluteSymbolCount and _symbolsInSections,
             // sets _hasDataInCodeLabels (and _AppleObjc when an .objc_class_name_*
             // absolute symbol is seen), and collects dtrace provider-info names.
             // Stab entries are skipped. Throws a string literal when a counted
             // symbol's n_sect() exceeds the number of sections.
1754         _tentativeDefinitionCount = 0;
1755         _absoluteSymbolCount = 0;
1756         _symbolsInSections = 0;
1757         _hasDataInCodeLabels = false;
1758         for (uint32_t i=0; i < this->_symbolCount; ++i) {
1759                 const macho_nlist<P>& sym =     symbolFromIndex(i);
1760                 // ignore stabs
1761                 if ( (sym.n_type() & N_STAB) != 0 )
1762                         continue;
1763
1764                 // look at undefines
1765                 const char* symbolName = this->nameFromSymbol(sym);
1766                 if ( (sym.n_type() & N_TYPE) == N_UNDF ) {
1767                         if ( sym.n_value() != 0 ) {
1768                                 // count tentative definitions
1769                                 ++_tentativeDefinitionCount;
1770                         }
1771                         else if ( strncmp(symbolName, "___dtrace_", 10) == 0 ) {
1772                                 // any undefined starting with __dtrace_*$ that is not ___dtrace_probe$* or ___dtrace_isenabled$*
1773                                 // is extra provider info
1774                                 if ( (strncmp(&symbolName[10], "probe$", 6) != 0) && (strncmp(&symbolName[10], "isenabled$", 10) != 0) ) {
1775                                         _dtraceProviderInfo.push_back(symbolName);
1776                                 }
1777                         }
1778                         continue;
1779                 }
1780
1781                 // count absolute symbols
1782                 if ( (sym.n_type() & N_TYPE) == N_ABS ) {
1783                         const char* absName = this->nameFromSymbol(sym);
1784                         // ignore .objc_class_name_* symbols
1785                         if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
1786                                 _AppleObjc = true;
1787                                 continue;
1788                         }
1789                         // ignore .objc_category_name_* symbols
1790                         if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
1791                                 continue;
1792                         // ignore empty *.eh symbols
                             // (a name shorter than three characters cannot end in ".eh"; the length
                             // check keeps the suffix index from wrapping around and reading out of bounds)
                             const size_t absNameLen = strlen(absName);
1793                         if ( (absNameLen >= 3) && (strcmp(&absName[absNameLen-3], ".eh") == 0) )
1794                                 continue;
1795                         ++_absoluteSymbolCount;
1796                 }
1797
1798                 // only look at definitions
1799                 if ( (sym.n_type() & N_TYPE) != N_SECT )
1800                         continue;
1801
1802                 // 'L' labels do not denote atom breaks
1803                 if ( symbolName[0] == 'L' ) {
1804                         // <rdar://problem/9218847> Formalize data in code with L$start$ labels
1805                         if ( strncmp(symbolName, "L$start$", 8) == 0 )
1806                                 _hasDataInCodeLabels = true;
1807                         continue;
1808                 }
1809                 // how many def syms in each section
1810                 if ( sym.n_sect() > _machOSectionsCount )
1811                         throw "bad n_sect in symbol table";
1812
                     // this count later sizes the sorted-symbols array, so the filters above
                     // must stay in step with those in makeSortedSymbolsArray()
1813                 _symbolsInSections++;
1814         }
1815 }
1816
1817 template <typename A>
1818 int Parser<A>::sectionIndexSorter(void* extra, const void* l, const void* r)
1819 {
             // qsort_r comparator used by makeSortedSectionsArray(). l and r point at
             // uint32_t section indexes; extra is the Parser<A> used to resolve them.
             // Orders sections by ascending addr(). At equal addresses a zero-size
             // section sorts before a non-empty one; two non-empty sections at the same
             // address are rejected with throwf().
1820         Parser<A>* parser = (Parser<A>*)extra;
1821         const uint32_t* left = (uint32_t*)l;
1822         const uint32_t* right = (uint32_t*)r;
1823         const macho_section<P>* leftSect =      parser->machOSectionFromSectionIndex(*left);
1824         const macho_section<P>* rightSect = parser->machOSectionFromSectionIndex(*right);
1825
1826         // can't just return difference because 64-bit diff does not fit in 32-bit return type
1827         int64_t result = leftSect->addr() - rightSect->addr();
1828         if ( result == 0 ) {
1829                 // two sections with same start address
1830                 // one with zero size goes first
1831                 bool leftEmpty = ( leftSect->size() == 0 );
1832                 bool rightEmpty = ( rightSect->size() == 0 );
1833                 if ( leftEmpty != rightEmpty ) {
1834                         return ( rightEmpty ? 1 : -1 );
1835                 }
1836                 if ( !leftEmpty && !rightEmpty )
1837                         throwf("overlapping sections");
1838                 // both empty, so chose file order
                     // NOTE(review): rightSect - leftSect is positive when leftSect is the lower
                     // pointer, which sorts the lower-addressed section header last -- confirm
                     // that this is the intended "file order"
1839                 return ( rightSect - leftSect );
1840         }
1841         else if ( result < 0 )
1842                 return -1;
1843         else
1844                 return 1;
1845 }
1846
1847 template <typename A>
1848 void Parser<A>::makeSortedSectionsArray(uint32_t array[])
1849 {
             // Fills array[0 .. _machOSectionsCount) with the section indexes
             // 0, 1, 2, ... and sorts them with sectionIndexSorter(), i.e. by section
             // address. The caller must supply room for _machOSectionsCount entries.
1850         const bool log = false;
1851
1852         if ( log ) {
1853                 fprintf(stderr, "unsorted sections:\n");
1854                 for(unsigned int i=0; i < _machOSectionsCount; ++i )
1855                         fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[i].addr(), _sectionsStart[i].segname(), _sectionsStart[i].sectname());
1856         }
1857
1858         // sort section indexes by section address
             // (this is handed to qsort_r as the context pointer and reaches the
             // comparator as its first argument)
1859         for (uint32_t i=0; i < _machOSectionsCount; ++i)
1860                 array[i] = i;
1861         ::qsort_r(array, _machOSectionsCount, sizeof(uint32_t), this, &sectionIndexSorter);
1862
1863         if ( log ) {
1864                 fprintf(stderr, "sorted sections:\n");
1865                 for(unsigned int i=0; i < _machOSectionsCount; ++i )
1866                         fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[array[i]].addr(), _sectionsStart[array[i]].segname(), _sectionsStart[array[i]].sectname());
1867         }
1868 }
1869
1870
1871
1872 template <typename A>
1873 int Parser<A>::symbolIndexSorter(void* extra, const void* l, const void* r)
1874 {
             // qsort_r comparator over symbol-table indexes. extra is a
             // ParserAndSectionsArray carrying the Parser and the sorted section-index
             // array; l and r point at uint32_t symbol indexes.
             // Orders symbols by ascending n_value(). Ties are broken, in order, by:
             //   1. position of the symbol's section in sortedSectionsArray,
             //   2. N_EXT: the symbol without N_EXT sorts before the one with it,
             //   3. name: the alphabetically later name sorts first.
1875         ParserAndSectionsArray* extraInfo = (ParserAndSectionsArray*)extra;
1876         Parser<A>* parser = extraInfo->parser;
1877         const uint32_t* sortedSectionsArray = extraInfo->sortedSectionsArray;
1878         const uint32_t* left = (uint32_t*)l;
1879         const uint32_t* right = (uint32_t*)r;
1880         const macho_nlist<P>& leftSym = parser->symbolFromIndex(*left);
1881         const macho_nlist<P>& rightSym = parser->symbolFromIndex(*right);
1882         // can't just return difference because 64-bit diff does not fit in 32-bit return type
1883         int64_t result = leftSym.n_value() - rightSym.n_value();
1884         if ( result == 0 ) {
1885                 // two symbols with same address
1886                 // if in different sections, sort earlier section first
1887                 if ( leftSym.n_sect() != rightSym.n_sect() ) {
                             // n_sect() is compared against stored index + 1; whichever symbol's
                             // section is met first in sorted order wins. If neither is found the
                             // loop ends and the tie-breakers below apply.
1888                         for (uint32_t i=0; i < parser->machOSectionCount(); ++i) {
1889                                 if ( sortedSectionsArray[i]+1 == leftSym.n_sect() )
1890                                         return -1;
1891                                 if ( sortedSectionsArray[i]+1 == rightSym.n_sect() )
1892                                         return 1;
1893                         }
1894                 }
1895                 // two symbols in same section, means one is an alias
1896                 // if only one is global, make the other an alias (sort first)
1897                 if ( (leftSym.n_type() & N_EXT) != (rightSym.n_type() & N_EXT) ) {
1898                         if ( (rightSym.n_type() & N_EXT) != 0 )
1899                                 return -1;
1900                         else
1901                                 return 1;
1902                 }
1903                 // if both are global, make alphabetically last one be the alias
                     // (this line is also reached when neither symbol has N_EXT set;
                     // the strcmp arguments are swapped, so the later name sorts first)
1904                 return ( strcmp(parser->nameFromSymbol(rightSym), parser->nameFromSymbol(leftSym)) );
1905         }
1906         else if ( result < 0 )
1907                 return -1;
1908         else
1909                 return 1;
1910 }
1911
1912
1913 template <typename A>
1914 void Parser<A>::makeSortedSymbolsArray(uint32_t array[], const uint32_t sectionArray[])
1915 {
1916         const bool log = false;
1917
1918         uint32_t* p = array;
1919         for (uint32_t i=0; i < this->_symbolCount; ++i) {
1920                 const macho_nlist<P>& sym =     symbolFromIndex(i);
1921                 // ignore stabs
1922                 if ( (sym.n_type() & N_STAB) != 0 )
1923                         continue;
1924
1925                 // only look at definitions
1926                 if ( (sym.n_type() & N_TYPE) != N_SECT )
1927                         continue;
1928
1929                 // 'L' labels do not denote atom breaks
1930                 const char* symbolName = this->nameFromSymbol(sym);
1931                 if ( symbolName[0] == 'L' )
1932                         continue;
1933
1934                 // how many def syms in each section
1935                 if ( sym.n_sect() > _machOSectionsCount )
1936                         throw "bad n_sect in symbol table";
1937
1938                 // append to array
1939                 *p++ = i;
1940         }
1941         assert(p == &array[_symbolsInSections] && "second pass over symbol table yield a different number of symbols");
1942
1943         // sort by symbol table address
1944         ParserAndSectionsArray extra = { this, sectionArray };
1945         ::qsort_r(array, _symbolsInSections, sizeof(uint32_t), &extra, &symbolIndexSorter);
1946
1947         // look for two symbols at same address
1948         _overlappingSymbols = false;
1949         for (unsigned int i=1; i < _symbolsInSections; ++i) {
1950                 if ( symbolFromIndex(array[i-1]).n_value() == symbolFromIndex(array[i]).n_value() ) {
1951                         //fprintf(stderr, "overlapping symbols at 0x%08llX\n", symbolFromIndex(array[i-1]).n_value());
1952                         _overlappingSymbols = true;
1953                 }
1954         }
1955
1956         if ( log ) {
1957                 fprintf(stderr, "sorted symbols:\n");
1958                 for(unsigned int i=0; i < _symbolsInSections; ++i )
1959                         fprintf(stderr, "0x%09llX symIndex=%d sectNum=%2d, %s\n", symbolFromIndex(array[i]).n_value(), array[i], symbolFromIndex(array[i]).n_sect(), nameFromSymbol(symbolFromIndex(array[i])) );
1960         }
1961 }
1962
1963
//
// Builds the Section<A> objects for this object file.
//
// Works in two phases:
//   1) Walk the mach-o section headers, note dwarf / objc-image-info side
//      information on _file, and record in a temporary stack array which
//      Section<A> subclass each remaining section needs (and the summed
//      sizeof of those subclasses).
//   2) Allocate a single heap block holding an array of Section<A>* followed
//      by the section objects themselves, and placement-new each object.
//
// Results are published through _file->_sectionsArray and
// _file->_sectionsArrayCount; _EHFrameSection, _compactUnwindSection,
// _absoluteSection, _stubsSectionNum and _stubsMachOSection are set as the
// corresponding sections are encountered.  The heap block is not freed here.
//
// Throws (via throwf) on a section type that is not handled, and throws a
// string literal if the second phase meets an unexpected classification.
//
template <typename A>
void Parser<A>::makeSections()
{
        // classify each section by type
        // compute how many Section objects will be needed and total size for all
        unsigned int totalSectionsSize = 0;
        // NOTE(review): this is a runtime-sized uint8_t array that is then viewed as an
        // array of MachOSectionAndSectionClass<P>; alignment of the byte array is not
        // explicitly guaranteed here -- confirm this is acceptable on all targets.
        uint8_t machOSectsStorage[sizeof(MachOSectionAndSectionClass<P>)*(_machOSectionsCount+2)]; // also room for tentative-defs and absolute symbols
        // allocate raw storage for all section objects on stack
        MachOSectionAndSectionClass<P>* machOSects = (MachOSectionAndSectionClass<P>*)machOSectsStorage;
        // number of entries of machOSects in use; only advanced for sections that get a Section<A> object
        unsigned int count = 0;
        for (uint32_t i=0; i < _machOSectionsCount; ++i) {
                const macho_section<P>* sect = &_sectionsStart[i];
                if ( (sect->flags() & S_ATTR_DEBUG) != 0 ) {
                        if ( strcmp(sect->segname(), "__DWARF") == 0 ) {
                                // note that .o file has dwarf
                                _file->_debugInfoKind = ld::relocatable::File::kDebugInfoDwarf;
                                // save off interesting dwarf sections
                                if ( strcmp(sect->sectname(), "__debug_info") == 0 )
                                        _file->_dwarfDebugInfoSect = sect;
                                else if ( strcmp(sect->sectname(), "__debug_abbrev") == 0 )
                                        _file->_dwarfDebugAbbrevSect = sect;
                                else if ( strcmp(sect->sectname(), "__debug_line") == 0 )
                                        _file->_dwarfDebugLineSect = sect;
                                else if ( strcmp(sect->sectname(), "__debug_str") == 0 )
                                        _file->_dwarfDebugStringSect = sect;
                                // linker does not propagate dwarf sections to output file
                                continue;
                        }
                        else if ( strcmp(sect->segname(), "__LD") == 0 ) {
                                // strncmp with length 16: a mach-o section name field is 16 bytes
                                // and a 16-character name such as this one has no terminating NUL
                                if ( strncmp(sect->sectname(), "__compact_unwind", 16) == 0 ) {
                                        machOSects[count].sect = sect;
                                        totalSectionsSize += sizeof(CUSection<A>);
                                        machOSects[count++].type = sectionTypeCompactUnwind;
                                        continue;
                                }
                        }
                        // any other debug-attributed section falls through and is classified normally
                }
                // ignore empty __OBJC sections
                if ( (sect->size() == 0) && (strcmp(sect->segname(), "__OBJC") == 0) )
                        continue;
                // objc image info section is really attributes and not content
                if ( ((strcmp(sect->sectname(), "__image_info") == 0) && (strcmp(sect->segname(), "__OBJC") == 0))
                        || ((strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0) && (strcmp(sect->segname(), "__DATA") == 0)) ) {
                        //      struct objc_image_info  {
                        //              uint32_t        version;        // initially 0
                        //              uint32_t        flags;
                        //      };
                        // #define OBJC_IMAGE_SUPPORTS_GC   2
                        // #define OBJC_IMAGE_GC_ONLY       4
                        //
                        const uint32_t* contents = (uint32_t*)(_file->fileContent()+sect->offset());
                        // only the version==0 layout is understood; size is checked first so
                        // contents[] is not read when the section is too small
                        if ( (sect->size() >= 8) && (contents[0] == 0) ) {
                                uint32_t flags = E::get32(contents[1]);
                                // GC-only (4) takes precedence over supports-GC (2)
                                if ( (flags & 4) == 4 )
                                        _file->_objConstraint = ld::File::objcConstraintGC;
                                else if ( (flags & 2) == 2 )
                                        _file->_objConstraint = ld::File::objcConstraintRetainReleaseOrGC;
                                else
                                        _file->_objConstraint = ld::File::objcConstraintRetainRelease;
                                if ( sect->size() > 8 ) {
                                        warning("section %s/%s has unexpectedly large size %llu in %s",
                                                        sect->segname(), Section<A>::makeSectionName(sect), sect->size(), _file->path());
                                }
                        }
                        else {
                                warning("can't parse %s/%s section in %s", sect->segname(), Section<A>::makeSectionName(sect), _file->path());
                        }
                        // image info never becomes a Section<A> object
                        continue;
                }
                // tentatively claim the next slot; it is only kept if a case below does count++
                machOSects[count].sect = sect;
                switch ( sect->flags() & SECTION_TYPE ) {
                        case S_SYMBOL_STUBS:
                                // remember the first stubs section (1-based section number and header)
                                if ( _stubsSectionNum == 0 ) {
                                        _stubsSectionNum = i+1;
                                        _stubsMachOSection = sect;
                                }
                                else
                                        // NOTE(review): assert(1 && ...) can never fire; presumably
                                        // assert(0 && ...) was intended -- confirm before changing.
                                        assert(1 && "multiple S_SYMBOL_STUBS sections");
                                // falls through: like lazy pointers, stubs get no Section<A> object (count is not advanced)
                        case S_LAZY_SYMBOL_POINTERS:
                                break;
                        case S_4BYTE_LITERALS:
                                totalSectionsSize += sizeof(Literal4Section<A>);
                                machOSects[count++].type = sectionTypeLiteral4;
                                break;
                        case S_8BYTE_LITERALS:
                                totalSectionsSize += sizeof(Literal8Section<A>);
                                machOSects[count++].type = sectionTypeLiteral8;
                                break;
                        case S_16BYTE_LITERALS:
                                totalSectionsSize += sizeof(Literal16Section<A>);
                                machOSects[count++].type = sectionTypeLiteral16;
                                break;
                        case S_NON_LAZY_SYMBOL_POINTERS:
                                totalSectionsSize += sizeof(NonLazyPointerSection<A>);
                                machOSects[count++].type = sectionTypeNonLazy;
                                break;
                        case S_LITERAL_POINTERS:
                                // __OBJC,__cls_refs gets its own class; every other literal-pointer
                                // section is treated as pointers to C strings
                                if ( (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__cls_refs") == 0) ) {
                                        totalSectionsSize += sizeof(Objc1ClassReferences<A>);
                                        machOSects[count++].type = sectionTypeObjC1ClassRefs;
                                }
                                else {
                                        totalSectionsSize += sizeof(PointerToCStringSection<A>);
                                        machOSects[count++].type = sectionTypeCStringPointer;
                                }
                                break;
                        case S_CSTRING_LITERALS:
                                totalSectionsSize += sizeof(CStringSection<A>);
                                machOSects[count++].type = sectionTypeCString;
                                break;
                        // the following section types are further classified by segment/section name
                        case S_MOD_INIT_FUNC_POINTERS:
                        case S_MOD_TERM_FUNC_POINTERS:
                        case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
                        case S_INTERPOSING:
                        case S_ZEROFILL:
                        case S_REGULAR:
                        case S_COALESCED:
                        case S_THREAD_LOCAL_REGULAR:
                        case S_THREAD_LOCAL_ZEROFILL:
                                if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__eh_frame") == 0) ) {
                                        totalSectionsSize += sizeof(CFISection<A>);
                                        machOSects[count++].type = sectionTypeCFI;
                                }
                                else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__cfstring") == 0) ) {
                                        totalSectionsSize += sizeof(CFStringSection<A>);
                                        machOSects[count++].type = sectionTypeCFString;
                                }
                                else if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__ustring") == 0) ) {
                                        totalSectionsSize += sizeof(UTF16StringSection<A>);
                                        machOSects[count++].type = sectionTypeUTF16Strings;
                                }
                                else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strncmp(sect->sectname(), "__objc_classrefs", 16) == 0) ) {
                                        totalSectionsSize += sizeof(ObjC2ClassRefsSection<A>);
                                        machOSects[count++].type = sectionTypeObjC2ClassRefs;
                                }
                                else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__objc_catlist") == 0) ) {
                                        totalSectionsSize += sizeof(ObjC2CategoryListSection<A>);
                                        machOSects[count++].type = typeObjC2CategoryList;
                                }
                                else if ( _AppleObjc && (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__class") == 0) ) {
                                        totalSectionsSize += sizeof(ObjC1ClassSection<A>);
                                        machOSects[count++].type = sectionTypeObjC1Classes;
                                }
                                else {
                                        // everything else has its atoms defined by symbols
                                        totalSectionsSize += sizeof(SymboledSection<A>);
                                        machOSects[count++].type = sectionTypeSymboled;
                                }
                                break;
                        case S_THREAD_LOCAL_VARIABLES:
                                totalSectionsSize += sizeof(TLVDefsSection<A>);
                                machOSects[count++].type = sectionTypeTLVDefs;
                                break;
                        // thread-local variable pointer sections are rejected together with unknown types
                        case S_THREAD_LOCAL_VARIABLE_POINTERS:
                        default:
                                throwf("unknown section type %d", sect->flags() & SECTION_TYPE);
                }
        }

        // sort by address (mach-o object files don't always have sections sorted)
        ::qsort(machOSects, count, sizeof(MachOSectionAndSectionClass<P>), MachOSectionAndSectionClass<P>::sorter);

        // The two synthetic entries below are appended after the sort, so they stay
        // at the end of the array; only their .type field is assigned here.

        // we will synthesize a dummy Section<A> object for tentative definitions
        if ( _tentativeDefinitionCount > 0 ) {
                totalSectionsSize += sizeof(TentativeDefinitionSection<A>);
                machOSects[count++].type = sectionTypeTentativeDefinitions;
        }

        // we will synthesize a dummy Section<A> object for Absolute symbols
        if ( _absoluteSymbolCount > 0 ) {
                totalSectionsSize += sizeof(AbsoluteSymbolSection<A>);
                machOSects[count++].type = sectionTypeAbsoluteSymbols;
        }

        // allocate one block for all Section objects as well as pointers to each
        // layout: [ count x Section<A>* ][ section object 0 ][ section object 1 ] ...
        uint8_t* space = new uint8_t[totalSectionsSize+count*sizeof(Section<A>*)];
        _file->_sectionsArray = (Section<A>**)space;
        _file->_sectionsArrayCount = count;
        // 'objects' walks the pointer array; 'space' walks the object storage that follows it
        Section<A>** objects = _file->_sectionsArray;
        space += count*sizeof(Section<A>*);
        // Each case must advance 'space' by the same sizeof that was added to
        // totalSectionsSize for that classification in the first loop.
        for (uint32_t i=0; i < count; ++i) {
                switch ( machOSects[i].type ) {
                        case sectionTypeIgnore:
                                // nothing in this function assigns sectionTypeIgnore; handled defensively
                                break;
                        case sectionTypeLiteral4:
                                *objects++ = new (space) Literal4Section<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(Literal4Section<A>);
                                break;
                        case sectionTypeLiteral8:
                                *objects++ = new (space) Literal8Section<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(Literal8Section<A>);
                                break;
                        case sectionTypeLiteral16:
                                *objects++ = new (space) Literal16Section<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(Literal16Section<A>);
                                break;
                        case sectionTypeNonLazy:
                                *objects++ = new (space) NonLazyPointerSection<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(NonLazyPointerSection<A>);
                                break;
                        case sectionTypeCFI:
                                // also remembered in _EHFrameSection
                                _EHFrameSection = new (space) CFISection<A>(*this, *_file, machOSects[i].sect);
                                *objects++ = _EHFrameSection;
                                space += sizeof(CFISection<A>);
                                break;
                        case sectionTypeCString:
                                *objects++ = new (space) CStringSection<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(CStringSection<A>);
                                break;
                        case sectionTypeCStringPointer:
                                *objects++ = new (space) PointerToCStringSection<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(PointerToCStringSection<A>);
                                break;
                        case sectionTypeObjC1ClassRefs:
                                *objects++ = new (space) Objc1ClassReferences<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(Objc1ClassReferences<A>);
                                break;
                        case sectionTypeUTF16Strings:
                                *objects++ = new (space) UTF16StringSection<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(UTF16StringSection<A>);
                                break;
                        case sectionTypeCFString:
                                *objects++ = new (space) CFStringSection<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(CFStringSection<A>);
                                break;
                        case sectionTypeObjC2ClassRefs:
                                *objects++ = new (space) ObjC2ClassRefsSection<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(ObjC2ClassRefsSection<A>);
                                break;
                        case typeObjC2CategoryList:
                                *objects++ = new (space) ObjC2CategoryListSection<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(ObjC2CategoryListSection<A>);
                                break;
                        case sectionTypeObjC1Classes:
                                *objects++ = new (space) ObjC1ClassSection<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(ObjC1ClassSection<A>);
                                break;
                        case sectionTypeSymboled:
                                *objects++ = new (space) SymboledSection<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(SymboledSection<A>);
                                break;
                        case sectionTypeTLVDefs:
                                *objects++ = new (space) TLVDefsSection<A>(*this, *_file, machOSects[i].sect);
                                space += sizeof(TLVDefsSection<A>);
                                break;
                        case sectionTypeCompactUnwind:
                                // also remembered in _compactUnwindSection
                                _compactUnwindSection = new (space) CUSection<A>(*this, *_file, machOSects[i].sect);
                                *objects++ = _compactUnwindSection;
                                space += sizeof(CUSection<A>);
                                break;
                        case sectionTypeTentativeDefinitions:
                                // synthetic section: constructed without a mach-o section header
                                *objects++ = new (space) TentativeDefinitionSection<A>(*this, *_file);
                                space += sizeof(TentativeDefinitionSection<A>);
                                break;
                        case sectionTypeAbsoluteSymbols:
                                // synthetic section: constructed without a mach-o section header;
                                // also remembered in _absoluteSection
                                _absoluteSection = new (space) AbsoluteSymbolSection<A>(*this, *_file);
                                *objects++ = _absoluteSection;
                                space += sizeof(AbsoluteSymbolSection<A>);
                                break;
                        default:
                                throw "internal error uknown SectionType";
                }
        }
}
2227
2228
2229 template <typename A>
2230 Section<A>* Parser<A>::sectionForAddress(typename A::P::uint_t addr)
2231 {
2232         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2233                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2234                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2235                 if ( sect != NULL ) {
2236                         if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
2237                                 return _file->_sectionsArray[i];
2238                         }
2239                 }
2240         }
2241         // not strictly in any section
2242         // may be in a zero length section
2243         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2244                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2245                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2246                 if ( sect != NULL ) {
2247                         if ( (sect->addr() == addr) && (sect->size() == 0) ) {
2248                                 return _file->_sectionsArray[i];
2249                         }
2250                 }
2251         }
2252
2253         throwf("sectionForAddress(0x%llX) address not in any section", (uint64_t)addr);
2254 }
2255
2256 template <typename A>
2257 Section<A>* Parser<A>::sectionForNum(unsigned int num)
2258 {
2259         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2260                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2261                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2262                 if ( sect != NULL ) {
2263                         if ( num == (unsigned int)((sect - _sectionsStart)+1) )
2264                                 return _file->_sectionsArray[i];
2265                 }
2266         }
2267         throwf("sectionForNum(%u) section number not for any section", num);
2268 }
2269
2270 template <typename A>
2271 Atom<A>* Parser<A>::findAtomByAddress(pint_t addr)
2272 {
2273         Section<A>* section = this->sectionForAddress(addr);
2274         return section->findAtomByAddress(addr);
2275 }
2276
2277 template <typename A>
2278 Atom<A>* Parser<A>::findAtomByAddressOrNullIfStub(pint_t addr)
2279 {
2280         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) )
2281                 return NULL;
2282         return findAtomByAddress(addr);
2283 }
2284
2285 template <typename A>
2286 Atom<A>* Parser<A>::findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom)
2287 {
2288         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2289                 // target is a stub, remove indirection
2290                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2291                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2292                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2293                 // can't be to external weak symbol
2294                 assert( (this->combineFromSymbol(sym) != ld::Atom::combineByName) || (this->scopeFromSymbol(sym) != ld::Atom::scopeGlobal) );
2295                 *offsetInAtom = 0;
2296                 return this->findAtomByName(this->nameFromSymbol(sym));
2297         }
2298         Atom<A>* target = this->findAtomByAddress(addr);
2299         *offsetInAtom = addr - target->_objAddress;
2300         return target;
2301 }
2302
2303 template <typename A>
2304 Atom<A>* Parser<A>::findAtomByName(const char* name)
2305 {
2306         uint8_t* p = _file->_atomsArray;
2307         for(int i=_file->_atomsArrayCount; i > 0; --i) {
2308                 Atom<A>* atom = (Atom<A>*)p;
2309                 if ( strcmp(name, atom->name()) == 0 )
2310                         return atom;
2311                 p += sizeof(Atom<A>);
2312         }
2313         return NULL;
2314 }
2315
2316 template <typename A>
2317 void Parser<A>::findTargetFromAddress(pint_t addr, TargetDesc& target)
2318 {
2319         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2320                 // target is a stub, remove indirection
2321                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2322                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2323                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2324                 target.atom = NULL;
2325                 target.name = this->nameFromSymbol(sym);
2326                 target.weakImport = this->weakImportFromSymbol(sym);
2327                 target.addend = 0;
2328                 return;
2329         }
2330         Section<A>* section = this->sectionForAddress(addr);
2331         target.atom = section->findAtomByAddress(addr);
2332         target.addend = addr - target.atom->_objAddress;
2333         target.weakImport = false;
2334         target.name = NULL;
2335 }
2336
2337 template <typename A>
2338 void Parser<A>::findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target)
2339 {
2340         findTargetFromAddress(baseAddr, target);
2341         target.addend = addr - target.atom->_objAddress;
2342 }
2343
2344 template <typename A>
2345 void Parser<A>::findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum, TargetDesc& target)
2346 {
2347         if ( sectNum == R_ABS ) {
2348                 // target is absolute symbol that corresponds to addr
2349                 if ( _absoluteSection != NULL ) {
2350                         target.atom = _absoluteSection->findAbsAtomForValue(addr);
2351                         if ( target.atom != NULL ) {
2352                                 target.name = NULL;
2353                                 target.weakImport = false;
2354                                 target.addend = 0;
2355                                 return;
2356                         }
2357                 }
2358                 throwf("R_ABS reloc but no absolute symbol at target address");
2359         }
2360
2361         if ( hasStubsSection() && (stubsSectionNum() == sectNum) ) {
2362                 // target is a stub, remove indirection
2363                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2364                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2365                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2366                 // use direct reference when stub is to a static function
2367                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (this->nameFromSymbol(sym)[0] == 'L')) ) {
2368                         this->findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
2369                 }
2370                 else {
2371                         target.atom = NULL;
2372                         target.name = this->nameFromSymbol(sym);
2373                         target.weakImport = this->weakImportFromSymbol(sym);
2374                         target.addend = 0;
2375                 }
2376                 return;
2377         }
2378         Section<A>* section = this->sectionForNum(sectNum);
2379         target.atom = section->findAtomByAddress(addr);
2380         if ( target.atom == NULL ) {
2381                 typedef typename A::P::sint_t sint_t;
2382                 sint_t a = (sint_t)addr;
2383                 sint_t sectStart = (sint_t)(section->machoSection()->addr());
2384                 sint_t sectEnd  = sectStart + section->machoSection()->size();
2385                 if ( a < sectStart ) {
2386                         // target address is before start of section, so must be negative addend
2387                         target.atom = section->findAtomByAddress(sectStart);
2388                         target.addend = a - sectStart;
2389                         target.weakImport = false;
2390                         target.name = NULL;
2391                         return;
2392                 }
2393                 else if ( a >= sectEnd ) {
2394                         target.atom = section->findAtomByAddress(sectEnd-1);
2395                         target.addend = a - sectEnd;
2396                         target.weakImport = false;
2397                         target.name = NULL;
2398                         return;
2399                 }
2400         }
2401         assert(target.atom != NULL);
2402         target.addend = addr - target.atom->_objAddress;
2403         target.weakImport = false;
2404         target.name = NULL;
2405 }
2406
2407 template <typename A>
2408 void Parser<A>::addDtraceExtraInfos(const SourceLocation& src, const char* providerName)
2409 {
2410         // for every ___dtrace_stability$* and ___dtrace_typedefs$* undefine with
2411         // a matching provider name, add a by-name kDtraceTypeReference at probe site
2412         const char* dollar = strchr(providerName, '$');
2413         if ( dollar != NULL ) {
2414                 int providerNameLen = dollar-providerName+1;
2415                 for ( std::vector<const char*>::iterator it = _dtraceProviderInfo.begin(); it != _dtraceProviderInfo.end(); ++it) {
2416                         const char* typeDollar = strchr(*it, '$');
2417                         if ( typeDollar != NULL ) {
2418                                 if ( strncmp(typeDollar+1, providerName, providerNameLen) == 0 ) {
2419                                         addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindDtraceExtra,false, *it);
2420                                 }
2421                         }
2422                 }
2423         }
2424 }
2425
2426 template <typename A>
2427 const char* Parser<A>::scanSymbolTableForAddress(uint64_t addr)
2428 {
2429         uint64_t closestSymAddr = 0;
2430         const char* closestSymName = NULL;
2431         for (uint32_t i=0; i < this->_symbolCount; ++i) {
2432                 const macho_nlist<P>& sym =     symbolFromIndex(i);
2433                 // ignore stabs
2434                 if ( (sym.n_type() & N_STAB) != 0 )
2435                         continue;
2436
2437                 // only look at definitions
2438                 if ( (sym.n_type() & N_TYPE) != N_SECT )
2439                         continue;
2440
2441                 // return with exact match
2442                 if ( sym.n_value() == addr )
2443                         return nameFromSymbol(sym);
2444
2445                 // record closest seen so far
2446                 if ( (sym.n_value() < addr) && ((sym.n_value() > closestSymAddr) || (closestSymName == NULL)) )
2447                         closestSymName = nameFromSymbol(sym);
2448         }
2449
2450         return (closestSymName != NULL) ? closestSymName : "unknown";
2451 }
2452
2453
2454 template <typename A>
2455 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind setKind, const TargetDesc& target)
2456 {
2457         // some fixup pairs can be combined
2458         ld::Fixup::Cluster cl = ld::Fixup::k1of3;
2459         ld::Fixup::Kind firstKind = ld::Fixup::kindSetTargetAddress;
2460         bool combined = false;
2461         if ( target.addend == 0 ) {
2462                 cl = ld::Fixup::k1of1;
2463                 combined = true;
2464                 switch ( setKind ) {
2465                         case ld::Fixup::kindStoreLittleEndian32:
2466                                 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian32;
2467                                 break;
2468                         case ld::Fixup::kindStoreLittleEndian64:
2469                                 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian64;
2470                                 break;
2471                         case ld::Fixup::kindStoreBigEndian32:
2472                                 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian32;
2473                                 break;
2474                         case ld::Fixup::kindStoreBigEndian64:
2475                                 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian64;
2476                                 break;
2477                         case ld::Fixup::kindStoreX86BranchPCRel32:
2478                                 firstKind = ld::Fixup::kindStoreTargetAddressX86BranchPCRel32;
2479                                 break;
2480                         case ld::Fixup::kindStoreX86PCRel32:
2481                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32;
2482                                 break;
2483                         case ld::Fixup::kindStoreX86PCRel32GOTLoad:
2484                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32GOTLoad;
2485                                 break;
2486                         case ld::Fixup::kindStoreX86PCRel32TLVLoad:
2487                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32TLVLoad;
2488                                 break;
2489                         case ld::Fixup::kindStoreX86Abs32TLVLoad:
2490                                 firstKind = ld::Fixup::kindStoreTargetAddressX86Abs32TLVLoad;
2491                                 break;
2492                         case ld::Fixup::kindStoreARMBranch24:
2493                                 firstKind = ld::Fixup::kindStoreTargetAddressARMBranch24;
2494                                 break;
2495                         case ld::Fixup::kindStoreThumbBranch22:
2496                                 firstKind = ld::Fixup::kindStoreTargetAddressThumbBranch22;
2497                                 break;
2498                         default:
2499                                 combined = false;
2500                                 cl = ld::Fixup::k1of2;
2501                                 break;
2502                 }
2503         }
2504
2505         if ( target.atom != NULL ) {
2506                 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2507                         addFixup(src, cl, firstKind, target.atom);
2508                 }
2509                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2510                         addFixup(src, cl, firstKind, ld::Fixup::bindingByContentBound, target.atom);
2511                 }
2512                 else if ( (src.atom->section().type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
2513                         // backing string in CFStrings should always be direct
2514                         addFixup(src, cl, firstKind, target.atom);
2515                 }
2516                 else {
2517                         // change direct fixup to by-name fixup
2518                         addFixup(src, cl, firstKind, false, target.atom->name());
2519                 }
2520         }
2521         else {
2522                 addFixup(src, cl, firstKind, target.weakImport, target.name);
2523         }
2524         if ( target.addend == 0 ) {
2525                 if ( ! combined )
2526                         addFixup(src, ld::Fixup::k2of2, setKind);
2527         }
2528         else {
2529                 addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, target.addend);
2530                 addFixup(src, ld::Fixup::k3of3, setKind);
2531         }
2532 }
2533
2534 template <typename A>
2535 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase)
2536 {
2537         ld::Fixup::Cluster cl = (target.addend == 0) ? ld::Fixup::k1of4 : ld::Fixup::k1of5;
2538         if ( target.atom != NULL ) {
2539                 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2540                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.atom);
2541                 }
2542                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2543                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
2544                 }
2545                 else {
2546                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
2547                 }
2548         }
2549         else {
2550                 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.weakImport, target.name);
2551         }
2552         if ( target.addend == 0 ) {
2553                 assert(picBase.atom != NULL);
2554                 addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2555                 addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, picBase.addend);
2556                 addFixup(src, ld::Fixup::k4of4, kind);
2557         }
2558         else {
2559                 addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
2560                 addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2561                 addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, picBase.addend);
2562                 addFixup(src, ld::Fixup::k5of5, kind);
2563         }
2564 }
2565
2566
2567
2568 template <typename A>
2569 uint32_t TentativeDefinitionSection<A>::computeAtomCount(class Parser<A>& parser,
2570                                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
2571                                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
2572 {
2573         return parser.tentativeDefinitionCount();
2574 }
2575
2576 template <typename A>
2577 uint32_t TentativeDefinitionSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2578                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
2579                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
2580 {
2581         this->_beginAtoms = (Atom<A>*)p;
2582         uint32_t count = 0;
2583         for (uint32_t i=parser.undefinedStartIndex(); i < parser.undefinedEndIndex(); ++i) {
2584                 const macho_nlist<P>& sym =     parser.symbolFromIndex(i);
2585                 if ( ((sym.n_type() & N_TYPE) == N_UNDF) && (sym.n_value() != 0) ) {
2586                         uint64_t size = sym.n_value();
2587                         uint8_t alignP2 = GET_COMM_ALIGN(sym.n_desc());
2588                         if ( alignP2 == 0 ) {
2589                                 // common symbols align to their size
2590                                 // that is, a 4-byte common aligns to 4-bytes
2591                                 // if this size is not a power of two,
2592                                 // then round up to the next power of two
2593                                 alignP2 = 63 - (uint8_t)__builtin_clzll(size);
2594                                 if ( size != (1ULL << alignP2) )
2595                                         ++alignP2;
2596                         }
2597                         // limit alignment of extremely large commons to 2^15 bytes (8-page)
2598                         if ( alignP2 > 15 )
2599                                 alignP2 = 15;
2600                         Atom<A>* allocatedSpace = (Atom<A>*)p;
2601                         new (allocatedSpace) Atom<A>(*this, parser.nameFromSymbol(sym), (pint_t)ULLONG_MAX, size,
2602                                                                                 ld::Atom::definitionTentative,  ld::Atom::combineByName,
2603                                                                                 parser.scopeFromSymbol(sym), ld::Atom::typeZeroFill, ld::Atom::symbolTableIn,
2604                                                                                 parser.dontDeadStripFromSymbol(sym), false, false, ld::Atom::Alignment(alignP2) );
2605                         p += sizeof(Atom<A>);
2606                         ++count;
2607                 }
2608         }
2609         this->_endAtoms = (Atom<A>*)p;
2610         return count;
2611 }
2612
2613
2614 template <typename A>
2615 uint32_t AbsoluteSymbolSection<A>::computeAtomCount(class Parser<A>& parser,
2616                                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
2617                                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
2618 {
2619         return parser.absoluteSymbolCount();
2620 }
2621
2622 template <typename A>
2623 uint32_t AbsoluteSymbolSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2624                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
2625                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
2626 {
2627         this->_beginAtoms = (Atom<A>*)p;
2628         uint32_t count = 0;
2629         for (uint32_t i=0; i < parser.symbolCount(); ++i) {
2630                 const macho_nlist<P>& sym =     parser.symbolFromIndex(i);
2631                 if ( (sym.n_type() & N_TYPE) != N_ABS )
2632                         continue;
2633                 const char* absName = parser.nameFromSymbol(sym);
2634                 // ignore .objc_class_name_* symbols
2635                 if ( strncmp(absName, ".objc_class_name_", 17) == 0 )
2636                         continue;
2637                 // ignore .objc_class_name_* symbols
2638                 if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
2639                         continue;
2640                 // ignore empty *.eh symbols
2641                 if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
2642                         continue;
2643
2644                 Atom<A>* allocatedSpace = (Atom<A>*)p;
2645                 new (allocatedSpace) Atom<A>(*this, parser, sym, 0);
2646                 p += sizeof(Atom<A>);
2647                 ++count;
2648         }
2649         this->_endAtoms = (Atom<A>*)p;
2650         return count;
2651 }
2652
2653 template <typename A>
2654 Atom<A>* AbsoluteSymbolSection<A>::findAbsAtomForValue(typename A::P::uint_t value)
2655 {
2656         Atom<A>* end = this->_endAtoms;
2657         for(Atom<A>* p = this->_beginAtoms; p < end; ++p) {
2658                 if ( p->_objAddress == value )
2659                         return p;
2660         }
2661         return NULL;
2662 }
2663
2664
2665 template <typename A>
2666 uint32_t Parser<A>::indirectSymbol(uint32_t indirectIndex)
2667 {
2668         if ( indirectIndex >= _indirectTableCount )
2669                 throw "indirect symbol index out of range";
2670         return E::get32(_indirectTable[indirectIndex]);
2671 }
2672
2673 template <typename A>
2674 const macho_nlist<typename A::P>& Parser<A>::symbolFromIndex(uint32_t index)
2675 {
2676         if ( index > _symbolCount )
2677                 throw "symbol index out of range";
2678         return _symbols[index];
2679 }
2680
2681 template <typename A>
2682 const macho_section<typename A::P>*     Parser<A>::machOSectionFromSectionIndex(uint32_t index)
2683 {
2684         if ( index >= _machOSectionsCount )
2685                 throw "section index out of range";
2686         return &_sectionsStart[index];
2687 }
2688
2689 template <typename A>
2690 uint32_t Parser<A>::symbolIndexFromIndirectSectionAddress(pint_t addr, const macho_section<P>* sect)
2691 {
2692         uint32_t elementSize = 0;
2693         switch ( sect->flags() & SECTION_TYPE ) {
2694                 case S_SYMBOL_STUBS:
2695                         elementSize = sect->reserved2();
2696                         break;
2697                 case S_LAZY_SYMBOL_POINTERS:
2698                 case S_NON_LAZY_SYMBOL_POINTERS:
2699                         elementSize = sizeof(pint_t);
2700                         break;
2701                 default:
2702                         throw "section does not use inirect symbol table";
2703         }
2704         uint32_t indexInSection = (addr - sect->addr()) / elementSize;
2705         uint32_t indexIntoIndirectTable = sect->reserved1() + indexInSection;
2706         return this->indirectSymbol(indexIntoIndirectTable);
2707 }
2708
2709
2710
2711 template <typename A>
2712 const char* Parser<A>::nameFromSymbol(const macho_nlist<P>& sym)
2713 {
2714         return &_strings[sym.n_strx()];
2715 }
2716
2717 template <typename A>
2718 ld::Atom::Scope Parser<A>::scopeFromSymbol(const macho_nlist<P>& sym)
2719 {
2720         if ( (sym.n_type() & N_EXT) == 0 )
2721                 return ld::Atom::scopeTranslationUnit;
2722         else if ( (sym.n_type() & N_PEXT) != 0 )
2723                 return ld::Atom::scopeLinkageUnit;
2724         else if ( this->nameFromSymbol(sym)[0] == 'l' ) // since all 'l' symbols will be remove, don't make them global
2725                 return ld::Atom::scopeLinkageUnit;
2726         else
2727                 return ld::Atom::scopeGlobal;
2728 }
2729
2730 template <typename A>
2731 ld::Atom::Definition Parser<A>::definitionFromSymbol(const macho_nlist<P>& sym)
2732 {
2733         switch ( sym.n_type() & N_TYPE ) {
2734                 case N_ABS:
2735                         return ld::Atom::definitionAbsolute;
2736                 case N_SECT:
2737                         return ld::Atom::definitionRegular;
2738                 case N_UNDF:
2739                         if ( sym.n_value() != 0 )
2740                                 return ld::Atom::definitionTentative;
2741         }
2742         throw "definitionFromSymbol() bad symbol";
2743 }
2744
2745 template <typename A>
2746 ld::Atom::Combine Parser<A>::combineFromSymbol(const macho_nlist<P>& sym)
2747 {
2748         if ( sym.n_desc() & N_WEAK_DEF )
2749                 return ld::Atom::combineByName;
2750         else
2751                 return ld::Atom::combineNever;
2752 }
2753
2754
2755 template <typename A>
2756 ld::Atom::SymbolTableInclusion Parser<A>::inclusionFromSymbol(const macho_nlist<P>& sym)
2757 {
2758         const char* symbolName = nameFromSymbol(sym);
2759         // labels beginning with 'l' (lowercase ell) are automatically removed in final linked images <rdar://problem/4571042>
2760         // labels beginning with 'L' should have been stripped by the assembler, so are stripped now
2761         if ( sym.n_desc() & REFERENCED_DYNAMICALLY )
2762                 return ld::Atom::symbolTableInAndNeverStrip;
2763         else if ( symbolName[0] == 'l' )
2764                 return ld::Atom::symbolTableNotInFinalLinkedImages;
2765         else if ( symbolName[0] == 'L' )
2766                 return ld::Atom::symbolTableNotIn;
2767         else
2768                 return ld::Atom::symbolTableIn;
2769 }
2770
2771 template <typename A>
2772 bool Parser<A>::dontDeadStripFromSymbol(const macho_nlist<P>& sym)
2773 {
2774         return ( (sym.n_desc() & (N_NO_DEAD_STRIP|REFERENCED_DYNAMICALLY)) != 0 );
2775 }
2776
2777 template <typename A>
2778 bool Parser<A>::isThumbFromSymbol(const macho_nlist<P>& sym)
2779 {
2780         return ( sym.n_desc() & N_ARM_THUMB_DEF );
2781 }
2782
2783 template <typename A>
2784 bool Parser<A>::weakImportFromSymbol(const macho_nlist<P>& sym)
2785 {
2786         return ( ((sym.n_type() & N_TYPE) == N_UNDF) && ((sym.n_desc() & N_WEAK_REF) != 0) );
2787 }
2788
2789 template <typename A>
2790 bool Parser<A>::resolverFromSymbol(const macho_nlist<P>& sym)
2791 {
2792         return ( sym.n_desc() & N_SYMBOL_RESOLVER );
2793 }
2794
2795
/* Advance *OFFSET past one LEB128 value (signed or unsigned): every byte
   with the high bit set, plus the byte that terminates the value.  Never
   moves beyond END; a value truncated by END leaves *OFFSET equal to END.  */
static void
skip_leb128 (const uint8_t ** offset, const uint8_t * end)
{
  const uint8_t * cursor = *offset;

  while (cursor != end)
    {
      const uint8_t byte = *cursor++;
      if (byte < 0x80)
        break;
    }
  *offset = cursor;
}
2805
/* Decode a ULEB128 starting at *OFFSET into a 64-bit word and advance
   *OFFSET past it.  Returns (uint64_t)-1 if the value does not fit in
   64 bits or if END is reached before the final byte.  On overflow the
   remaining bytes of the uleb128 are still consumed.  */
static uint64_t
read_uleb128 (const uint8_t ** offset, const uint8_t * end)
{
  uint64_t value = 0;
  int shift = 0;

  for (;;)
    {
      if (*offset == end)
        return (uint64_t) -1;

      const uint8_t byte = *(*offset)++;
      const uint64_t payload = byte & 0x7f;

      /* Accept the 7 payload bits only if none of them is shifted out.  */
      if (shift < 64 && ((payload << shift) >> shift) == payload)
        {
          value |= payload << shift;
          shift += 7;
        }
      else
        value = (uint64_t) -1;

      /* High bit clear marks the last byte of the value.  */
      if (byte < 0x80)
        return value;
    }
}
2829
2830
2831 /* Skip over a DWARF attribute of form FORM.  */
2832 template <typename A>
2833 bool Parser<A>::skip_form(const uint8_t ** offset, const uint8_t * end, uint64_t form,
2834                                                         uint8_t addr_size, bool dwarf64)
2835 {
2836   int64_t sz=0;
2837
2838   switch (form)
2839     {
2840     case DW_FORM_addr:
2841       sz = addr_size;
2842       break;
2843
2844     case DW_FORM_block2:
2845       if (end - *offset < 2)
2846         return false;
2847       sz = 2 + A::P::E::get16(*(uint16_t*)offset);
2848       break;
2849
2850     case DW_FORM_block4:
2851       if (end - *offset < 4)
2852         return false;
2853       sz = 2 + A::P::E::get32(*(uint32_t*)offset);
2854       break;
2855
2856     case DW_FORM_data2:
2857     case DW_FORM_ref2:
2858       sz = 2;
2859       break;
2860
2861     case DW_FORM_data4:
2862     case DW_FORM_ref4:
2863       sz = 4;
2864       break;
2865
2866     case DW_FORM_data8:
2867     case DW_FORM_ref8:
2868       sz = 8;
2869       break;
2870
2871     case DW_FORM_string:
2872       while (*offset != end && **offset)
2873         ++*offset;
2874     case DW_FORM_data1:
2875     case DW_FORM_flag:
2876     case DW_FORM_ref1:
2877       sz = 1;
2878       break;
2879
2880     case DW_FORM_block:
2881       sz = read_uleb128 (offset, end);
2882       break;
2883
2884     case DW_FORM_block1:
2885       if (*offset == end)
2886         return false;
2887       sz = 1 + **offset;
2888       break;
2889
2890     case DW_FORM_sdata:
2891     case DW_FORM_udata:
2892     case DW_FORM_ref_udata:
2893       skip_leb128 (offset, end);
2894       return true;
2895
2896     case DW_FORM_strp:
2897     case DW_FORM_ref_addr:
2898       sz = 4;
2899       break;
2900
2901     default:
2902       return false;
2903     }
2904   if (end - *offset < sz)
2905     return false;
2906   *offset += sz;
2907   return true;
2908 }
2909
2910
2911 template <typename A>
2912 const char* Parser<A>::getDwarfString(uint64_t form, const uint8_t* p)
2913 {
2914         if ( form == DW_FORM_string )
2915                 return (const char*)p;
2916         else if ( form == DW_FORM_strp ) {
2917                 uint32_t offset = E::get32(*((uint32_t*)p));
2918                 const char* dwarfStrings = (char*)_file->fileContent() + _file->_dwarfDebugStringSect->offset();
2919                 if ( offset > _file->_dwarfDebugStringSect->size() ) {
2920                         warning("unknown dwarf DW_FORM_strp (offset=0x%08X) is too big in %s\n", offset, this->_path);
2921                         return NULL;
2922                 }
2923                 return &dwarfStrings[offset];
2924         }
2925         warning("unknown dwarf string encoding (form=%lld) in %s\n", form, this->_path);
2926         return NULL;
2927 }
2928
2929
// Pairs one line-info record with the atom it belongs to.
template <typename A>
struct AtomAndLineInfo {
        Atom<A>*                        atom;   // atom the record applies to
        ld::Atom::LineInfo      info;   // the record itself (atom offset, file name, line number)
};
2935
2936
2937 // <rdar://problem/5591394> Add support to ld64 for N_FUN stabs when used for symbolic constants
2938 // Returns whether a stabStr belonging to an N_FUN stab represents a
2939 // symbolic constant rather than a function
2940 template <typename A>
2941 bool Parser<A>::isConstFunStabs(const char *stabStr)
2942 {
2943         const char* colon;
2944         // N_FUN can be used for both constants and for functions. In case it's a constant,
2945         // the format of the stabs string is "symname:c=<value>;"
2946         // ':' cannot appear in the symbol name, except if it's an Objective-C method
2947         // (in which case the symbol name starts with + or -, and then it's definitely
2948         //  not a constant)
2949         return (stabStr != NULL) && (stabStr[0] != '+') && (stabStr[0] != '-')
2950                         && ((colon = strchr(stabStr, ':')) != NULL)
2951                         && (colon[1] == 'c') && (colon[2] == '=');
2952 }
2953
2954
2955 template <typename A>
2956 void Parser<A>::parseDebugInfo()
2957 {
2958         // check for dwarf __debug_info section
2959         if ( _file->_dwarfDebugInfoSect == NULL ) {
2960                 // if no DWARF debug info, look for stabs
2961                 this->parseStabs();
2962                 return;
2963         }
2964         if ( _file->_dwarfDebugInfoSect->size() == 0 )
2965                 return;
2966
2967         uint64_t stmtList;
2968         if ( !read_comp_unit(&_file->_dwarfTranslationUnitFile, &_file->_dwarfTranslationUnitDir, &stmtList) ) {
2969                 // if can't parse dwarf, warn and give up
2970                 _file->_dwarfTranslationUnitFile = NULL;
2971                 _file->_dwarfTranslationUnitDir = NULL;
2972                 warning("can't parse dwarf compilation unit info in %s", _path);
2973                 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoNone;
2974                 return;
2975         }
2976
2977         // add line number info to atoms from dwarf
2978         std::vector<AtomAndLineInfo<A> > entries;
2979         entries.reserve(64);
2980         if ( _file->_debugInfoKind == ld::relocatable::File::kDebugInfoDwarf ) {
2981                 // file with just data will have no __debug_line info
2982                 if ( (_file->_dwarfDebugLineSect != NULL) && (_file->_dwarfDebugLineSect->size() != 0) ) {
2983                         // validate stmt_list
2984                         if ( (stmtList != (uint64_t)-1) && (stmtList < _file->_dwarfDebugLineSect->size()) ) {
2985                                 const uint8_t* debug_line = (uint8_t*)_file->fileContent() + _file->_dwarfDebugLineSect->offset();
2986                                 struct line_reader_data* lines = line_open(&debug_line[stmtList],
2987                                                                                                                 _file->_dwarfDebugLineSect->size() - stmtList, E::little_endian);
2988                                 struct line_info result;
2989                                 Atom<A>* curAtom = NULL;
2990                                 uint32_t curAtomOffset = 0;
2991                                 uint32_t curAtomAddress = 0;
2992                                 uint32_t curAtomSize = 0;
2993                                 std::map<uint32_t,const char*>  dwarfIndexToFile;
2994                                 if ( lines != NULL ) {
2995                                         while ( line_next(lines, &result, line_stop_pc) ) {
2996                                                 //fprintf(stderr, "curAtom=%p, result.pc=0x%llX, result.line=%llu, result.end_of_sequence=%d,"
2997                                                 //                                " curAtomAddress=0x%X, curAtomSize=0x%X\n",
2998                                                 //              curAtom, result.pc, result.line, result.end_of_sequence, curAtomAddress, curAtomSize);
2999                                                 // work around weird debug line table compiler generates if no functions in __text section
3000                                                 if ( (curAtom == NULL) && (result.pc == 0) && result.end_of_sequence && (result.file == 1))
3001                                                         continue;
3002                                                 // for performance, see if in next pc is in current atom
3003                                                 if ( (curAtom != NULL) && (curAtomAddress <= result.pc) && (result.pc < (curAtomAddress+curAtomSize)) ) {
3004                                                         curAtomOffset = result.pc - curAtomAddress;
3005                                                 }
3006                                                 // or pc at end of current atom
3007                                                 else if ( result.end_of_sequence && (curAtom != NULL) && (result.pc == (curAtomAddress+curAtomSize)) ) {
3008                                                         curAtomOffset = result.pc - curAtomAddress;
3009                                                 }
3010                                                 // or only one function that is a one line function
3011                                                 else if ( result.end_of_sequence && (curAtom == NULL) && (this->findAtomByAddress(0) != NULL) && (result.pc == this->findAtomByAddress(0)->size()) ) {
3012                                                         curAtom                 = this->findAtomByAddress(0);
3013                                                         curAtomOffset   = result.pc - curAtom->objectAddress();
3014                                                         curAtomAddress  = curAtom->objectAddress();
3015                                                         curAtomSize             = curAtom->size();
3016                                                 }
3017                                                 else {
3018                                                         // do slow look up of atom by address
3019                                                         try {
3020                                                                 curAtom = this->findAtomByAddress(result.pc);
3021                                                         }
3022                                                         catch (...) {
3023                                                                 // in case of bug in debug info, don't abort link, just limp on
3024                                                                 curAtom = NULL;
3025                                                         }
3026                                                         if ( curAtom == NULL )
3027                                                                 break; // file has line info but no functions
3028                                                         if ( result.end_of_sequence && (curAtomAddress+curAtomSize < result.pc) ) {
3029                                                                 // a one line function can be returned by line_next() as one entry with pc at end of blob
3030                                                                 // look for alt atom starting at end of previous atom
3031                                                                 uint32_t previousEnd = curAtomAddress+curAtomSize;
3032                                                                 Atom<A>* alt = this->findAtomByAddressOrNullIfStub(previousEnd);
3033                                                                 if ( alt == NULL )
3034                                                                         continue; // ignore spurious debug info for stubs
3035                                                                 if ( result.pc <= alt->objectAddress() + alt->size() ) {
3036                                                                         curAtom                 = alt;
3037                                                                         curAtomOffset   = result.pc - alt->objectAddress();
3038                                                                         curAtomAddress  = alt->objectAddress();
3039                                                                         curAtomSize             = alt->size();
3040                                                                 }
3041                                                                 else {
3042                                                                         curAtomOffset   = result.pc - curAtom->objectAddress();
3043                                                                         curAtomAddress  = curAtom->objectAddress();
3044                                                                         curAtomSize             = curAtom->size();
3045                                                                 }
3046                                                         }
3047                                                         else {
3048                                                                 curAtomOffset   = result.pc - curAtom->objectAddress();
3049                                                                 curAtomAddress  = curAtom->objectAddress();
3050                                                                 curAtomSize             = curAtom->size();
3051                                                         }
3052                                                 }
3053                                                 const char* filename;
3054                                                 std::map<uint32_t,const char*>::iterator pos = dwarfIndexToFile.find(result.file);
3055                                                 if ( pos == dwarfIndexToFile.end() ) {
3056                                                         filename = line_file(lines, result.file);
3057                                                         dwarfIndexToFile[result.file] = filename;
3058                                                 }
3059                                                 else {
3060                                                         filename = pos->second;
3061                                                 }
3062                                                 // only record for ~8000 line info records per function
3063                                                 if ( curAtom->roomForMoreLineInfoCount() ) {
3064                                                         AtomAndLineInfo<A> entry;
3065                                                         entry.atom = curAtom;
3066                                                         entry.info.atomOffset = curAtomOffset;
3067                                                         entry.info.fileName = filename;
3068                                                         entry.info.lineNumber = result.line;
3069                                                         //fprintf(stderr, "addr=0x%08llX, line=%lld, file=%s, atom=%s, atom.size=0x%X, end=%d\n",
3070                                                         //              result.pc, result.line, filename, curAtom->name(), curAtomSize, result.end_of_sequence);
3071                                                         entries.push_back(entry);
3072                                                         curAtom->incrementLineInfoCount();
3073                                                 }
3074                                                 if ( result.end_of_sequence ) {
3075                                                         curAtom = NULL;
3076                                                 }
3077                                         }
3078                                         line_free(lines);
3079                                 }
3080                         }
3081                 }
3082         }
3083
3084         // assign line info start offset for each atom
3085         uint8_t* p = _file->_atomsArray;
3086         uint32_t liOffset = 0;
3087         for(int i=_file->_atomsArrayCount; i > 0; --i) {
3088                 Atom<A>* atom = (Atom<A>*)p;
3089                 atom->_lineInfoStartIndex = liOffset;
3090                 liOffset += atom->_lineInfoCount;
3091                 atom->_lineInfoCount = 0;
3092                 p += sizeof(Atom<A>);
3093         }
3094         assert(liOffset == entries.size());
3095         _file->_lineInfos.reserve(liOffset);
3096
3097         // copy each line info for each atom
3098         for (typename std::vector<AtomAndLineInfo<A> >::iterator it = entries.begin(); it != entries.end(); ++it) {
3099                 uint32_t slot = it->atom->_lineInfoStartIndex + it->atom->_lineInfoCount;
3100                 _file->_lineInfos[slot] = it->info;
3101                 it->atom->_lineInfoCount++;
3102         }
3103
3104         // done with temp vector
3105         entries.clear();
3106 }
3107
3108 template <typename A>
3109 void Parser<A>::parseStabs()
3110 {
3111         // scan symbol table for stabs entries
3112         Atom<A>* currentAtom = NULL;
3113         pint_t currentAtomAddress = 0;
3114         enum { start, inBeginEnd, inFun } state = start;
3115         for (uint32_t symbolIndex = 0; symbolIndex < _symbolCount; ++symbolIndex ) {
3116                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
3117                 bool useStab = true;
3118                 uint8_t type = sym.n_type();
3119                 const char* symString = (sym.n_strx() != 0) ? this->nameFromSymbol(sym) : NULL;
3120                 if ( (type & N_STAB) != 0 ) {
3121                         _file->_debugInfoKind =  (_hasUUID ? ld::relocatable::File::kDebugInfoStabsUUID : ld::relocatable::File::kDebugInfoStabs);
3122                         ld::relocatable::File::Stab stab;
3123                         stab.atom       = NULL;
3124                         stab.type       = type;
3125                         stab.other      = sym.n_sect();
3126                         stab.desc       = sym.n_desc();
3127                         stab.value      = sym.n_value();
3128                         stab.string = NULL;
3129                         switch (state) {
3130                                 case start:
3131                                         switch (type) {
3132                                                 case N_BNSYM:
3133                                                         // beginning of function block
3134                                                         state = inBeginEnd;
3135                                                         // fall into case to lookup atom by addresss
3136                                                 case N_LCSYM:
3137                                                 case N_STSYM:
3138                                                         currentAtomAddress = sym.n_value();
3139                                                         currentAtom = this->findAtomByAddress(currentAtomAddress);
3140                                                         if ( currentAtom != NULL ) {
3141                                                                 stab.atom = currentAtom;
3142                                                                 stab.string = symString;
3143                                                         }
3144                                                         else {
3145                                                                 fprintf(stderr, "can't find atom for stabs BNSYM at %08llX in %s",
3146                                                                         (uint64_t)sym.n_value(), _path);
3147                                                         }
3148                                                         break;
3149                                                 case N_SO:
3150                                                 case N_OSO:
3151                                                 case N_OPT:
3152                                                 case N_LSYM:
3153                                                 case N_RSYM:
3154                                                 case N_PSYM:
3155                                                         // not associated with an atom, just copy
3156                                                         stab.string = symString;
3157                                                         break;
3158                                                 case N_GSYM:
3159                                                 {
3160                                                         // n_value field is NOT atom address ;-(
3161                                                         // need to find atom by name match
3162                                                         const char* colon = strchr(symString, ':');
3163                                                         if ( colon != NULL ) {
3164                                                                 // build underscore leading name
3165                                                                 int nameLen = colon - symString;
3166                                                                 char symName[nameLen+2];
3167                                                                 strlcpy(&symName[1], symString, nameLen+1);
3168                                                                 symName[0] = '_';
3169                                                                 symName[nameLen+1] = '\0';
3170                                                                 currentAtom = this->findAtomByName(symName);
3171                                                                 if ( currentAtom != NULL ) {
3172                                                                         stab.atom = currentAtom;
3173                                                                         stab.string = symString;
3174                                                                 }
3175                                                         }
3176                                                         else {
3177                                                                 // might be a debug-note without trailing :G()
3178                                                                 currentAtom = this->findAtomByName(symString);
3179                                                                 if ( currentAtom != NULL ) {
3180                                                                         stab.atom = currentAtom;
3181                                                                         stab.string = symString;
3182                                                                 }
3183                                                         }
3184                                                         if ( stab.atom == NULL ) {
3185                                                                 // ld_classic added bogus GSYM stabs for old style dtrace probes
3186                                                                 if ( (strncmp(symString, "__dtrace_probe$", 15) != 0) )
3187                                                                         warning("can't find atom for N_GSYM stabs %s in %s", symString, _path);
3188                                                                 useStab = false;
3189                                                         }
3190                                                         break;
3191                                                 }
3192                                                 case N_FUN:
3193                                                         if ( isConstFunStabs(symString) ) {
3194                                                                 // constant not associated with a function
3195                                                                 stab.string = symString;
3196                                                         }
3197                                                         else {
3198                                                                 // old style stabs without BNSYM
3199                                                                 state = inFun;
3200                                                                 currentAtomAddress = sym.n_value();
3201                                                                 currentAtom = this->findAtomByAddress(currentAtomAddress);
3202                                                                 if ( currentAtom != NULL ) {
3203                                                                         stab.atom = currentAtom;
3204                                                                         stab.string = symString;
3205                                                                 }
3206                                                                 else {
3207                                                                         warning("can't find atom for stabs FUN at %08llX in %s",
3208                                                                                 (uint64_t)currentAtomAddress, _path);
3209                                                                 }
3210                                                         }
3211                                                         break;
3212                                                 case N_SOL:
3213                                                 case N_SLINE:
3214                                                         stab.string = symString;
3215                                                         // old stabs
3216                                                         break;
3217                                                 case N_BINCL:
3218                                                 case N_EINCL:
3219                                                 case N_EXCL:
3220                                                         stab.string = symString;
3221                                                         // -gfull built .o file
3222                                                         break;
3223                                                 default:
3224                                                         warning("unknown stabs type 0x%X in %s", type, _path);
3225                                         }
3226                                         break;
3227                                 case inBeginEnd:
3228                                         stab.atom = currentAtom;
3229                                         switch (type) {
3230                                                 case N_ENSYM:
3231                                                         state = start;
3232                                                         currentAtom = NULL;
3233                                                         break;
3234                                                 case N_LCSYM:
3235                                                 case N_STSYM:
3236                                                 {
3237                                                         Atom<A>* nestedAtom = this->findAtomByAddress(sym.n_value());
3238                                                         if ( nestedAtom != NULL ) {
3239                                                                 stab.atom = nestedAtom;
3240                                                                 stab.string = symString;
3241                                                         }
3242                                                         else {
3243                                                                 warning("can't find atom for stabs 0x%X at %08llX in %s",
3244                                                                         type, (uint64_t)sym.n_value(), _path);
3245                                                         }
3246                                                         break;
3247                                                 }
3248                                                 case N_LBRAC:
3249                                                 case N_RBRAC:
3250                                                 case N_SLINE:
3251                                                         // adjust value to be offset in atom
3252                                                         stab.value -= currentAtomAddress;
3253                                                 default:
3254                                                         stab.string = symString;
3255                                                         break;
3256                                         }
3257                                         break;
3258                                 case inFun:
3259                                         switch (type) {
3260                                                 case N_FUN:
3261                                                         if ( isConstFunStabs(symString) ) {
3262                                                                 stab.atom = currentAtom;
3263                                                                 stab.string = symString;
3264                                                         }
3265                                                         else {
3266                                                                 if ( sym.n_sect() != 0 ) {
3267                                                                         // found another start stab, must be really old stabs...
3268                                                                         currentAtomAddress = sym.n_value();
3269                                                                         currentAtom = this->findAtomByAddress(currentAtomAddress);
3270                                                                         if ( currentAtom != NULL ) {
3271                                                                                 stab.atom = currentAtom;
3272                                                                                 stab.string = symString;
3273                                                                         }
3274                                                                         else {
3275                                                                                 warning("can't find atom for stabs FUN at %08llX in %s",
3276                                                                                         (uint64_t)currentAtomAddress, _path);
3277                                                                         }
3278                                                                 }
3279                                                                 else {
3280                                                                         // found ending stab, switch back to start state
3281                                                                         stab.string = symString;
3282                                                                         stab.atom = currentAtom;
3283                                                                         state = start;
3284                                                                         currentAtom = NULL;
3285                                                                 }
3286                                                         }
3287                                                         break;
3288                                                 case N_LBRAC:
3289                                                 case N_RBRAC:
3290                                                 case N_SLINE:
3291                                                         // adjust value to be offset in atom
3292                                                         stab.value -= currentAtomAddress;
3293                                                         stab.atom = currentAtom;
3294                                                         break;
3295                                                 case N_SO:
3296                                                         stab.string = symString;
3297                                                         state = start;
3298                                                         break;
3299                                                 default:
3300                                                         stab.atom = currentAtom;
3301                                                         stab.string = symString;
3302                                                         break;
3303                                         }
3304                                         break;
3305                         }
3306                         // add to list of stabs for this .o file
3307                         if ( useStab )
3308                                 _file->_stabs.push_back(stab);
3309                 }
3310         }
3311 }
3312
3313
3314
3315 // Look at the compilation unit DIE and determine
3316 // its NAME, compilation directory (in COMP_DIR) and its
3317 // line number information offset (in STMT_LIST).  NAME and COMP_DIR
3318 // may be NULL (especially COMP_DIR) if they are not in the .o file;
3319 // STMT_LIST will be (uint64_t) -1.
3320 //
3321 // At present this assumes that there's only one compilation unit DIE.
3322 //
3323 template <typename A>
3324 bool Parser<A>::read_comp_unit(const char ** name, const char ** comp_dir,
3325                                                         uint64_t *stmt_list)
3326 {
3327         const uint8_t * debug_info;
3328         const uint8_t * debug_abbrev;
3329         const uint8_t * di;
3330         const uint8_t * da;
3331         const uint8_t * end;
3332         const uint8_t * enda;
3333         uint64_t sz;
3334         uint16_t vers;
3335         uint64_t abbrev_base;
3336         uint64_t abbrev;
3337         uint8_t address_size;
3338         bool dwarf64;
3339
3340         *name = NULL;
3341         *comp_dir = NULL;
3342         *stmt_list = (uint64_t) -1;
3343
3344         if ( (_file->_dwarfDebugInfoSect == NULL) || (_file->_dwarfDebugAbbrevSect == NULL) )
3345                 return false;
3346
3347         debug_info = (uint8_t*)_file->fileContent() + _file->_dwarfDebugInfoSect->offset();
3348         debug_abbrev = (uint8_t*)_file->fileContent() + _file->_dwarfDebugAbbrevSect->offset();
3349         di = debug_info;
3350
3351         if (_file->_dwarfDebugInfoSect->size() < 12)
3352                 /* Too small to be a real debug_info section.  */
3353                 return false;
3354         sz = A::P::E::get32(*(uint32_t*)di);
3355         di += 4;
3356         dwarf64 = sz == 0xffffffff;
3357         if (dwarf64)
3358                 sz = A::P::E::get64(*(uint64_t*)di), di += 8;
3359         else if (sz > 0xffffff00)
3360                 /* Unknown dwarf format.  */
3361                 return false;
3362
3363         /* Verify claimed size.  */
3364         if (sz + (di - debug_info) > _file->_dwarfDebugInfoSect->size() || sz <= (dwarf64 ? 23 : 11))
3365                 return false;
3366
3367         vers = A::P::E::get16(*(uint16_t*)di);
3368         if (vers < 2 || vers > 3)
3369         /* DWARF version wrong for this code.
3370            Chances are we could continue anyway, but we don't know for sure.  */
3371                 return false;
3372         di += 2;
3373
3374         /* Find the debug_abbrev section.  */
3375         abbrev_base = dwarf64 ? A::P::E::get64(*(uint64_t*)di) : A::P::E::get32(*(uint32_t*)di);
3376         di += dwarf64 ? 8 : 4;
3377
3378         if (abbrev_base > _file->_dwarfDebugAbbrevSect->size())
3379                 return false;
3380         da = debug_abbrev + abbrev_base;
3381         enda = debug_abbrev + _file->_dwarfDebugAbbrevSect->size();
3382
3383         address_size = *di++;
3384
3385         /* Find the abbrev number we're looking for.  */
3386         end = di + sz;
3387         abbrev = read_uleb128 (&di, end);
3388         if (abbrev == (uint64_t) -1)
3389                 return false;
3390
3391         /* Skip through the debug_abbrev section looking for that abbrev.  */
3392         for (;;)
3393         {
3394                 uint64_t this_abbrev = read_uleb128 (&da, enda);
3395                 uint64_t attr;
3396
3397                 if (this_abbrev == abbrev)
3398                         /* This is almost always taken.  */
3399                         break;
3400                 skip_leb128 (&da, enda); /* Skip the tag.  */
3401                 if (da == enda)
3402                         return false;
3403                 da++;  /* Skip the DW_CHILDREN_* value.  */
3404
3405                 do {
3406                         attr = read_uleb128 (&da, enda);
3407                         skip_leb128 (&da, enda);
3408                 } while (attr != 0 && attr != (uint64_t) -1);
3409                 if (attr != 0)
3410                         return false;
3411         }
3412
3413         /* Check that the abbrev is one for a DW_TAG_compile_unit.  */
3414         if (read_uleb128 (&da, enda) != DW_TAG_compile_unit)
3415         return false;
3416         if (da == enda)
3417         return false;
3418         da++;  /* Skip the DW_CHILDREN_* value.  */
3419
3420         /* Now, go through the DIE looking for DW_AT_name,
3421          DW_AT_comp_dir, and DW_AT_stmt_list.  */
3422         for (;;)
3423         {
3424                 uint64_t attr = read_uleb128 (&da, enda);
3425                 uint64_t form = read_uleb128 (&da, enda);
3426
3427                 if (attr == (uint64_t) -1)
3428                         return false;
3429                 else if (attr == 0)
3430                         return true;
3431
3432                 if (form == DW_FORM_indirect)
3433                         form = read_uleb128 (&di, end);
3434
3435                 if (attr == DW_AT_name)
3436                         *name = getDwarfString(form, di);
3437                 else if (attr == DW_AT_comp_dir)
3438                         *comp_dir = getDwarfString(form, di);
3439                 else if (attr == DW_AT_stmt_list && form == DW_FORM_data4)
3440                         *stmt_list = A::P::E::get32(*(uint32_t*)di);
3441                 else if (attr == DW_AT_stmt_list && form == DW_FORM_data8)
3442                         *stmt_list = A::P::E::get64(*(uint64_t*)di);
3443                 if (! skip_form (&di, end, form, address_size, dwarf64))
3444                         return false;
3445         }
3446 }
3447
3448
3449
3450 template <typename A>
3451 File<A>::~File()
3452 {
3453         free(_sectionsArray);
3454         free(_atomsArray);
3455 }
3456
3457 template <typename A>
3458 bool File<A>::translationUnitSource(const char** dir, const char** name) const
3459 {
3460         if ( _debugInfoKind == ld::relocatable::File::kDebugInfoDwarf ) {
3461                 *dir = _dwarfTranslationUnitDir;
3462                 *name = _dwarfTranslationUnitFile;
3463                 return (_dwarfTranslationUnitFile != NULL);
3464         }
3465         return false;
3466 }
3467
3468
3469
3470 template <typename A>
3471 bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
3472 {
3473         handler.doFile(*this);
3474         uint8_t* p = _atomsArray;
3475         for(int i=_atomsArrayCount; i > 0; --i) {
3476                 handler.doAtom(*((Atom<A>*)p));
3477                 p += sizeof(Atom<A>);
3478         }
3479         return (_atomsArrayCount != 0);
3480 }
3481
3482 template <typename A>
3483 const char* Section<A>::makeSegmentName(const macho_section<typename A::P>* sect)
3484 {
3485         // mach-o section record only has room for 16-byte seg/sect names
3486         // so a 16-byte name has no trailing zero
3487         const char* name = sect->segname();
3488         if ( strlen(name) < 16 )
3489                 return name;
3490         char* tmp = new char[17];
3491         strlcpy(tmp, name, 17);
3492         return tmp;
3493 }
3494
3495 template <typename A>
3496 const char* Section<A>::makeSectionName(const macho_section<typename A::P>* sect)
3497 {
3498         const char* name = sect->sectname();
3499         if ( strlen(name) < 16 )
3500                 return name;
3501
3502         // special case common long section names so we don't have to malloc
3503         if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3504                 return "__objc_classrefs";
3505         if ( strncmp(sect->sectname(), "__objc_classlist", 16) == 0 )
3506                 return "__objc_classlist";
3507         if ( strncmp(sect->sectname(), "__objc_nlclslist", 16) == 0 )
3508                 return "__objc_nlclslist";
3509         if ( strncmp(sect->sectname(), "__objc_nlcatlist", 16) == 0 )
3510                 return "__objc_nlcatlist";
3511         if ( strncmp(sect->sectname(), "__objc_protolist", 16) == 0 )
3512                 return "__objc_protolist";
3513         if ( strncmp(sect->sectname(), "__objc_protorefs", 16) == 0 )
3514                 return "__objc_protorefs";
3515         if ( strncmp(sect->sectname(), "__objc_superrefs", 16) == 0 )
3516                 return "__objc_superrefs";
3517         if ( strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0 )
3518                 return "__objc_imageinfo";
3519         if ( strncmp(sect->sectname(), "__objc_stringobj", 16) == 0 )
3520                 return "__objc_stringobj";
3521         if ( strncmp(sect->sectname(), "__gcc_except_tab", 16) == 0 )
3522                 return "__gcc_except_tab";
3523
3524         char* tmp = new char[17];
3525         strlcpy(tmp, name, 17);
3526         return tmp;
3527 }
3528
3529 template <typename A>
3530 bool Section<A>::readable(const macho_section<typename A::P>* sect)
3531 {
3532         return true;
3533 }
3534
3535 template <typename A>
3536 bool Section<A>::writable(const macho_section<typename A::P>* sect)
3537 {
3538         // mach-o .o files do not contain segment permissions
3539         // we just know TEXT is special
3540         return ( strcmp(sect->segname(), "__TEXT") != 0 );
3541 }
3542
3543 template <typename A>
3544 bool Section<A>::exectuable(const macho_section<typename A::P>* sect)
3545 {
3546         // mach-o .o files do not contain segment permissions
3547         // we just know TEXT is special
3548         return ( strcmp(sect->segname(), "__TEXT") == 0 );
3549 }
3550
3551
3552 template <typename A>
3553 ld::Section::Type Section<A>::sectionType(const macho_section<typename A::P>* sect)
3554 {
3555         switch ( sect->flags() & SECTION_TYPE ) {
3556                 case S_ZEROFILL:
3557                         return ld::Section::typeZeroFill;
3558                 case S_CSTRING_LITERALS:
3559                         if ( (strcmp(sect->sectname(), "__cstring") == 0) && (strcmp(sect->segname(), "__TEXT") == 0) )
3560                                 return ld::Section::typeCString;
3561                         else
3562                                 return ld::Section::typeNonStdCString;
3563                 case S_4BYTE_LITERALS:
3564                         return ld::Section::typeLiteral4;
3565                 case S_8BYTE_LITERALS:
3566                         return ld::Section::typeLiteral8;
3567                 case S_LITERAL_POINTERS:
3568                         return ld::Section::typeCStringPointer;
3569                 case S_NON_LAZY_SYMBOL_POINTERS:
3570                         return ld::Section::typeNonLazyPointer;
3571                 case S_LAZY_SYMBOL_POINTERS:
3572                         return ld::Section::typeLazyPointer;
3573                 case S_SYMBOL_STUBS:
3574                         return ld::Section::typeStub;
3575                 case S_MOD_INIT_FUNC_POINTERS:
3576                         return ld::Section::typeInitializerPointers;
3577                 case S_MOD_TERM_FUNC_POINTERS:
3578                         return ld::Section::typeTerminatorPointers;
3579                 case S_INTERPOSING:
3580                         return ld::Section::typeUnclassified;
3581                 case S_16BYTE_LITERALS:
3582                         return ld::Section::typeLiteral16;
3583                 case S_REGULAR:
3584                 case S_COALESCED:
3585                         if ( sect->flags() & S_ATTR_PURE_INSTRUCTIONS ) {
3586                                 return ld::Section::typeCode;
3587                         }
3588                         else if ( strcmp(sect->segname(), "__TEXT") == 0 ) {
3589                                 if ( strcmp(sect->sectname(), "__eh_frame") == 0 )
3590                                         return ld::Section::typeCFI;
3591                                 else if ( strcmp(sect->sectname(), "__ustring") == 0 )
3592                                         return ld::Section::typeUTF16Strings;
3593                                 else if ( strcmp(sect->sectname(), "__textcoal_nt") == 0 )
3594                                         return ld::Section::typeCode;
3595                                 else if ( strcmp(sect->sectname(), "__StaticInit") == 0 )
3596                                         return ld::Section::typeCode;
3597                                 else if ( strcmp(sect->sectname(), "__constructor") == 0 )
3598                                         return ld::Section::typeInitializerPointers;
3599                         }
3600                         else if ( strcmp(sect->segname(), "__DATA") == 0 ) {
3601                                 if ( strcmp(sect->sectname(), "__cfstring") == 0 )
3602                                         return ld::Section::typeCFString;
3603                                 else if ( strcmp(sect->sectname(), "__dyld") == 0 )
3604                                         return ld::Section::typeDyldInfo;
3605                                 else if ( strcmp(sect->sectname(), "__program_vars") == 0 )
3606                                         return ld::Section::typeDyldInfo;
3607                                 else if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3608                                         return ld::Section::typeObjCClassRefs;
3609                                 else if ( strcmp(sect->sectname(), "__objc_catlist") == 0 )
3610                                         return ld::Section::typeObjC2CategoryList;
3611                         }
3612                         else if ( strcmp(sect->segname(), "__OBJC") == 0 ) {
3613                                 if ( strcmp(sect->sectname(), "__class") == 0 )
3614                                         return ld::Section::typeObjC1Classes;
3615                         }
3616                         break;
3617                 case S_THREAD_LOCAL_REGULAR:
3618                         return ld::Section::typeTLVInitialValues;
3619                 case S_THREAD_LOCAL_ZEROFILL:
3620                         return ld::Section::typeTLVZeroFill;
3621                 case S_THREAD_LOCAL_VARIABLES:
3622                         return ld::Section::typeTLVDefs;
3623                 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
3624                         return ld::Section::typeTLVInitializerPointers;
3625         }
3626         return ld::Section::typeUnclassified;
3627 }
3628
3629
3630 template <typename A>
3631 Atom<A>* Section<A>::findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end)
3632 {
3633         // do a binary search of atom array
3634         uint32_t atomCount = end - start;
3635         Atom<A>* base = start;
3636         for (uint32_t n = atomCount; n > 0; n /= 2) {
3637                 Atom<A>* pivot = &base[n/2];
3638                 pint_t atomStartAddr = pivot->_objAddress;
3639                 pint_t atomEndAddr = atomStartAddr + pivot->_size;
3640                 if ( atomStartAddr <= addr ) {
3641                         // address in normal atom
3642                         if (addr < atomEndAddr)
3643                                 return pivot;
3644                         // address in "end" label (but not in alias)
3645                         if ( (pivot->_size == 0) && (addr == atomEndAddr) && !pivot->isAlias() )
3646                                 return pivot;
3647                 }
3648                 if ( addr >= atomEndAddr ) {
3649                         // key > pivot
3650                         // move base to atom after pivot
3651                         base = &pivot[1];
3652                         --n;
3653                 }
3654                 else {
3655                         // key < pivot
3656                         // keep same base
3657                 }
3658         }
3659         return NULL;
3660 }
3661
3662 template <typename A>
3663 ld::Atom::Alignment Section<A>::alignmentForAddress(pint_t addr)
3664 {
3665         const uint32_t sectionAlignment = this->_machOSection->align();
3666         return ld::Atom::Alignment(sectionAlignment, (addr % (1 << sectionAlignment)));
3667 }
3668
3669 template <typename A>
3670 uint32_t Section<A>::sectionNum(class Parser<A>& parser) const
3671 {
3672         if ( _machOSection == NULL )
3673                 return 0;
3674         else
3675                 return 1 + (this->_machOSection - parser.firstMachOSection());
3676 }
3677
3678 // arm does not have zero cost exceptions
3679 template <> uint32_t CFISection<arm>::cfiCount() { return 0; }
3680
3681 template <typename A>
3682 uint32_t CFISection<A>::cfiCount()
3683 {
3684         // create ObjectAddressSpace object for use by libunwind
3685         OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
3686         return libunwind::CFI_Parser<OAS>::getCFICount(oas,
3687                                                                                 this->_machOSection->addr(), this->_machOSection->size());
3688 }
3689
3690 template <typename A>
3691 void CFISection<A>::warnFunc(void* ref, uint64_t funcAddr, const char* msg)
3692 {
3693         Parser<A>* parser = (Parser<A>*)ref;
3694         if ( ! parser->convertUnwindInfo() )
3695                 return;
3696         if ( funcAddr != CFI_INVALID_ADDRESS ) {
3697                 // atoms are not constructed yet, so scan symbol table for labels
3698                 const char* name = parser->scanSymbolTableForAddress(funcAddr);
3699                 warning("could not create compact unwind for %s: %s", name, msg);
3700         }
3701         else {
3702                 warning("could not create compact unwind: %s", msg);
3703         }
3704 }
3705
3706 template <>
3707 bool CFISection<x86_64>::needsRelocating()
3708 {
3709         return true;
3710 }
3711
3712 template <typename A>
3713 bool CFISection<A>::needsRelocating()
3714 {
3715         return false;
3716 }
3717
3718 template <>
3719 void CFISection<x86_64>::cfiParse(class Parser<x86_64>& parser, uint8_t* buffer,
3720                                                                         libunwind::CFI_Atom_Info<CFISection<x86_64>::OAS>::CFI_Atom_Info cfiArray[],
3721                                                                         uint32_t count)
3722 {
3723         // copy __eh_frame data to buffer
3724         memcpy(buffer, file().fileContent() + this->_machOSection->offset(), this->_machOSection->size());
3725
3726         // and apply relocations
3727         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + this->_machOSection->reloff());
3728         const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
3729         for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
3730                 uint64_t value = 0;
3731                 switch ( reloc->r_type() ) {
3732                         case X86_64_RELOC_SUBTRACTOR:
3733                                 value =  0 - parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3734                                 ++reloc;
3735                                 if ( reloc->r_extern() )
3736                                         value += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3737                                 break;
3738                         case X86_64_RELOC_UNSIGNED:
3739                                 value = parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3740                                 break;
3741                         case X86_64_RELOC_GOT:
3742                                 // this is used for the reference to the personality function in CIEs
3743                                 // store the symbol number of the personality function for later use as a Fixup
3744                                 value = reloc->r_symbolnum();
3745                                 break;
3746                         default:
3747                                 fprintf(stderr, "CFISection::cfiParse() unexpected relocation type at r_address=0x%08X\n", reloc->r_address());
3748                                 break;
3749                 }
3750                 uint64_t*       p64;
3751                 uint32_t*       p32;
3752                 switch ( reloc->r_length() ) {
3753                         case 3:
3754                                 p64 = (uint64_t*)&buffer[reloc->r_address()];
3755                                 E::set64(*p64, value + E::get64(*p64));
3756                                 break;
3757                         case 2:
3758                                 p32 = (uint32_t*)&buffer[reloc->r_address()];
3759                                 E::set32(*p32, value + E::get32(*p32));
3760                                 break;
3761                         default:
3762                                 fprintf(stderr, "CFISection::cfiParse() unexpected relocation size at r_address=0x%08X\n", reloc->r_address());
3763                                 break;
3764                 }
3765         }
3766
3767
3768         // create ObjectAddressSpace object for use by libunwind
3769         OAS oas(*this, buffer);
3770
3771         // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
3772         const char* msg;
3773         msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86_64>::parseCFIs(
3774                                                         oas, this->_machOSection->addr(), this->_machOSection->size(),
3775                                                         cfiArray, count, (void*)&parser, warnFunc);
3776         if ( msg != NULL )
3777                 throwf("malformed __eh_frame section: %s", msg);
3778 }
3779
3780 template <>
3781 void CFISection<x86>::cfiParse(class Parser<x86>& parser, uint8_t* buffer,
3782                                                                         libunwind::CFI_Atom_Info<CFISection<x86>::OAS>::CFI_Atom_Info cfiArray[],
3783                                                                         uint32_t count)
3784 {
3785         // create ObjectAddressSpace object for use by libunwind
3786         OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
3787
3788         // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
3789         const char* msg;
3790         msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86>::parseCFIs(
3791                                                         oas, this->_machOSection->addr(), this->_machOSection->size(),
3792                                                         cfiArray, count, (void*)&parser, warnFunc);
3793         if ( msg != NULL )
3794                 throwf("malformed __eh_frame section: %s", msg);
3795 }
3796
3797
3798
3799
3800 template <>
3801 void CFISection<arm>::cfiParse(class Parser<arm>& parser, uint8_t* buffer,
3802                                                                         libunwind::CFI_Atom_Info<CFISection<arm>::OAS>::CFI_Atom_Info cfiArray[],
3803                                                                         uint32_t count)
3804 {
3805         // arm does not use zero cost exceptions
3806         assert(count == 0);
3807 }
3808
3809
3810
3811 template <typename A>
3812 uint32_t CFISection<A>::computeAtomCount(class Parser<A>& parser,
3813                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
3814                                                                                         const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3815 {
3816         return cfis.cfiCount;
3817 }
3818
3819
3820
3821 template <typename A>
3822 uint32_t CFISection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
3823                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
3824                                                                         const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3825 {
3826         this->_beginAtoms = (Atom<A>*)p;
3827         // walk CFI_Atom_Info array and create atom for each entry
3828         const CFI_Atom_Info* start = &cfis.cfiArray[0];
3829         const CFI_Atom_Info* end   = &cfis.cfiArray[cfis.cfiCount];
3830         for(const CFI_Atom_Info* a=start; a < end; ++a) {
3831                 Atom<A>* space = (Atom<A>*)p;
3832                 new (space) Atom<A>(*this, (a->isCIE ? "CIE" : "FDE"), a->address, a->size,
3833                                                                                 ld::Atom::definitionRegular, ld::Atom::combineNever, ld::Atom::scopeTranslationUnit,
3834                                                                                 ld::Atom::typeCFI, ld::Atom::symbolTableNotInFinalLinkedImages,
3835                                                                                 false, false, false, ld::Atom::Alignment(0));
3836                 p += sizeof(Atom<A>);
3837         }
3838         this->_endAtoms = (Atom<A>*)p;
3839         return cfis.cfiCount;
3840 }
3841
3842
3843 template <> bool CFISection<x86_64>::bigEndian() { return false; }
3844 template <> bool CFISection<x86>::bigEndian() { return false; }
3845 template <> bool CFISection<arm>::bigEndian() { return false; }
3846
3847
3848 template <>
3849 void CFISection<x86_64>::addCiePersonalityFixups(class Parser<x86_64>& parser, const CFI_Atom_Info* cieInfo)
3850 {
3851         uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
3852         if ( personalityEncoding == 0x9B ) {
3853                 // compiler always produces X86_64_RELOC_GOT with addend of 4 to personality function
3854                 // CFISection<x86_64>::cfiParse() set targetAddress to be symbolIndex + 4 + addressInCIE
3855                 uint32_t symbolIndex = cieInfo->u.cieInfo.personality.targetAddress - 4
3856                                                                         - cieInfo->address - cieInfo->u.cieInfo.personality.offsetInCFI;
3857                 const macho_nlist<P>& sym = parser.symbolFromIndex(symbolIndex);
3858                 const char* personalityName = parser.nameFromSymbol(sym);
3859
3860                 Atom<x86_64>* cieAtom = this->findAtomByAddress(cieInfo->address);
3861                 Parser<x86_64>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
3862                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, personalityName);
3863                 parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, 4);
3864                 parser.addFixup(src, ld::Fixup::k3of3, ld::Fixup::kindStoreX86PCRel32GOT);
3865         }
3866         else if ( personalityEncoding != 0 ) {
3867                 throwf("unsupported address encoding (%02X) of personality function in CIE",
3868                                 personalityEncoding);
3869         }
3870 }
3871
3872 template <>
3873 void CFISection<x86>::addCiePersonalityFixups(class Parser<x86>& parser, const CFI_Atom_Info* cieInfo)
3874 {
3875         uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
3876         if ( (personalityEncoding == 0x9B) || (personalityEncoding == 0x90) ) {
3877                 uint32_t offsetInCFI = cieInfo->u.cieInfo.personality.offsetInCFI;
3878                 uint32_t nlpAddr = cieInfo->u.cieInfo.personality.targetAddress;
3879                 Atom<x86>* cieAtom = this->findAtomByAddress(cieInfo->address);
3880                 Atom<x86>* nlpAtom = parser.findAtomByAddress(nlpAddr);
3881                 assert(nlpAtom->contentType() == ld::Atom::typeNonLazyPointer);
3882                 Parser<x86>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
3883
3884                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, nlpAtom);
3885                 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
3886                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, offsetInCFI);
3887                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
3888         }
3889         else if ( personalityEncoding != 0 ) {
3890                 throwf("unsupported address encoding (%02X) of personality function in CIE", personalityEncoding);
3891         }
3892 }
3893
3894
3895 template <typename A>
3896 void CFISection<A>::addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo)
3897 {
3898         // FIX ME
3899         assert(0);
3900 }
3901
3902 template <typename A>
3903 void CFISection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3904 {
3905         ld::Fixup::Kind store32 = bigEndian() ? ld::Fixup::kindStoreBigEndian32 : ld::Fixup::kindStoreLittleEndian32;
3906         ld::Fixup::Kind store64 = bigEndian() ? ld::Fixup::kindStoreBigEndian64 : ld::Fixup::kindStoreLittleEndian64;
3907
3908         // add all references for FDEs, including implicit group references
3909         const CFI_Atom_Info* end = &cfis.cfiArray[cfis.cfiCount];
3910         for(const CFI_Atom_Info* p = &cfis.cfiArray[0]; p < end; ++p) {
3911                 if ( p->isCIE ) {
3912                         // add reference to personality function if used
3913                         if ( p->u.cieInfo.personality.targetAddress != CFI_INVALID_ADDRESS ) {
3914                                 this->addCiePersonalityFixups(parser, p);
3915                         }
3916                 }
3917                 else {
3918                         // find FDE Atom
3919                         Atom<A>* fdeAtom = this->findAtomByAddress(p->address);
3920                         // find function Atom
3921                         Atom<A>* functionAtom = parser.findAtomByAddress(p->u.fdeInfo.function.targetAddress);
3922                         // find CIE Atom
3923                         Atom<A>* cieAtom = this->findAtomByAddress(p->u.fdeInfo.cie.targetAddress);
3924                         // find LSDA Atom
3925                         Atom<A>* lsdaAtom = NULL;
3926                         if ( p->u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS ) {
3927                                 lsdaAtom = parser.findAtomByAddress(p->u.fdeInfo.lsda.targetAddress);
3928                         }
3929                         // add reference from FDE to CIE (always 32-bit pc-rel)
3930                         typename Parser<A>::SourceLocation fdeToCieSrc(fdeAtom, p->u.fdeInfo.cie.offsetInCFI);
3931                         parser.addFixup(fdeToCieSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, fdeAtom);
3932                         parser.addFixup(fdeToCieSrc, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, p->u.fdeInfo.cie.offsetInCFI);
3933                         parser.addFixup(fdeToCieSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
3934                         parser.addFixup(fdeToCieSrc, ld::Fixup::k4of4, store32, cieAtom);
3935
3936                         // add reference from FDE to function
3937                         typename Parser<A>::SourceLocation fdeToFuncSrc(fdeAtom, p->u.fdeInfo.function.offsetInCFI);
3938                         switch (p->u.fdeInfo.function.encodingOfTargetAddress) {
3939                                 case DW_EH_PE_pcrel|DW_EH_PE_ptr:
3940                                         if ( sizeof(typename A::P::uint_t) == 8 ) {
3941                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
3942                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3943                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
3944                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store64);
3945                                                 break;
3946                                         }
3947                                         // else fall into 32-bit case
3948                                 case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
3949                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
3950                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3951                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
3952                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store32);
3953                                         break;
3954                                 default:
3955                                         throw "unsupported encoding in FDE of pointer to function";
3956                         }
3957
3958                         // add reference from FDE to LSDA
3959                         typename Parser<A>::SourceLocation fdeToLsdaSrc(fdeAtom,  p->u.fdeInfo.lsda.offsetInCFI);
3960                         if ( lsdaAtom != NULL ) {
3961                                 switch (p->u.fdeInfo.lsda.encodingOfTargetAddress) {
3962                                         case DW_EH_PE_pcrel|DW_EH_PE_ptr:
3963                                                 if ( sizeof(typename A::P::uint_t) == 8 ) {
3964                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
3965                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3966                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
3967                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store64);
3968                                                         break;
3969                                                 }
3970                                                 // else fall into 32-bit case
3971                                         case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
3972                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
3973                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3974                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
3975                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store32);
3976                                         break;
3977                                         default:
3978                                                 throw "unsupported encoding in FDE of pointer to LSDA";
3979                                 }
3980                         }
3981
3982                         // FDE is in group lead by function atom
3983                         typename Parser<A>::SourceLocation fdeSrc(functionAtom,0);
3984                         parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateFDE, fdeAtom);
3985
3986                         // LSDA is in group lead by function atom
3987                         if ( lsdaAtom != NULL ) {
3988                                 parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, lsdaAtom);
3989                         }
3990                 }
3991         }
3992 }
3993
3994
3995
3996
3997 template <typename A>
3998 const void*      CFISection<A>::OAS::mappedAddress(pint_t addr)
3999 {
4000         if ( (_ehFrameStartAddr <= addr) && (addr < _ehFrameEndAddr) )
4001                 return &_ehFrameContent[addr-_ehFrameStartAddr];
4002         else {
4003                 // requested bytes are not in __eh_frame section
4004                 // this can occur when examining the instruction bytes in the __text
4005                 File<A>& file = _ehFrameSection.file();
4006                 for (uint32_t i=0; i < file._sectionsArrayCount; ++i ) {
4007                         const macho_section<typename A::P>* sect = file._sectionsArray[i]->machoSection();
4008                         // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
4009                         if ( sect != NULL ) {
4010                                 if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
4011                                         return file.fileContent() + sect->offset() + addr - sect->addr();
4012                                 }
4013                         }
4014                 }
4015                 throwf("__eh_frame parsing problem.  Can't find target of reference to address 0x%08llX", (uint64_t)addr);
4016         }
4017 }
4018
4019
4020 template <typename A>
4021 uint64_t CFISection<A>::OAS::getULEB128(pint_t& logicalAddr, pint_t end)
4022 {
4023         uintptr_t size = (end - logicalAddr);
4024         libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4025         libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4026         uint64_t result = libunwind::LocalAddressSpace::getULEB128(laddr, laddr+size);
4027         logicalAddr += (laddr-sladdr);
4028         return result;
4029 }
4030
4031 template <typename A>
4032 int64_t CFISection<A>::OAS::getSLEB128(pint_t& logicalAddr, pint_t end)
4033 {
4034         uintptr_t size = (end - logicalAddr);
4035         libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4036         libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4037         int64_t result = libunwind::LocalAddressSpace::getSLEB128(laddr, laddr+size);
4038         logicalAddr += (laddr-sladdr);
4039         return result;
4040 }
4041
4042 template <typename A>
4043 typename A::P::uint_t CFISection<A>::OAS::getEncodedP(pint_t& addr, pint_t end, uint8_t encoding)
4044 {
4045         pint_t startAddr = addr;
4046         pint_t p = addr;
4047         pint_t result;
4048
4049         // first get value
4050         switch (encoding & 0x0F) {
4051                 case DW_EH_PE_ptr:
4052                         result = getP(addr);
4053                         p += sizeof(pint_t);
4054                         addr = (pint_t)p;
4055                         break;
4056                 case DW_EH_PE_uleb128:
4057                         result = getULEB128(addr, end);
4058                         break;
4059                 case DW_EH_PE_udata2:
4060                         result = get16(addr);
4061                         p += 2;
4062                         addr = (pint_t)p;
4063                         break;
4064                 case DW_EH_PE_udata4:
4065                         result = get32(addr);
4066                         p += 4;
4067                         addr = (pint_t)p;
4068                         break;
4069                 case DW_EH_PE_udata8:
4070                         result = get64(addr);
4071                         p += 8;
4072                         addr = (pint_t)p;
4073                         break;
4074                 case DW_EH_PE_sleb128:
4075                         result = getSLEB128(addr, end);
4076                         break;
4077                 case DW_EH_PE_sdata2:
4078                         result = (int16_t)get16(addr);
4079                         p += 2;
4080                         addr = (pint_t)p;
4081                         break;
4082                 case DW_EH_PE_sdata4:
4083                         result = (int32_t)get32(addr);
4084                         p += 4;
4085                         addr = (pint_t)p;
4086                         break;
4087                 case DW_EH_PE_sdata8:
4088                         result = get64(addr);
4089                         p += 8;
4090                         addr = (pint_t)p;
4091                         break;
4092                 default:
4093                         throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4094         }
4095
4096         // then add relative offset
4097         switch ( encoding & 0x70 ) {
4098                 case DW_EH_PE_absptr:
4099                         // do nothing
4100                         break;
4101                 case DW_EH_PE_pcrel:
4102                         result += startAddr;
4103                         break;
4104                 case DW_EH_PE_textrel:
4105                         throw "DW_EH_PE_textrel pointer encoding not supported";
4106                         break;
4107                 case DW_EH_PE_datarel:
4108                         throw "DW_EH_PE_datarel pointer encoding not supported";
4109                         break;
4110                 case DW_EH_PE_funcrel:
4111                         throw "DW_EH_PE_funcrel pointer encoding not supported";
4112                         break;
4113                 case DW_EH_PE_aligned:
4114                         throw "DW_EH_PE_aligned pointer encoding not supported";
4115                         break;
4116                 default:
4117                         throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4118                         break;
4119         }
4120
4121 //  Note: DW_EH_PE_indirect is only used in CIEs to refernce the personality pointer
4122 //  When parsing .o files that pointer contains zero, so we don't to return that.
4123 //  Instead we skip the dereference and return the address of the pointer.
4124 //      if ( encoding & DW_EH_PE_indirect )
4125 //              result = getP(result);
4126
4127         return result;
4128 }
4129
4130 template <>
4131 const char* CUSection<x86_64>::personalityName(class Parser<x86_64>& parser, const macho_relocation_info<x86_64::P>* reloc)
4132 {
4133         assert(reloc->r_extern() && "reloc not extern on personality column in __compact_unwind section");
4134         assert((reloc->r_type() == X86_64_RELOC_UNSIGNED) && "wrong reloc type on personality column in __compact_unwind section");
4135         const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4136         return parser.nameFromSymbol(sym);
4137 }
4138
4139 template <>
4140 const char* CUSection<x86>::personalityName(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
4141 {
4142         assert(reloc->r_extern() && "reloc not extern on personality column in __compact_unwind section");
4143         assert((reloc->r_type() == GENERIC_RELOC_VANILLA) && "wrong reloc type on personality column in __compact_unwind section");
4144         const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4145         return parser.nameFromSymbol(sym);
4146 }
4147
4148 template <typename A>
4149 const char* CUSection<A>::personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
4150 {
4151         return NULL;
4152 }
4153
4154
4155 template <typename A>
4156 int CUSection<A>::infoSorter(const void* l, const void* r)
4157 {
4158         // sort references by symbol index, then address
4159         const Info* left = (Info*)l;
4160         const Info* right = (Info*)r;
4161         if ( left->functionSymbolIndex == right->functionSymbolIndex )
4162                 return (left->functionStartAddress - right->functionStartAddress);
4163         else
4164                 return (left->functionSymbolIndex - right->functionSymbolIndex);
4165 }
4166
4167 template <typename A>
4168 void CUSection<A>::parse(class Parser<A>& parser, uint32_t cnt, Info array[])
4169 {
4170         // walk section content and copy to Info array
4171         const macho_compact_unwind_entry<P>* const entries = (macho_compact_unwind_entry<P>*)(this->file().fileContent() + this->_machOSection->offset());
4172         for (uint32_t i=0; i < cnt; ++i) {
4173                 Info* info = &array[i];
4174                 const macho_compact_unwind_entry<P>* entry = &entries[i];
4175                 info->functionStartAddress      = entry->codeStart();
4176                 info->functionSymbolIndex   = 0xFFFFFFFF;
4177                 info->rangeLength                       = entry->codeLen();
4178                 info->compactUnwindInfo         = entry->compactUnwindInfo();
4179                 info->personality                       = NULL;
4180                 info->lsdaAddress                       = entry->lsda();
4181                 info->function                          = NULL;
4182                 info->lsda                                      = NULL;
4183                 if ( (info->compactUnwindInfo & UNWIND_PERSONALITY_MASK) != 0 )
4184                         warning("no bits should be set in UNWIND_PERSONALITY_MASK of compact unwind encoding in __LD,__compact_unwind section");
4185                 if ( info->lsdaAddress != 0 ) {
4186                         info->compactUnwindInfo |= UNWIND_HAS_LSDA;
4187                 }
4188         }
4189
4190         // scan relocs, local relocs are useless - ignore them
4191         // extern relocs are needed for personality references (possibly for function/lsda refs??)
4192         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(this->file().fileContent() + this->_machOSection->reloff());
4193         const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4194         for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4195                 if ( reloc->r_extern() ) {
4196                         // only expect external relocs on some colummns
4197                         if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::personalityFieldOffset() ) {
4198                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4199                                 array[entryIndex].personality = this->personalityName(parser, reloc);
4200                         }
4201                         else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::lsdaFieldOffset() ) {
4202                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4203                                 const macho_nlist<P>& lsdaSym = parser.symbolFromIndex(reloc->r_symbolnum());
4204                                 if ( (lsdaSym.n_type() & N_TYPE) == N_SECT )
4205                                         array[entryIndex].lsdaAddress = lsdaSym.n_value();
4206                                 else
4207                                         warning("unexpected extern relocation to lsda in __compact_unwind section");
4208                         }
4209                         else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::codeStartFieldOffset() ) {
4210                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4211                                 array[entryIndex].functionSymbolIndex = reloc->r_symbolnum();
4212                         }
4213                         else {
4214                                 warning("unexpected extern relocation in __compact_unwind section");
4215                         }
4216                 }
4217         }
4218
4219         // sort array by function start address so unwind infos will be contiguous for a given function
4220         ::qsort(array, cnt, sizeof(Info), infoSorter);
4221 }
4222
4223 template <typename A>
4224 uint32_t CUSection<A>::count()
4225 {
4226         const macho_section<P>* machoSect =     this->machoSection();
4227         if ( (machoSect->size() % sizeof(macho_compact_unwind_entry<P>)) != 0 )
4228                 throw "malformed __LD,__compact_unwind section, bad length";
4229
4230         return machoSect->size() / sizeof(macho_compact_unwind_entry<P>);
4231 }
4232
4233 template <typename A>
4234 void CUSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cus)
4235 {
4236         Info* const arrayStart = cus.cuArray;
4237         Info* const arrayEnd = &cus.cuArray[cus.cuCount];
4238         for (Info* info=arrayStart; info < arrayEnd; ++info) {
4239                 // if external reloc was used, real address is symbol n_value + addend
4240                 if ( info->functionSymbolIndex != 0xFFFFFFFF )
4241                         info->functionStartAddress += parser.symbolFromIndex(info->functionSymbolIndex).n_value();
4242                 // find function atom from address
4243                 info->function = parser.findAtomByAddress(info->functionStartAddress);
4244                 // find lsda atom from address
4245                 if ( info->lsdaAddress != 0 ) {
4246                         info->lsda = parser.findAtomByAddress(info->lsdaAddress);
4247                         // add lsda subordinate
4248                         typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4249                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, info->lsda);
4250                 }
4251                 if ( info->personality != NULL ) {
4252                         // add personality subordinate
4253                         typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4254                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinatePersonality, false, info->personality);
4255                 }
4256         }
4257
4258 }
4259
4260 template <typename A>
4261 SymboledSection<A>::SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
4262         : Section<A>(f, s), _type(ld::Atom::typeUnclassified)
4263 {
4264         switch ( s->flags() & SECTION_TYPE ) {
4265                 case S_ZEROFILL:
4266                         _type = ld::Atom::typeZeroFill;
4267                         break;
4268                 case S_MOD_INIT_FUNC_POINTERS:
4269                         _type = ld::Atom::typeInitializerPointers;
4270                         break;
4271                 case S_MOD_TERM_FUNC_POINTERS:
4272                         _type = ld::Atom::typeTerminatorPointers;
4273                         break;
4274                 case S_THREAD_LOCAL_VARIABLES:
4275                         _type = ld::Atom::typeTLV;
4276                         break;
4277                 case S_THREAD_LOCAL_ZEROFILL:
4278                         _type = ld::Atom::typeTLVZeroFill;
4279                         break;
4280                 case S_THREAD_LOCAL_REGULAR:
4281                         _type = ld::Atom::typeTLVInitialValue;
4282                         break;
4283                 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
4284                         _type = ld::Atom::typeTLVInitializerPointers;
4285                         break;
4286                 case S_REGULAR:
4287                         if ( strncmp(s->sectname(), "__gcc_except_tab", 16) == 0 )
4288                                 _type = ld::Atom::typeLSDA;
4289                         else if ( this->type() == ld::Section::typeInitializerPointers )
4290                                 _type = ld::Atom::typeInitializerPointers;
4291                         break;
4292         }
4293 }
4294
4295
4296 template <typename A>
4297 bool SymboledSection<A>::dontDeadStrip()
4298 {
4299         switch ( _type ) {
4300                 case ld::Atom::typeInitializerPointers:
4301                 case ld::Atom::typeTerminatorPointers:
4302                         return true;
4303                 default:
4304                         // model an object file without MH_SUBSECTIONS_VIA_SYMBOLS as one in which nothing can be dead stripped
4305                         if ( ! this->_file.canScatterAtoms() )
4306                                 return true;
4307                         // call inherited
4308                         return Section<A>::dontDeadStrip();
4309         }
4310         return false;
4311 }
4312
4313
4314 template <typename A>
4315 uint32_t SymboledSection<A>::computeAtomCount(class Parser<A>& parser,
4316                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
4317                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
4318 {
4319         const pint_t startAddr = this->_machOSection->addr();
4320         const pint_t endAddr = startAddr + this->_machOSection->size();
4321         const uint32_t sectNum = this->sectionNum(parser);
4322
4323         uint32_t count = 0;
4324         pint_t  addr;
4325         pint_t  size;
4326         const macho_nlist<P>* sym;
4327         while ( it.next(parser, sectNum, startAddr, endAddr, &addr, &size, &sym) ) {
4328                 ++count;
4329         }
4330         //fprintf(stderr, "computeAtomCount(%s,%s) => %d\n", this->segmentName(), this->sectionName(), count);
4331         return count;
4332 }
4333
4334 template <typename A>
4335 uint32_t SymboledSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4336                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4337                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
4338 {
4339         this->_beginAtoms = (Atom<A>*)p;
4340
4341         //fprintf(stderr, "SymboledSection::appendAtoms() in section %s\n", this->_machOSection->sectname());
4342         const pint_t startAddr = this->_machOSection->addr();
4343         const pint_t endAddr = startAddr + this->_machOSection->size();
4344         const uint32_t sectNum = this->sectionNum(parser);
4345
4346         uint32_t count = 0;
4347         pint_t  addr;
4348         pint_t  size;
4349         const macho_nlist<P>* label;
4350         while ( it.next(parser, sectNum, startAddr, endAddr, &addr, &size, &label) ) {
4351                 Atom<A>* allocatedSpace = (Atom<A>*)p;
4352                 // is break because of label or CFI?
4353                 if ( label != NULL ) {
4354                         // The size is computed based on the address of the next label (or the end of the section for the last label)
4355                         // If there are two labels at the same address, we want them one to be an alias of the other.
4356                         // If the label is at the end of a section, it is has zero size, but is not an alias
4357                         const bool isAlias = ( (size == 0) && (addr <  endAddr) );
4358                         new (allocatedSpace) Atom<A>(*this, parser, *label, size, isAlias);
4359                         if ( isAlias )
4360                                 this->_hasAliases = true;
4361                 }
4362                 else {
4363                         ld::Atom::SymbolTableInclusion inclusion = ld::Atom::symbolTableNotIn;
4364                         ld::Atom::ContentType ctype = this->contentType();
4365                         if ( ctype == ld::Atom::typeLSDA )
4366                                 inclusion = ld::Atom::symbolTableInWithRandomAutoStripLabel;
4367                         new (allocatedSpace) Atom<A>(*this, "anon", addr, size, ld::Atom::definitionRegular, ld::Atom::combineNever,
4368                                                                                 ld::Atom::scopeTranslationUnit, ctype, inclusion,
4369                                                                                 this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
4370                 }
4371                 p += sizeof(Atom<A>);
4372                 ++count;
4373         }
4374
4375         this->_endAtoms = (Atom<A>*)p;
4376         return count;
4377 }
4378
4379
4380 template <typename A>
4381 uint32_t ImplicitSizeSection<A>::computeAtomCount(class Parser<A>& parser,
4382                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4383                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
4384 {
4385         uint32_t count = 0;
4386         const macho_section<P>* sect = this->machoSection();
4387         const pint_t startAddr = sect->addr();
4388         const pint_t endAddr = startAddr + sect->size();
4389         for (pint_t addr = startAddr; addr < endAddr; addr += elementSizeAtAddress(addr) ) {
4390                 if ( useElementAt(parser, it, addr) )
4391                         ++count;
4392         }
4393         if ( it.fileHasOverlappingSymbols && (sect->size() != 0) && (this->combine(parser, startAddr) == ld::Atom::combineByNameAndContent) ) {
4394                 // if there are multiple labels in this section for the same address, then clone them into multi atoms
4395                 pint_t  prevSymbolAddr = (pint_t)(-1);
4396                 uint8_t prevSymbolSectNum = 0;
4397                 for(uint32_t i=0; i < it.sortedSymbolCount; ++i) {
4398                         const macho_nlist<P>& sym = parser.symbolFromIndex(it.sortedSymbolIndexes[i]);
4399                         const pint_t symbolAddr = sym.n_value();
4400                         const pint_t symbolSectNum = sym.n_sect();
4401                         if ( (symbolAddr == prevSymbolAddr) && (prevSymbolSectNum == symbolSectNum) && (symbolSectNum == this->sectionNum(parser)) ) {
4402                                 ++count;
4403                         }
4404                         prevSymbolAddr = symbolAddr;
4405                         prevSymbolSectNum = symbolSectNum;
4406                 }
4407         }
4408         return count;
4409 }
4410
4411 template <typename A>
4412 uint32_t ImplicitSizeSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4413                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4414                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
4415 {
4416         this->_beginAtoms = (Atom<A>*)p;
4417
4418         const macho_section<P>* sect = this->machoSection();
4419         const pint_t startAddr = sect->addr();
4420         const pint_t endAddr = startAddr + sect->size();
4421         const uint32_t sectNum = this->sectionNum(parser);
4422         //fprintf(stderr, "ImplicitSizeSection::appendAtoms() in section %s\n", sect->sectname());
4423         uint32_t count = 0;
4424         pint_t  foundAddr;
4425         pint_t  size;
4426         const macho_nlist<P>* foundLabel;
4427         Atom<A>* allocatedSpace;
4428         while ( it.next(parser, sectNum, startAddr, endAddr, &foundAddr, &size, &foundLabel) ) {
4429                 if ( foundLabel != NULL ) {
4430                         pint_t labeledAtomSize = this->elementSizeAtAddress(foundAddr);
4431                         allocatedSpace = (Atom<A>*)p;
4432                         if ( this->ignoreLabel(parser.nameFromSymbol(*foundLabel)) ) {
4433                                 //fprintf(stderr, "  0x%08llX make annon\n", (uint64_t)foundAddr);
4434                                 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, foundAddr), foundAddr,
4435                                                                                         this->elementSizeAtAddress(foundAddr), this->definition(),
4436                                                                                         this->combine(parser, foundAddr), this->scopeAtAddress(parser, foundAddr),
4437                                                                                         this->contentType(), this->symbolTableInclusion(),
4438                                                                                         this->dontDeadStrip(), false, false, this->alignmentForAddress(foundAddr));
4439                         }
4440                         else {
4441                                 // make named atom for label
4442                                 //fprintf(stderr, "  0x%08llX make labeled\n", (uint64_t)foundAddr);
4443                                 new (allocatedSpace) Atom<A>(*this, parser, *foundLabel, labeledAtomSize);
4444                         }
4445                         ++count;
4446                         p += sizeof(Atom<A>);
4447                         foundAddr += labeledAtomSize;
4448                         size -= labeledAtomSize;
4449                 }
4450                 // some number of anonymous atoms
4451                 for (pint_t addr = foundAddr; addr < (foundAddr+size); addr += elementSizeAtAddress(addr) ) {
4452                         // make anon atoms for area before label
4453                         if ( this->useElementAt(parser, it, addr) ) {
4454                                 //fprintf(stderr, "  0x%08llX make annon\n", (uint64_t)addr);
4455                                 allocatedSpace = (Atom<A>*)p;
4456                                 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, addr), addr, this->elementSizeAtAddress(addr),
4457                                                                                         this->definition(), this->combine(parser, addr), this->scopeAtAddress(parser, addr),
4458                                                                                         this->contentType(), this->symbolTableInclusion(),
4459                                                                                         this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
4460                                 ++count;
4461                                 p += sizeof(Atom<A>);
4462                         }
4463                 }
4464         }
4465
4466         this->_endAtoms = (Atom<A>*)p;
4467
4468         return count;
4469 }
4470
4471
4472 template <typename A>
4473 unsigned long Literal4Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4474 {
4475         const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
4476         return *literalContent;
4477 }
4478
4479 template <typename A>
4480 bool Literal4Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4481                                                                                                         const ld::IndirectBindingTable& ind) const
4482 {
4483         assert(this->type() == rhs.section().type());
4484         const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
4485
4486         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4487         assert(rhsAtom != NULL);
4488         if ( rhsAtom != NULL ) {
4489                 const uint32_t* rhsLiteralContent = (uint32_t*)rhsAtom->contentPointer();
4490                 return (*literalContent == *rhsLiteralContent);
4491         }
4492         return false;
4493 }
4494
4495
4496 template <typename A>
4497 unsigned long Literal8Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4498 {
4499 #if __LP64__
4500         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4501         return *literalContent;
4502 #else
4503         unsigned long hash = 5381;
4504         const uint8_t* byteContent = atom->contentPointer();
4505         for (int i=0; i < 8; ++i) {
4506                 hash = hash * 33 + byteContent[i];
4507         }
4508         return hash;
4509 #endif
4510 }
4511
4512 template <typename A>
4513 bool Literal8Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4514                                                                                                         const ld::IndirectBindingTable& ind) const
4515 {
4516         if ( rhs.section().type() != ld::Section::typeLiteral8 )
4517                 return false;
4518         assert(this->type() == rhs.section().type());
4519         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4520
4521         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4522         assert(rhsAtom != NULL);
4523         if ( rhsAtom != NULL ) {
4524                 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
4525                 return (*literalContent == *rhsLiteralContent);
4526         }
4527         return false;
4528 }
4529
4530
4531 template <typename A>
4532 unsigned long Literal16Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4533 {
4534         unsigned long hash = 5381;
4535         const uint8_t* byteContent = atom->contentPointer();
4536         for (int i=0; i < 16; ++i) {
4537                 hash = hash * 33 + byteContent[i];
4538         }
4539         return hash;
4540 }
4541
4542 template <typename A>
4543 bool Literal16Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4544                                                                                                         const ld::IndirectBindingTable& ind) const
4545 {
4546         if ( rhs.section().type() != ld::Section::typeLiteral16 )
4547                 return false;
4548         assert(this->type() == rhs.section().type());
4549         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4550
4551         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4552         assert(rhsAtom != NULL);
4553         if ( rhsAtom != NULL ) {
4554                 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
4555                 return ((literalContent[0] == rhsLiteralContent[0]) && (literalContent[1] == rhsLiteralContent[1]));
4556         }
4557         return false;
4558 }
4559
4560
4561
4562 template <typename A>
4563 typename A::P::uint_t CStringSection<A>::elementSizeAtAddress(pint_t addr)
4564 {
4565         const macho_section<P>* sect = this->machoSection();
4566         const char* stringContent = (char*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
4567         return strlen(stringContent) + 1;
4568 }
4569
4570 template <typename A>
4571 bool CStringSection<A>::useElementAt(Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
4572 {
4573         return true;
4574 }
4575
4576 template <typename A>
4577 bool CStringSection<A>::ignoreLabel(const char* label)
4578 {
4579         return (label[0] == 'L') || (label[0] == 'l');
4580 }
4581
4582 template <typename A>
4583 Atom<A>* CStringSection<A>::findAtomByAddress(pint_t addr)
4584 {
4585         Atom<A>* result = this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms);
4586         return result;
4587 }
4588
4589 template <typename A>
4590 unsigned long CStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4591 {
4592         unsigned long hash = 5381;
4593         const char* stringContent = (char*)atom->contentPointer();
4594         for (const char* s = stringContent; *s != '\0'; ++s) {
4595                 hash = hash * 33 + *s;
4596         }
4597         return hash;
4598 }
4599
4600
4601 template <typename A>
4602 bool CStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4603                                                                                                         const ld::IndirectBindingTable& ind) const
4604 {
4605         if ( rhs.section().type() != ld::Section::typeCString )
4606                 return false;
4607         assert(this->type() == rhs.section().type());
4608         assert(strcmp(this->sectionName(), rhs.section().sectionName())== 0);
4609         assert(strcmp(this->segmentName(), rhs.section().segmentName())== 0);
4610         const char* stringContent = (char*)atom->contentPointer();
4611
4612         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4613         assert(rhsAtom != NULL);
4614         if ( rhsAtom != NULL ) {
4615                 if ( atom->_size != rhsAtom->_size )
4616                         return false;
4617                 const char* rhsStringContent = (char*)rhsAtom->contentPointer();
4618                 return (strcmp(stringContent, rhsStringContent) == 0);
4619         }
4620         return false;
4621 }
4622
4623
4624 template <>
4625 ld::Fixup::Kind NonLazyPointerSection<x86>::fixupKind()
4626 {
4627         return ld::Fixup::kindStoreLittleEndian32;
4628 }
4629
4630 template <>
4631 ld::Fixup::Kind NonLazyPointerSection<arm>::fixupKind()
4632 {
4633         return ld::Fixup::kindStoreLittleEndian32;
4634 }
4635
4636
4637 template <>
4638 void NonLazyPointerSection<x86_64>::makeFixups(class Parser<x86_64>& parser, const struct Parser<x86_64>::CFI_CU_InfoArrays&)
4639 {
4640         assert(0 && "x86_64 should not have non-lazy-pointer sections in .o files");
4641 }
4642
4643 template <typename A>
4644 void NonLazyPointerSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
4645 {
4646         // add references for each NLP atom based on indirect symbol table
4647         const macho_section<P>* sect = this->machoSection();
4648         const pint_t endAddr = sect->addr() + sect->size();
4649         for( pint_t addr = sect->addr(); addr < endAddr; addr += sizeof(pint_t)) {
4650                 typename Parser<A>::SourceLocation      src;
4651                 typename Parser<A>::TargetDesc          target;
4652                 src.atom = this->findAtomByAddress(addr);
4653                 src.offsetInAtom = 0;
4654                 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
4655                 target.atom = NULL;
4656                 target.name = NULL;
4657                 target.weakImport = false;
4658                 target.addend = 0;
4659                 if ( symIndex == INDIRECT_SYMBOL_LOCAL ) {
4660                         // use direct reference for local symbols
4661                         const pint_t* nlpContent = (pint_t*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
4662                         pint_t targetAddr = P::getP(*nlpContent);
4663                         target.atom = parser.findAtomByAddress(targetAddr);
4664                         target.weakImport = false;
4665                         target.addend = (targetAddr - target.atom->objectAddress());
4666                         // <rdar://problem/8385011> if pointer to thumb function, mask of thumb bit (not an addend of +1)
4667                         if ( target.atom->isThumb() )
4668                                 target.addend &= (-2);
4669                         assert(src.atom->combine() == ld::Atom::combineNever);
4670                 }
4671                 else {
4672                         const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
4673                         // use direct reference for local symbols
4674                         if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
4675                                 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
4676                                 assert(src.atom->combine() == ld::Atom::combineNever);
4677                         }
4678                         else {
4679                                 target.name = parser.nameFromSymbol(sym);
4680                                 target.weakImport = parser.weakImportFromSymbol(sym);
4681                                 assert(src.atom->combine() == ld::Atom::combineByNameAndReferences);
4682                         }
4683                 }
4684                 parser.addFixups(src, this->fixupKind(), target);
4685         }
4686 }
4687
4688 template <typename A>
4689 ld::Atom::Combine NonLazyPointerSection<A>::combine(Parser<A>& parser, pint_t addr)
4690 {
4691         const macho_section<P>* sect = this->machoSection();
4692         uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
4693         if ( symIndex == INDIRECT_SYMBOL_LOCAL)
4694                 return ld::Atom::combineNever;
4695
4696         // don't coalesce non-lazy-pointers to local symbols
4697         const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
4698         if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) )
4699                 return ld::Atom::combineNever;
4700
4701         return ld::Atom::combineByNameAndReferences;
4702 }
4703
4704 template <typename A>
4705 const char* NonLazyPointerSection<A>::targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind)
4706 {
4707         assert(atom->combine() == ld::Atom::combineByNameAndReferences);
4708         assert(atom->fixupCount() == 1);
4709         ld::Fixup::iterator fit = atom->fixupsBegin();
4710         const char* name = NULL;
4711         switch ( fit->binding ) {
4712                 case ld::Fixup::bindingByNameUnbound:
4713                         name = fit->u.name;
4714                         break;
4715                 case ld::Fixup::bindingByContentBound:
4716                         name = fit->u.target->name();
4717                         break;
4718                 case ld::Fixup::bindingsIndirectlyBound:
4719                         name = ind.indirectName(fit->u.bindingIndex);
4720                         break;
4721                 default:
4722                         assert(0);
4723         }
4724         assert(name != NULL);
4725         return name;
4726 }
4727
4728 template <typename A>
4729 unsigned long NonLazyPointerSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4730 {
4731         assert(atom->combine() == ld::Atom::combineByNameAndReferences);
4732         unsigned long hash = 9508;
4733         for (const char* s = this->targetName(atom, ind); *s != '\0'; ++s) {
4734                 hash = hash * 33 + *s;
4735         }
4736         return hash;
4737 }
4738
4739 template <typename A>
4740 bool NonLazyPointerSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4741                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
4742 {
4743         if ( rhs.section().type() != ld::Section::typeNonLazyPointer )
4744                 return false;
4745         assert(this->type() == rhs.section().type());
4746         // there can be many non-lazy pointer in different section names
4747         // we only want to coalesce in same section name
4748         if ( *this != rhs.section() )
4749                 return false;
4750         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4751         assert(rhsAtom !=  NULL);
4752         const char* thisName = this->targetName(atom, indirectBindingTable);
4753         const char* rhsName = this->targetName(rhsAtom, indirectBindingTable);
4754         return (strcmp(thisName, rhsName) == 0);
4755 }
4756
4757 template <typename A>
4758 ld::Atom::Scope NonLazyPointerSection<A>::scopeAtAddress(Parser<A>& parser, pint_t addr)
4759 {
4760         const macho_section<P>* sect = this->machoSection();
4761         uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
4762         if ( symIndex == INDIRECT_SYMBOL_LOCAL)
4763                 return ld::Atom::scopeTranslationUnit;
4764         else
4765                 return ld::Atom::scopeLinkageUnit;
4766 }
4767
4768
4769 template <typename A>
4770 const uint8_t* CFStringSection<A>::targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
4771                                                                                                         ContentType* ct, unsigned int* count)
4772 {
4773         *ct = contentUnknown;
4774         for (ld::Fixup::iterator fit=atom->fixupsBegin(), end=atom->fixupsEnd(); fit != end; ++fit) {
4775                 const ld::Atom* targetAtom = NULL;
4776                 switch ( fit->binding ) {
4777                         case ld::Fixup::bindingByNameUnbound:
4778                                 // ignore reference to ___CFConstantStringClassReference
4779                                 // we are just looking for reference to backing string data
4780                                 assert(fit->offsetInAtom == 0);
4781                                 assert(strcmp(fit->u.name, "___CFConstantStringClassReference") == 0);
4782                                 break;
4783                         case ld::Fixup::bindingDirectlyBound:
4784                         case ld::Fixup::bindingByContentBound:
4785                                 targetAtom = fit->u.target;
4786                                 break;
4787                         case ld::Fixup::bindingsIndirectlyBound:
4788                                 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
4789                                 break;
4790                         default:
4791                                 assert(0 && "bad binding type");
4792                 }
4793                 assert(targetAtom != NULL);
4794                 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
4795                 if ( targetAtom->section().type() == ld::Section::typeCString ) {
4796                         *ct = contentUTF8;
4797                         *count = targetAtom->size();
4798                 }
4799                 else if ( targetAtom->section().type() == ld::Section::typeUTF16Strings ) {
4800                         *ct = contentUTF16;
4801                         *count = (targetAtom->size()+1)/2; // round up incase of buggy compiler that has only one trailing zero byte
4802                 }
4803                 assert(target !=  NULL);
4804                 return target->contentPointer();
4805         }
4806         assert(0);
4807         return NULL;
4808 }
4809
4810 template <typename A>
4811 unsigned long CFStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4812 {
4813         // base hash of CFString on hash of cstring it wraps
4814         ContentType cType;
4815         unsigned long hash;
4816         unsigned int charCount;
4817         const uint8_t* content = this->targetContent(atom, ind, &cType, &charCount);
4818         switch ( cType ) {
4819                 case contentUTF8:
4820                         hash = 9408;
4821                         for (const char* s = (char*)content; *s != '\0'; ++s) {
4822                                 hash = hash * 33 + *s;
4823                         }
4824                         return hash;
4825                 case contentUTF16:
4826                         hash = 407955;
4827                         --charCount; // don't add last 0x0000 to hash because some buggy compilers only have trailing single byte
4828                         for (const uint16_t* s = (uint16_t*)content; charCount > 0; ++s, --charCount) {
4829                                 hash = hash * 1025 + *s;
4830                         }
4831                         return hash;
4832                 case contentUnknown:
4833                         return 0;
4834         }
4835         return 0;
4836 }
4837
4838
4839 template <typename A>
4840 bool CFStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4841                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
4842 {
4843         if ( atom == &rhs )
4844                 return true;
4845         if ( rhs.section().type() != ld::Section::typeCFString)
4846                 return false;
4847         assert(this->type() == rhs.section().type());
4848         assert(strcmp(this->sectionName(), "__cfstring") == 0);
4849
4850         ContentType thisType;
4851         unsigned int charCount;
4852         const uint8_t* cstringContent = this->targetContent(atom, indirectBindingTable, &thisType, &charCount);
4853         ContentType rhsType;
4854         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4855         assert(rhsAtom !=  NULL);
4856         unsigned int rhsCharCount;
4857         const uint8_t* rhsStringContent = this->targetContent(rhsAtom, indirectBindingTable, &rhsType, &rhsCharCount);
4858
4859         if ( thisType != rhsType )
4860                 return false;
4861
4862         // no need to compare content of pointers are already the same
4863         if ( cstringContent == rhsStringContent )
4864                 return true;
4865
4866         // no need to compare content if size is different
4867         if ( charCount != rhsCharCount )
4868                 return false;
4869
4870         switch ( thisType ) {
4871                 case contentUTF8:
4872                         return (strcmp((char*)cstringContent, (char*)rhsStringContent) == 0);
4873                 case contentUTF16:
4874                         {
4875                                 const uint16_t* cstringContent16 = (uint16_t*)cstringContent;
4876                                 const uint16_t* rhsStringContent16 = (uint16_t*)rhsStringContent;
4877                                 for (unsigned int i = 0; i < charCount; ++i) {
4878                                         if ( cstringContent16[i] != rhsStringContent16[i] )
4879                                                 return false;
4880                                 }
4881                                 return true;
4882                         }
4883                 case contentUnknown:
4884                         return false;
4885         }
4886         return false;
4887 }
4888
4889
4890 template <typename A>
4891 typename A::P::uint_t ObjC1ClassSection<A>::elementSizeAtAddress(pint_t addr)
4892 {
4893         // nominal size for each class is 48 bytes, but sometimes the compiler
4894         // over aligns and there is padding after class data
4895         const macho_section<P>* sct = this->machoSection();
4896         uint32_t align = 1 << sct->align();
4897         uint32_t size = ((12 * sizeof(pint_t)) + align-1) & (-align);
4898         return size;
4899 }
4900
4901 template <typename A>
4902 const char* ObjC1ClassSection<A>::unlabeledAtomName(Parser<A>& parser, pint_t addr)
4903 {
4904         // 8-bytes into class object is pointer to class name
4905         const macho_section<P>* sct = this->machoSection();
4906         uint32_t classObjcFileOffset = sct->offset() - sct->addr() + addr;
4907         const uint8_t* mappedFileContent = this->file().fileContent();
4908         pint_t nameAddr = P::getP(*((pint_t*)(mappedFileContent+classObjcFileOffset+2*sizeof(pint_t))));
4909
4910         // find section containing string address to get string bytes
4911         const macho_section<P>* const sections = parser.firstMachOSection();
4912         const uint32_t sectionCount = parser.machOSectionCount();
4913         for (uint32_t i=0; i < sectionCount; ++i) {
4914                 const macho_section<P>* aSect = &sections[i];
4915                 if ( (aSect->addr() <= nameAddr) && (nameAddr < (aSect->addr()+aSect->size())) ) {
4916                         assert((aSect->flags() & SECTION_TYPE) == S_CSTRING_LITERALS);
4917                         uint32_t nameFileOffset = aSect->offset() - aSect->addr() + nameAddr;
4918                         const char* name = (char*)mappedFileContent + nameFileOffset;
4919                         // spin through symbol table to find absolute symbol corresponding to this class
4920                         for (uint32_t s=0; s < parser.symbolCount(); ++s) {
4921                                 const macho_nlist<P>& sym =     parser.symbolFromIndex(s);
4922                                 if ( (sym.n_type() & N_TYPE) != N_ABS )
4923                                         continue;
4924                                 const char* absName = parser.nameFromSymbol(sym);
4925                                 if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
4926                                         if ( strcmp(&absName[17], name) == 0 )
4927                                                 return absName;
4928                                 }
4929                         }
4930                         assert(0 && "obj class name not found in symbol table");
4931                 }
4932         }
4933         assert(0 && "obj class name not found");
4934         return "unknown objc class";
4935 }
4936
4937
4938 template <typename A>
4939 const char* ObjC2ClassRefsSection<A>::targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4940 {
4941         assert(atom->fixupCount() == 1);
4942         ld::Fixup::iterator fit = atom->fixupsBegin();
4943         const char* className = NULL;
4944         switch ( fit->binding ) {
4945                 case ld::Fixup::bindingByNameUnbound:
4946                         className = fit->u.name;
4947                         break;
4948                 case ld::Fixup::bindingDirectlyBound:
4949                 case ld::Fixup::bindingByContentBound:
4950                         className = fit->u.target->name();
4951                         break;
4952                 case ld::Fixup::bindingsIndirectlyBound:
4953                         className = ind.indirectName(fit->u.bindingIndex);
4954                         break;
4955                 default:
4956                         assert(0 && "unsupported binding in objc2 class ref section");
4957         }
4958         assert(className != NULL);
4959         return className;
4960 }
4961
4962
4963 template <typename A>
4964 unsigned long ObjC2ClassRefsSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4965 {
4966         unsigned long hash = 978;
4967         for (const char* s = targetClassName(atom, ind); *s != '\0'; ++s) {
4968                 hash = hash * 33 + *s;
4969         }
4970         return hash;
4971 }
4972
4973 template <typename A>
4974 bool ObjC2ClassRefsSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4975                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
4976 {
4977         assert(this->type() == rhs.section().type());
4978         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4979         assert(rhsAtom !=  NULL);
4980         const char* thisClassName = targetClassName(atom, indirectBindingTable);
4981         const char* rhsClassName = targetClassName(rhsAtom, indirectBindingTable);
4982         return (strcmp(thisClassName, rhsClassName) == 0);
4983 }
4984
4985
4986 template <typename A>
4987 const char* Objc1ClassReferences<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4988 {
4989         assert(atom->fixupCount() == 2);
4990         ld::Fixup::iterator fit = atom->fixupsBegin();
4991         if ( fit->kind == ld::Fixup::kindSetTargetAddress )
4992                 ++fit;
4993         const ld::Atom* targetAtom = NULL;
4994         switch ( fit->binding ) {
4995                 case ld::Fixup::bindingByContentBound:
4996                         targetAtom = fit->u.target;
4997                         break;
4998                 case ld::Fixup::bindingsIndirectlyBound:
4999                         targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5000                         if ( targetAtom == NULL ) {
5001                                 fprintf(stderr, "missing target named %s\n", ind.indirectName(fit->u.bindingIndex));
5002                         }
5003                         break;
5004                 default:
5005                         assert(0);
5006         }
5007         assert(targetAtom != NULL);
5008         const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5009         assert(target !=  NULL);
5010         return (char*)target->contentPointer();
5011 }
5012
5013
5014 template <typename A>
5015 const char* PointerToCStringSection<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5016 {
5017         assert(atom->fixupCount() == 1);
5018         ld::Fixup::iterator fit = atom->fixupsBegin();
5019         const ld::Atom* targetAtom = NULL;
5020         switch ( fit->binding ) {
5021                 case ld::Fixup::bindingByContentBound:
5022                         targetAtom = fit->u.target;
5023                         break;
5024                 case ld::Fixup::bindingsIndirectlyBound:
5025                         targetAtom = ind.indirectAtom(fit->u.bindingIndex);
5026                         break;
5027                 default:
5028                         assert(0);
5029         }
5030         assert(targetAtom != NULL);
5031         const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5032         assert(target !=  NULL);
5033         return (char*)target->contentPointer();
5034 }
5035
5036 template <typename A>
5037 unsigned long PointerToCStringSection<A>::contentHash(const class Atom<A>* atom,
5038                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5039 {
5040         // make hash from section name and target cstring name
5041         unsigned long hash = 123;
5042         for (const char* s = this->sectionName(); *s != '\0'; ++s) {
5043                 hash = hash * 33 + *s;
5044         }
5045         for (const char* s = this->targetCString(atom, indirectBindingTable); *s != '\0'; ++s) {
5046                 hash = hash * 33 + *s;
5047         }
5048         return hash;
5049 }
5050
5051 template <typename A>
5052 bool PointerToCStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5053                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5054 {
5055         assert(this->type() == rhs.section().type());
5056         // there can be pointers-to-cstrings in different section names
5057         // we only want to coalesce in same section name
5058         if ( *this != rhs.section() )
5059                 return false;
5060
5061         // get string content for this
5062         const char* cstringContent = this->targetCString(atom, indirectBindingTable);
5063         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5064         assert(rhsAtom !=  NULL);
5065         const char* rhsCstringContent = this->targetCString(rhsAtom, indirectBindingTable);
5066
5067         assert(cstringContent != NULL);
5068         assert(rhsCstringContent != NULL);
5069         return (strcmp(cstringContent, rhsCstringContent) == 0);
5070 }
5071
5072
5073
5074 template <typename A>
5075 unsigned long UTF16StringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5076 {
5077         unsigned long hash = 5381;
5078         const uint16_t* stringContent = (uint16_t*)atom->contentPointer();
5079         // some buggy compilers end utf16 data with single byte, so don't use last word in hash computation
5080         unsigned int count = (atom->size()/2) - 1;
5081         for (const uint16_t* s = stringContent; count > 0; ++s, --count) {
5082                 hash = hash * 33 + *s;
5083         }
5084         return hash;
5085 }
5086
5087 template <typename A>
5088 bool UTF16StringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5089                                                                                                         const ld::IndirectBindingTable& ind) const
5090 {
5091         if ( rhs.section().type() != ld::Section::typeUTF16Strings )
5092                 return false;
5093         assert(0);
5094         return false;
5095 }
5096
5097
5098
5099
5100
5101
5102
5103 template <>
5104 uint32_t Section<x86_64>::x86_64PcRelOffset(uint8_t r_type)
5105 {
5106         switch ( r_type ) {
5107                 case X86_64_RELOC_SIGNED:
5108                         return 4;
5109                 case X86_64_RELOC_SIGNED_1:
5110                         return 5;
5111                 case X86_64_RELOC_SIGNED_2:
5112                         return 6;
5113                 case X86_64_RELOC_SIGNED_4:
5114                         return 8;
5115         }
5116         return 0;
5117 }
5118
5119
5120 template <>
5121 bool Section<x86_64>::addRelocFixup(class Parser<x86_64>& parser, const macho_relocation_info<P>* reloc)
5122 {
5123         const macho_section<P>* sect = this->machoSection();
5124         uint64_t srcAddr = sect->addr() + reloc->r_address();
5125         Parser<x86_64>::SourceLocation  src;
5126         Parser<x86_64>::TargetDesc              target;
5127         Parser<x86_64>::TargetDesc              toTarget;
5128         src.atom = this->findAtomByAddress(srcAddr);
5129         src.offsetInAtom = srcAddr - src.atom->_objAddress;
5130         const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5131         uint64_t contentValue = 0;
5132         const macho_relocation_info<x86_64::P>* nextReloc = &reloc[1];
5133         bool result = false;
5134         bool useDirectBinding;
5135         switch ( reloc->r_length() ) {
5136                 case 0:
5137                         contentValue = *fixUpPtr;
5138                         break;
5139                 case 1:
5140                         contentValue = (int64_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5141                         break;
5142                 case 2:
5143                         contentValue = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5144                         break;
5145                 case 3:
5146                         contentValue = E::get64(*((uint64_t*)fixUpPtr));
5147                         break;
5148         }
5149         target.atom = NULL;
5150         target.name = NULL;
5151         target.weakImport = false;
5152         target.addend = 0;
5153         if ( reloc->r_extern() ) {
5154                 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5155                 // use direct reference for local symbols
5156                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5157                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5158                         target.addend += contentValue;
5159                 }
5160                 else {
5161                         target.name = parser.nameFromSymbol(sym);
5162                         target.weakImport = parser.weakImportFromSymbol(sym);
5163                         target.addend = contentValue;
5164                 }
5165                 // cfstrings should always use direct reference to backing store
5166                 if ( (this->type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
5167                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5168                         target.addend = contentValue;
5169                 }
5170         }
5171         else {
5172                 if ( reloc->r_pcrel()  )
5173                         contentValue += srcAddr + x86_64PcRelOffset(reloc->r_type());
5174                 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5175         }
5176         switch ( reloc->r_type() ) {
5177                 case X86_64_RELOC_UNSIGNED:
5178                         if ( reloc->r_pcrel() )
5179                                 throw "pcrel and X86_64_RELOC_UNSIGNED not supported";
5180                         switch ( reloc->r_length() ) {
5181                                 case 0:
5182                                 case 1:
5183                                         throw "length < 2 and X86_64_RELOC_UNSIGNED not supported";
5184                                 case 2:
5185                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5186                                         break;
5187                                 case 3:
5188                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian64, target);
5189                                         break;
5190                         }
5191                         break;
5192                 case X86_64_RELOC_SIGNED:
5193                 case X86_64_RELOC_SIGNED_1:
5194                 case X86_64_RELOC_SIGNED_2:
5195                 case X86_64_RELOC_SIGNED_4:
5196                         if ( ! reloc->r_pcrel() )
5197                                 throw "not pcrel and X86_64_RELOC_SIGNED* not supported";
5198                         if ( reloc->r_length() != 2 )
5199                                 throw "length != 2 and X86_64_RELOC_SIGNED* not supported";
5200                         switch ( reloc->r_type() ) {
5201                                 case X86_64_RELOC_SIGNED:
5202                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5203                                         break;
5204                                 case X86_64_RELOC_SIGNED_1:
5205                                         if ( reloc->r_extern() )
5206                                                 target.addend += 1;
5207                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_1, target);
5208                                         break;
5209                                 case X86_64_RELOC_SIGNED_2:
5210                                         if ( reloc->r_extern() )
5211                                                 target.addend += 2;
5212                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_2, target);
5213                                         break;
5214                                 case X86_64_RELOC_SIGNED_4:
5215                                         if ( reloc->r_extern() )
5216                                                 target.addend += 4;
5217                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_4, target);
5218                                         break;
5219                         }
5220                         break;
5221                 case X86_64_RELOC_BRANCH:
5222                         if ( ! reloc->r_pcrel() )
5223                                 throw "not pcrel and X86_64_RELOC_BRANCH not supported";
5224                         switch ( reloc->r_length() ) {
5225                                 case 2:
5226                                         if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5227                                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5228                                                 parser.addDtraceExtraInfos(src, &target.name[16]);
5229                                         }
5230                                         else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5231                                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5232                                                 parser.addDtraceExtraInfos(src, &target.name[20]);
5233                                         }
5234                                         else {
5235                                                 parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel32, target);
5236                                         }
5237                                         break;
5238                                 case 0:
5239                                         parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel8, target);
5240                                         break;
5241                                 default:
5242                                         throwf("length=%d and X86_64_RELOC_BRANCH not supported", reloc->r_length());
5243                         }
5244                         break;
5245                 case X86_64_RELOC_GOT:
5246                         if ( ! reloc->r_extern() )
5247                                 throw "not extern and X86_64_RELOC_GOT not supported";
5248                         if ( ! reloc->r_pcrel() )
5249                                 throw "not pcrel and X86_64_RELOC_GOT not supported";
5250                         if ( reloc->r_length() != 2 )
5251                                 throw "length != 2 and X86_64_RELOC_GOT not supported";
5252                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOT, target);
5253                         break;
5254                 case X86_64_RELOC_GOT_LOAD:
5255                         if ( ! reloc->r_extern() )
5256                                 throw "not extern and X86_64_RELOC_GOT_LOAD not supported";
5257                         if ( ! reloc->r_pcrel() )
5258                                 throw "not pcrel and X86_64_RELOC_GOT_LOAD not supported";
5259                         if ( reloc->r_length() != 2 )
5260                                 throw "length != 2 and X86_64_RELOC_GOT_LOAD not supported";
5261                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOTLoad, target);
5262                         break;
5263                 case X86_64_RELOC_SUBTRACTOR:
5264                         if ( reloc->r_pcrel() )
5265                                 throw "X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5266                         if ( reloc->r_length() < 2 )
5267                                 throw "X86_64_RELOC_SUBTRACTOR must have r_length of 2 or 3";
5268                         if ( !reloc->r_extern() )
5269                                 throw "X86_64_RELOC_SUBTRACTOR must have r_extern=1";
5270                         if ( nextReloc->r_type() != X86_64_RELOC_UNSIGNED )
5271                                 throw "X86_64_RELOC_SUBTRACTOR must be followed by X86_64_RELOC_UNSIGNED";
5272                         result = true;
5273                         if ( nextReloc->r_pcrel() )
5274                                 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5275                         if ( nextReloc->r_length() != reloc->r_length() )
5276                                 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR must have same r_length";
5277                         if ( nextReloc->r_extern() ) {
5278                                 const macho_nlist<P>& sym = parser.symbolFromIndex(nextReloc->r_symbolnum());
5279                                 // use direct reference for local symbols
5280                                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5281                                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), toTarget);
5282                                         toTarget.addend = contentValue;
5283                                         useDirectBinding = true;
5284                                 }
5285                                 else {
5286                                         toTarget.name = parser.nameFromSymbol(sym);
5287                                         toTarget.weakImport = parser.weakImportFromSymbol(sym);
5288                                         toTarget.addend = contentValue;
5289                                         useDirectBinding = false;
5290                                 }
5291                         }
5292                         else {
5293                                 parser.findTargetFromAddressAndSectionNum(contentValue, nextReloc->r_symbolnum(), toTarget);
5294                                 useDirectBinding = (toTarget.atom->scope() == ld::Atom::scopeTranslationUnit);
5295                         }
5296                         if ( useDirectBinding )
5297                                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.atom);
5298                         else
5299                                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.weakImport, toTarget.name);
5300                         parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, toTarget.addend);
5301                         if ( target.atom == NULL )
5302                                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, false, target.name);
5303                         else
5304                                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, target.atom);
5305                         if ( reloc->r_length() == 2 )
5306                                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
5307                         else
5308                                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian64);
5309                         break;
5310                 case X86_64_RELOC_TLV:
5311                         if ( ! reloc->r_extern() )
5312                                 throw "not extern and X86_64_RELOC_TLV not supported";
5313                         if ( ! reloc->r_pcrel() )
5314                                 throw "not pcrel and X86_64_RELOC_TLV not supported";
5315                         if ( reloc->r_length() != 2 )
5316                                 throw "length != 2 and X86_64_RELOC_TLV not supported";
5317                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
5318                         break;
5319                 default:
5320                         throwf("unknown relocation type %d", reloc->r_type());
5321         }
5322         return result;
5323 }
5324
5325
5326
5327 template <>
5328 bool Section<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<P>* reloc)
5329 {
5330         const macho_section<P>* sect = this->machoSection();
5331         uint32_t srcAddr;
5332         const uint8_t* fixUpPtr;
5333         uint32_t contentValue = 0;
5334         ld::Fixup::Kind kind = ld::Fixup::kindNone;
5335         Parser<x86>::SourceLocation     src;
5336         Parser<x86>::TargetDesc         target;
5337
5338         if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
5339                 srcAddr = sect->addr() + reloc->r_address();
5340                 src.atom = this->findAtomByAddress(srcAddr);
5341                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5342                 fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5343                 switch ( reloc->r_type() ) {
5344                 case GENERIC_RELOC_VANILLA:
5345                         switch ( reloc->r_length() ) {
5346                                 case 0:
5347                                         contentValue = (int32_t)(int8_t)*fixUpPtr;
5348                                         if ( reloc->r_pcrel() ) {
5349                                                 kind = ld::Fixup::kindStoreX86BranchPCRel8;
5350                                                 contentValue += srcAddr + sizeof(uint8_t);
5351                                         }
5352                                         else
5353                                                 throw "r_length=0 and r_pcrel=0 not supported";
5354                                         break;
5355                                 case 1:
5356                                         contentValue = (int32_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5357                                         if ( reloc->r_pcrel() ) {
5358                                                 kind = ld::Fixup::kindStoreX86PCRel16;
5359                                                 contentValue += srcAddr + sizeof(uint16_t);
5360                                         }
5361                                         else
5362                                                 kind = ld::Fixup::kindStoreLittleEndian16;
5363                                         break;
5364                                 case 2:
5365                                         contentValue = E::get32(*((uint32_t*)fixUpPtr));
5366                                         if ( reloc->r_pcrel() ) {
5367                                                 kind = ld::Fixup::kindStoreX86BranchPCRel32;
5368                                                 contentValue += srcAddr + sizeof(uint32_t);
5369                                         }
5370                                         else
5371                                                 kind = ld::Fixup::kindStoreLittleEndian32;
5372                                         break;
5373                                 case 3:
5374                                         throw "r_length=3 not supported";
5375                         }
5376                         if ( reloc->r_extern() ) {
5377                                 target.atom = NULL;
5378                                 const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
5379                                 target.name = parser.nameFromSymbol(targetSymbol);
5380                                 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
5381                                 target.addend = (int32_t)contentValue;
5382                         }
5383                         else {
5384                                 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5385                         }
5386                         if ( (kind == ld::Fixup::kindStoreX86BranchPCRel32) && (target.name != NULL) ) {
5387                                 if ( strncmp(target.name, "___dtrace_probe$", 16) == 0 ) {
5388                                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5389                                         parser.addDtraceExtraInfos(src, &target.name[16]);
5390                                         return false;
5391                                 }
5392                                 else if ( strncmp(target.name, "___dtrace_isenabled$", 20) == 0 ) {
5393                                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5394                                         parser.addDtraceExtraInfos(src, &target.name[20]);
5395                                         return false;
5396                                 }
5397                         }
5398                         parser.addFixups(src, kind, target);
5399                         return false;
5400                         break;
5401                 case GENERIC_RLEOC_TLV:
5402                         {
5403                                 if ( !reloc->r_extern() )
5404                                         throw "r_extern=0 and r_type=GENERIC_RLEOC_TLV not supported";
5405                                 if ( reloc->r_length() != 2 )
5406                                         throw "r_length!=2 and r_type=GENERIC_RLEOC_TLV not supported";
5407                                 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5408                                 // use direct reference for local symbols
5409                                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
5410                                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5411                                 }
5412                                 else {
5413                                         target.atom = NULL;
5414                                         target.name = parser.nameFromSymbol(sym);
5415                                         target.weakImport = parser.weakImportFromSymbol(sym);
5416                                 }
5417                                 target.addend = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5418                                 if ( reloc->r_pcrel() ) {
5419                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
5420                                 }
5421                                 else {
5422                                         parser.addFixups(src, ld::Fixup::kindStoreX86Abs32TLVLoad, target);
5423                                 }
5424                                 return false;
5425                         }
5426                         break;
5427                 default:
5428                         throwf("unsupported i386 relocation type (%d)", reloc->r_type());
5429                 }
5430         }
5431         else {
5432                 // scattered relocation
5433                 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
5434                 srcAddr = sect->addr() + sreloc->r_address();
5435                 src.atom = this->findAtomByAddress(srcAddr);
5436                 assert(src.atom != NULL);
5437                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5438                 fixUpPtr = file().fileContent() + sect->offset() + sreloc->r_address();
5439                 uint32_t relocValue = sreloc->r_value();
5440                 bool result = false;
5441                 // file format allows pair to be scattered or not
5442                 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
5443                 const macho_relocation_info<P>* nextReloc = &reloc[1];
5444                 bool nextRelocIsPair = false;
5445                 uint32_t nextRelocAddress = 0;
5446                 uint32_t nextRelocValue = 0;
5447                 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
5448                         if ( nextReloc->r_type() == GENERIC_RELOC_PAIR ) {
5449                                 nextRelocIsPair = true;
5450                                 nextRelocAddress = nextReloc->r_address();
5451                                 result = true;  // iterator should skip next reloc, since we've consumed it here
5452                         }
5453                 }
5454                 else {
5455                         if ( nextSReloc->r_type() == GENERIC_RELOC_PAIR ) {
5456                                 nextRelocIsPair = true;
5457                                 nextRelocAddress = nextSReloc->r_address();
5458                                 nextRelocValue = nextSReloc->r_value();
5459                         }
5460                 }
5461                 switch (sreloc->r_type()) {
5462                         case GENERIC_RELOC_VANILLA:
5463                                 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
5464                                 target.atom = parser.findAtomByAddress(relocValue);
5465                                 if ( sreloc->r_pcrel() ) {
5466                                         switch ( sreloc->r_length() ) {
5467                                                 case 0:
5468                                                         contentValue = srcAddr + 1 + *fixUpPtr;
5469                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
5470                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel8, target);
5471                                                         break;
5472                                                 case 1:
5473                                                         contentValue = srcAddr + 2 + LittleEndian::get16(*((uint16_t*)fixUpPtr));
5474                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
5475                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel16, target);
5476                                                         break;
5477                                                 case 2:
5478                                                         contentValue = srcAddr + 4 + LittleEndian::get32(*((uint32_t*)fixUpPtr));
5479                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
5480                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5481                                                         break;
5482                                                 case 3:
5483                                                         throw "unsupported r_length=3 for scattered pc-rel vanilla reloc";
5484                                                         break;
5485                                         }
5486                                 }
5487                                 else {
5488                                         if ( sreloc->r_length() != 2 )
5489                                                 throwf("unsupported r_length=%d for scattered vanilla reloc", sreloc->r_length());
5490                                         contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
5491                                         target.addend = (int32_t)contentValue - (int32_t)(target.atom->objectAddress());
5492                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5493                                 }
5494                                 break;
5495                         case GENERIC_RELOC_SECTDIFF:
5496                         case GENERIC_RELOC_LOCAL_SECTDIFF:
5497                                 {
5498                                         if ( !nextRelocIsPair )
5499                                                 throw "GENERIC_RELOC_SECTDIFF missing following pair";
5500                                         switch ( sreloc->r_length() ) {
5501                                                 case 0:
5502                                                 case 3:
5503                                                         throw "bad length for GENERIC_RELOC_SECTDIFF";
5504                                                 case 1:
5505                                                         contentValue = (int32_t)(int16_t)LittleEndian::get16(*((uint16_t*)fixUpPtr));
5506                                                         kind = ld::Fixup::kindStoreLittleEndian16;
5507                                                         break;
5508                                                 case 2:
5509                                                         contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
5510                                                         kind = ld::Fixup::kindStoreLittleEndian32;
5511                                                         break;
5512                                         }
5513                                         Atom<x86>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
5514                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5515                                         parser.findTargetFromAddress(sreloc->r_value(), target);
5516                                         // check for addend encoded in the section content
5517                                         int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
5518                                         if ( addend < 0 ) {
5519                                                 // switch binding base on coalescing
5520                                                 if ( target.atom == NULL ) {
5521                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
5522                                                 }
5523                                                 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
5524                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
5525                                                 }
5526                                                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
5527                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
5528                                                 }
5529                                                 else {
5530                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
5531                                                 }
5532                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
5533                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5534                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
5535                                                 parser.addFixup(src, ld::Fixup::k5of5, kind);
5536                                         }
5537                                         else {
5538                                                 // switch binding base on coalescing
5539                                                 if ( target.atom == NULL ) {
5540                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
5541                                                 }
5542                                                 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
5543                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
5544                                                 }
5545                                                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
5546                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
5547                                                 }
5548                                                 else {
5549                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
5550                                                 }
5551                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend+addend);
5552                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5553                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5554                                                 parser.addFixup(src, ld::Fixup::k5of5, kind);
5555                                         }
5556                                 }
5557                                 break;
5558                 }
5559                 return result;
5560         }
5561 }
5562
5563
5564
5565
5566
5567 #if SUPPORT_ARCH_arm_any
5568 template <>
5569 bool Section<arm>::addRelocFixup(class Parser<arm>& parser, const macho_relocation_info<P>* reloc)
5570 {
5571         const macho_section<P>* sect = this->machoSection();
5572         bool result = false;
5573         uint32_t srcAddr;
5574         uint32_t dstAddr;
5575         uint32_t* fixUpPtr;
5576         int32_t displacement = 0;
5577         uint32_t instruction = 0;
5578         pint_t contentValue = 0;
5579         Parser<arm>::SourceLocation     src;
5580         Parser<arm>::TargetDesc         target;
5581         const macho_relocation_info<P>* nextReloc;
5582
5583         if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
5584                 bool externSymbolIsThumbDef = false;
5585                 srcAddr = sect->addr() + reloc->r_address();
5586                 src.atom = this->findAtomByAddress(srcAddr);
5587                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5588                 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + reloc->r_address());
5589                 if ( reloc->r_type() != ARM_RELOC_PAIR )
5590                         instruction = LittleEndian::get32(*fixUpPtr);
5591                 if ( reloc->r_extern() ) {
5592                         const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
5593                         // use direct reference for local symbols
5594                         if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) && (((targetSymbol.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(targetSymbol)[0] == 'L')) ) {
5595                                 parser.findTargetFromAddressAndSectionNum(targetSymbol.n_value(), targetSymbol.n_sect(), target);
5596                         }
5597                         else {
5598                                 target.atom = NULL;
5599                                 target.name = parser.nameFromSymbol(targetSymbol);
5600                                 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
5601                                 if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) &&  (targetSymbol.n_desc() & N_ARM_THUMB_DEF) )
5602                                         externSymbolIsThumbDef = true;
5603                         }
5604                 }
5605                 switch ( reloc->r_type() ) {
5606                         case ARM_RELOC_BR24:
5607                                 // Sign-extend displacement
5608                                 displacement = (instruction & 0x00FFFFFF) << 2;
5609                                 if ( (displacement & 0x02000000) != 0 )
5610                                         displacement |= 0xFC000000;
5611                                 // The pc added will be +8 from the pc
5612                                 displacement += 8;
5613                                 // If this is BLX add H << 1
5614                                 if ((instruction & 0xFE000000) == 0xFA000000)
5615                                         displacement += ((instruction & 0x01000000) >> 23);
5616                                 if ( reloc->r_extern() ) {
5617                                         target.addend = srcAddr + displacement;
5618                                         if ( externSymbolIsThumbDef )
5619                                                 target.addend &= -2; // remove thumb bit
5620                                 }
5621                                 else {
5622                                         dstAddr = srcAddr + displacement;
5623                                         parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
5624                                 }
5625                                 // special case "calls" for dtrace
5626                                 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5627                                         parser.addFixup(src, ld::Fixup::k1of1,
5628                                                                                                                         ld::Fixup::kindStoreARMDtraceCallSiteNop, false, target.name);
5629                                         parser.addDtraceExtraInfos(src, &target.name[16]);
5630                                 }
5631                                 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5632                                         parser.addFixup(src, ld::Fixup::k1of1,
5633                                                                                                                         ld::Fixup::kindStoreARMDtraceIsEnableSiteClear, false, target.name);
5634                                         parser.addDtraceExtraInfos(src, &target.name[20]);
5635                                 }
5636                                 else {
5637                                         parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
5638                                 }
5639                                 break;
5640                         case ARM_THUMB_RELOC_BR22:
5641                                 // thumb2 added two more bits to displacement, complicating the displacement decoding
5642                                 {
5643                                         uint32_t s = (instruction >> 10) & 0x1;
5644                                         uint32_t j1 = (instruction >> 29) & 0x1;
5645                                         uint32_t j2 = (instruction >> 27) & 0x1;
5646                                         uint32_t imm10 = instruction & 0x3FF;
5647                                         uint32_t imm11 = (instruction >> 16) & 0x7FF;
5648                                         uint32_t i1 = (j1 == s);
5649                                         uint32_t i2 = (j2 == s);
5650                                         uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
5651                                         int32_t sdis = dis;
5652                                         if ( s )
5653                                                 sdis |= 0xFE000000;
5654                                         displacement = sdis;
5655                                 }
5656                                 // The pc added will be +4 from the pc
5657                                 displacement += 4;
5658                                 // If the instruction was blx, force the low 2 bits to be clear
5659                                 dstAddr = srcAddr + displacement;
5660                                 if ((instruction & 0xF8000000) == 0xE8000000)
5661                                         dstAddr &= 0xFFFFFFFC;
5662
5663                                 if ( reloc->r_extern() ) {
5664                                         target.addend = dstAddr;
5665                                 }
5666                                 else {
5667                                         parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
5668                                 }
5669                                 // special case "calls" for dtrace
5670                                 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5671                                         parser.addFixup(src, ld::Fixup::k1of1,
5672                                                                                                                         ld::Fixup::kindStoreThumbDtraceCallSiteNop, false, target.name);
5673                                         parser.addDtraceExtraInfos(src, &target.name[16]);
5674                                 }
5675                                 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5676                                         parser.addFixup(src, ld::Fixup::k1of1,
5677                                                                                                                         ld::Fixup::kindStoreThumbDtraceIsEnableSiteClear, false, target.name);
5678                                         parser.addDtraceExtraInfos(src, &target.name[20]);
5679                                 }
5680                                 else {
5681                                         parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
5682                                 }
5683                                 break;
5684                         case ARM_RELOC_VANILLA:
5685                                 if ( reloc->r_length() != 2 )
5686                                         throw "bad length for ARM_RELOC_VANILLA";
5687                                 contentValue = LittleEndian::get32(*fixUpPtr);
5688                                 if ( reloc->r_extern() ) {
5689                                         target.addend = (int32_t)contentValue;
5690                                         if ( externSymbolIsThumbDef )
5691                                                 target.addend &= -2; // remove thumb bit
5692                                 }
5693                                 else {
5694                                         parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5695                                         // possible non-extern relocation turned into by-name ref because target is a weak-def
5696                                         if ( target.atom != NULL ) {
5697                                                 if ( target.atom->isThumb() )
5698                                                         target.addend &= -2; // remove thumb bit
5699                                                 // if reference to LSDA, add group subordinate fixup
5700                                                 if ( target.atom->contentType() == ld::Atom::typeLSDA ) {
5701                                                         Parser<arm>::SourceLocation     src2;
5702                                                         src2.atom = src.atom;
5703                                                         src2.offsetInAtom = 0;
5704                                                         parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, target.atom);
5705                                                 }
5706                                         }
5707                                 }
5708                                 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5709                                 break;
5710                         case ARM_THUMB_32BIT_BRANCH:
5711                                 // silently ignore old unnecessary reloc
5712                                 break;
5713                         case ARM_RELOC_HALF:
5714                                 nextReloc = &reloc[1];
5715                                 if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
5716                                         uint32_t instruction16;
5717                                         uint32_t other16 = (nextReloc->r_address() & 0xFFFF);
5718                                         bool isThumb;
5719                                         if ( reloc->r_length() & 2 ) {
5720                                                 isThumb = true;
5721                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
5722                                                 uint32_t imm4 =  (instruction & 0x0000000F);
5723                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
5724                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
5725                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
5726                                         }
5727                                         else {
5728                                                 isThumb = false;
5729                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
5730                                                 uint32_t imm12 = (instruction & 0x00000FFF);
5731                                                 instruction16 = (imm4 << 12) | imm12;
5732                                         }
5733                                         if ( reloc->r_length() & 1 ) {
5734                                                 // high 16
5735                                                 dstAddr = ((instruction16 << 16) | other16);
5736                         if ( reloc->r_extern() ) {
5737                             target.addend = dstAddr;
5738                                                         if ( externSymbolIsThumbDef )
5739                                                                 target.addend &= -2; // remove thumb bit
5740                                                 }
5741                         else {
5742                             parser.findTargetFromAddress(dstAddr, target);
5743                             if ( target.atom->isThumb() )
5744                                 target.addend &= (-2); // remove thumb bit
5745                         }
5746                                                 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16), target);
5747                                         }
5748                                         else {
5749                                                 // low 16
5750                                                 dstAddr = (other16 << 16) | instruction16;
5751                         if ( reloc->r_extern() ) {
5752                             target.addend = dstAddr;
5753                                                         if ( externSymbolIsThumbDef )
5754                                                                 target.addend &= -2; // remove thumb bit
5755                         }
5756                         else {
5757                             parser.findTargetFromAddress(dstAddr, target);
5758                             if ( target.atom->isThumb() )
5759                                 target.addend &= (-2); // remove thumb bit
5760                         }
5761                                                 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16), target);
5762                                         }
5763                                         result = true;
5764                                 }
5765                                 else
5766                                         throw "for ARM_RELOC_HALF, next reloc is not ARM_RELOC_PAIR";
5767                                 break;
5768                         default:
5769                                 throwf("unknown relocation type %d", reloc->r_type());
5770                                 break;
5771                 }
5772         }
5773         else {
5774                 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
5775                 // file format allows pair to be scattered or not
5776                 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
5777                 nextReloc = &reloc[1];
5778                 srcAddr = sect->addr() + sreloc->r_address();
5779                 dstAddr = sreloc->r_value();
5780                 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + sreloc->r_address());
5781                 instruction = LittleEndian::get32(*fixUpPtr);
5782                 src.atom = this->findAtomByAddress(srcAddr);
5783                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5784                 bool nextRelocIsPair = false;
5785                 uint32_t nextRelocAddress = 0;
5786                 uint32_t nextRelocValue = 0;
5787                 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
5788                         if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
5789                                 nextRelocIsPair = true;
5790                                 nextRelocAddress = nextReloc->r_address();
5791                                 result = true;
5792                         }
5793                 }
5794                 else {
5795                         if ( nextSReloc->r_type() == ARM_RELOC_PAIR ) {
5796                                 nextRelocIsPair = true;
5797                                 nextRelocAddress = nextSReloc->r_address();
5798                                 nextRelocValue = nextSReloc->r_value();
5799                                 result = true;
5800                         }
5801                 }
5802                 switch ( sreloc->r_type() ) {
5803                         case ARM_RELOC_VANILLA:
5804                                 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
5805                                 if ( sreloc->r_length() != 2 )
5806                                         throw "bad length for ARM_RELOC_VANILLA";
5807                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
5808                                 contentValue = LittleEndian::get32(*fixUpPtr);
5809                                 target.addend = contentValue - target.atom->_objAddress;
5810                                 if ( target.atom->isThumb() )
5811                                         target.addend &= -2; // remove thumb bit
5812                                 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5813                                 break;
5814                         case ARM_RELOC_BR24:
5815                                 // Sign-extend displacement
5816                                 displacement = (instruction & 0x00FFFFFF) << 2;
5817                                 if ( (displacement & 0x02000000) != 0 )
5818                                         displacement |= 0xFC000000;
5819                                 // The pc added will be +8 from the pc
5820                                 displacement += 8;
5821                                 // If this is BLX add H << 1
5822                                 if ((instruction & 0xFE000000) == 0xFA000000)
5823                                         displacement += ((instruction & 0x01000000) >> 23);
5824                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
5825                                 target.addend = (int64_t)(srcAddr + displacement) - (int64_t)(target.atom->_objAddress);
5826                                 parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
5827                                 break;
5828                         case ARM_THUMB_RELOC_BR22:
5829                                 // thumb2 added two more bits to displacement, complicating the displacement decoding
5830                                 {
5831                                         uint32_t s = (instruction >> 10) & 0x1;
5832                                         uint32_t j1 = (instruction >> 29) & 0x1;
5833                                         uint32_t j2 = (instruction >> 27) & 0x1;
5834                                         uint32_t imm10 = instruction & 0x3FF;
5835                                         uint32_t imm11 = (instruction >> 16) & 0x7FF;
5836                                         uint32_t i1 = (j1 == s);
5837                                         uint32_t i2 = (j2 == s);
5838                                         uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
5839                                         int32_t sdis = dis;
5840                                         if ( s )
5841                                                 sdis |= 0xFE000000;
5842                                         displacement = sdis;
5843                                 }
5844                                 // The pc added will be +4 from the pc
5845                                 displacement += 4;
5846                                 dstAddr = srcAddr+displacement;
5847                                 // If the instruction was blx, force the low 2 bits to be clear
5848                                 if ((instruction & 0xF8000000) == 0xE8000000)
5849                                         dstAddr &= 0xFFFFFFFC;
5850                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
5851                                 target.addend = dstAddr - target.atom->_objAddress;
5852                                 parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
5853                                 break;
5854                         case ARM_RELOC_SECTDIFF:
5855                         case ARM_RELOC_LOCAL_SECTDIFF:
5856                                 {
5857                                         if ( ! nextRelocIsPair )
5858                                                 throw "ARM_RELOC_SECTDIFF missing following pair";
5859                                         if ( sreloc->r_length() != 2 )
5860                                                 throw "bad length for ARM_RELOC_SECTDIFF";
5861                                         contentValue = LittleEndian::get32(*fixUpPtr);
5862                                         Atom<arm>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
5863                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5864                                         uint32_t offsetInTarget;
5865                                         Atom<arm>* targetAtom = parser.findAtomByAddressOrLocalTargetOfStub(sreloc->r_value(), &offsetInTarget);
5866                                         // check for addend encoded in the section content
5867                     int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
5868                                         if ( targetAtom->isThumb() )
5869                                                 addend &= -2; // remove thumb bit
5870                                         // if reference to LSDA, add group subordinate fixup
5871                                         if ( targetAtom->contentType() == ld::Atom::typeLSDA ) {
5872                                                 Parser<arm>::SourceLocation     src2;
5873                                                 src2.atom = src.atom;
5874                                                 src2.offsetInAtom = 0;
5875                                                 parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, targetAtom);
5876                                         }
5877                                         if ( addend < 0 ) {
5878                                                 // switch binding base on coalescing
5879                                                 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5880                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5881                                                 }
5882                                                 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5883                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5884                                                 }
5885                                                 else {
5886                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5887                                                 }
5888                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, offsetInTarget);
5889                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5890                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
5891                                                 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
5892                                         }
5893                                         else {
5894                                                 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5895                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5896                                                 }
5897                                                 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5898                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5899                                                 }
5900                                                 else {
5901                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5902                                                 }
5903                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)(offsetInTarget+addend));
5904                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5905                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5906                                                 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
5907                                         }
5908                                 }
5909                                 break;
5910                         case ARM_RELOC_HALF_SECTDIFF:
5911                                 if ( nextRelocIsPair ) {
5912                                         instruction = LittleEndian::get32(*fixUpPtr);
5913                                         Atom<arm>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
5914                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5915                                         Atom<arm>* targetAtom  = parser.findAtomByAddress(sreloc->r_value());
5916                                         uint32_t offsetInTarget = sreloc->r_value() - targetAtom->_objAddress;
5917                                         uint32_t instruction16;
5918                                         uint32_t other16 = (nextRelocAddress & 0xFFFF);
5919                                         bool isThumb;
5920                                         if ( sreloc->r_length() & 2 ) {
5921                                                 isThumb = true;
5922                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
5923                                                 uint32_t imm4 =  (instruction & 0x0000000F);
5924                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
5925                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
5926                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
5927                                         }
5928                                         else {
5929                                                 isThumb = false;
5930                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
5931                                                 uint32_t imm12 = (instruction & 0x00000FFF);
5932                                                 instruction16 = (imm4 << 12) | imm12;
5933                                         }
5934                                         if ( sreloc->r_length() & 1 )
5935                                                 dstAddr = ((instruction16 << 16) | other16);
5936                                         else
5937                                                 dstAddr = (other16 << 16) | instruction16;
5938                                         if ( targetAtom->isThumb() )
5939                                                 dstAddr &= (-2); // remove thumb bit
5940                     int32_t addend = dstAddr - (sreloc->r_value() - nextRelocValue);
5941                                         if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5942                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5943                                         }
5944                                         else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5945                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5946                                         }
5947                                         else {
5948                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5949                                         }
5950                                         parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)offsetInTarget+addend);
5951                                         parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5952                                         parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5953                                         if ( sreloc->r_length() & 1 ) {
5954                                                 // high 16
5955                                                 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
5956                                         }
5957                                         else {
5958                                                 // low 16
5959                                                 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
5960                                         }
5961                                         result = true;
5962                                 }
5963                                 else
5964                                         throw "ARM_RELOC_HALF_SECTDIFF reloc missing following pair";
5965                                 break;
5966                         case ARM_RELOC_HALF:
5967                                 if ( nextRelocIsPair ) {
5968                                         instruction = LittleEndian::get32(*fixUpPtr);
5969                                         Atom<arm>* targetAtom  = parser.findAtomByAddress(sreloc->r_value());
5970                                         uint32_t instruction16;
5971                                         uint32_t other16 = (nextRelocAddress & 0xFFFF);
5972                                         bool isThumb;
5973                                         if ( sreloc->r_length() & 2 ) {
5974                                                 isThumb = true;
5975                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
5976                                                 uint32_t imm4 =  (instruction & 0x0000000F);
5977                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
5978                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
5979                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
5980                                         }
5981                                         else {
5982                                                 isThumb = false;
5983                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
5984                                                 uint32_t imm12 = (instruction & 0x00000FFF);
5985                                                 instruction16 = (imm4 << 12) | imm12;
5986                                         }
5987                                         if ( sreloc->r_length() & 1 )
5988                                                 dstAddr = ((instruction16 << 16) | other16);
5989                                         else
5990                                                 dstAddr = (other16 << 16) | instruction16;
5991                                         if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5992                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, targetAtom);
5993                                         }
5994                                         else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5995                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5996                                         }
5997                                         else {
5998                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5999                                         }
6000                                         parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, dstAddr - targetAtom->_objAddress);
6001                                         if ( sreloc->r_length() & 1 ) {
6002                                                 // high 16
6003                                                 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
6004                                         }
6005                                         else {
6006                                                 // low 16
6007                                                 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
6008                                         }
6009                                         result = true;
6010                                 }
6011                                 else
6012                                         throw "scattered ARM_RELOC_HALF reloc missing following pair";
6013                                 break;
6014                         default:
6015                                 throwf("unknown ARM scattered relocation type %d", sreloc->r_type());
6016                 }
6017         }
6018         return result;
6019 }
6020 #endif
6021
6022
6023
6024 // Generic (unspecialized) version: forwards the reloc to FixedSizeSection<A>::addRelocFixup(), then
6025 // asserts; per the assert message a template specialization is needed. Always returns false.
6026 template <typename A>
6027 bool ObjC1ClassSection<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
6028 {
6029         // inherited handling; its return value is not used here
6030         FixedSizeSection<A>::addRelocFixup(parser, reloc);
6031
6032         assert(0 && "needs template specialization"); // always fails when asserts are enabled
6033         return false;
6034 }
6035 // i386 version: a vanilla reloc at offset 4 of its atom also adds a by-name fixup to ".objc_class_name_<super>"
6036 template <>
6037 bool ObjC1ClassSection<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
6038 {
6039         // the reloc on the super class name string is a non-scattered vanilla reloc
6040         const bool plainVanilla = ((reloc->r_address() & R_SCATTERED) == 0) && (reloc->r_type() == GENERIC_RELOC_VANILLA);
6041         if ( plainVanilla ) {
6042                 assert( reloc->r_length() == 2 );
6043                 assert( ! reloc->r_pcrel() );
6044                 // find the atom that contains the relocated word and the word's offset inside that atom
6045                 const macho_section<P>* const section = this->machoSection();
6046                 const uint32_t fixupAddr = section->addr() + reloc->r_address();
6047                 Parser<x86>::SourceLocation     where;
6048                 where.atom = this->findAtomByAddress(fixupAddr);
6049                 where.offsetInAtom = fixupAddr - where.atom->objectAddress();
6050                 if ( where.offsetInAtom == 4 ) {
6051                         // the relocated word's content is resolved to the atom holding the super class name
6052                         uint32_t* const slot = (uint32_t*)(file().fileContent() + section->offset() + reloc->r_address());
6053                         const uint32_t stringAddr = LittleEndian::get32(*slot);
6054                         Parser<x86>::TargetDesc         stringTarget;
6055                         parser.findTargetFromAddressAndSectionNum(stringAddr, reloc->r_symbolnum(), stringTarget);
6056                         assert(stringTarget.atom != NULL);
6057                         assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
6058                         // build the super class symbol name; the buffer is passed to the fixup and not freed here
6059                         const char* const superClassBaseName = (char*)stringTarget.atom->rawContentPointer();
6060                         char* const superClassName = new char[strlen(superClassBaseName) + 20];
6061                         strcat(strcpy(superClassName, ".objc_class_name_"), superClassBaseName);
6062                         parser.addFixup(where, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, superClassName);
6063                 }
6064         }
6065         // every reloc, special or not, still goes through the inherited handling
6066         return FixedSizeSection<x86>::addRelocFixup(parser, reloc);
6067 }
6068
6069 // Generic (unspecialized) version: forwards the reloc to PointerToCStringSection<A>::addRelocFixup(), then
6070 // asserts; per the assert message a template specialization is needed. Always returns false.
6071 template <typename A>
6072 bool Objc1ClassReferences<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
6073 {
6074         // inherited handling; its return value is not used here
6075         PointerToCStringSection<A>::addRelocFixup(parser, reloc);
6076
6077         assert(0 && "needs template specialization"); // always fails when asserts are enabled
6078         return false;
6079 }
6080
6081
6082 // i386 version: every class-reference reloc also adds a by-name fixup to ".objc_class_name_<class>"
6083 template <>
6084 bool Objc1ClassReferences<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
6085 {
6086         // implicit class refs are derived from the raw relocation, since fixups are not usable yet
6087         assert( (reloc->r_address() & R_SCATTERED) == 0 );
6088         assert( reloc->r_type() == GENERIC_RELOC_VANILLA );
6089         assert( reloc->r_length() == 2 );
6090         assert( ! reloc->r_pcrel() );
6091
6092         // source location: the atom that contains the relocated word, and the word's offset inside it
6093         const macho_section<P>* const section = this->machoSection();
6094         const uint32_t fixupAddr = section->addr() + reloc->r_address();
6095         Parser<x86>::SourceLocation     where;
6096         where.atom = this->findAtomByAddress(fixupAddr);
6097         where.offsetInAtom = fixupAddr - where.atom->objectAddress();
6098
6099         // the relocated word's content is resolved to a target atom, which must be a C-string
6100         uint32_t* const slot = (uint32_t*)(file().fileContent() + section->offset() + reloc->r_address());
6101         const uint32_t stringAddr = LittleEndian::get32(*slot);
6102         Parser<x86>::TargetDesc         stringTarget;
6103         parser.findTargetFromAddressAndSectionNum(stringAddr, reloc->r_symbolnum(), stringTarget);
6104         assert(stringTarget.atom != NULL);
6105         assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
6106
6107         // build the class symbol name; the buffer is passed to the fixup and is not freed here
6108         const char* const baseClassName = (char*)stringTarget.atom->rawContentPointer();
6109         char* const objcClassName = new char[strlen(baseClassName) + 20];
6110         strcat(strcpy(objcClassName, ".objc_class_name_"), baseClassName);
6111         parser.addFixup(where, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, objcClassName);
6112         return PointerToCStringSection<x86>::addRelocFixup(parser, reloc);
6113 }
6114 // Creates the fixups for this section: one pass over its relocations, then follow-on fixups between
6115 // adjacent atoms, data-in-code markers taken from L$start$ labels, and follow-ons for alias atoms.
6116 template <typename A>
6117 void Section<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
6118 {
6119         const macho_section<P>* sect = this->machoSection();
6120         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + sect->reloff());
6121         const uint32_t relocCount = sect->nreloc();
6122         for (uint32_t r = 0; r < relocCount; ++r) {
6123                 try {
6124                         if ( this->addRelocFixup(parser, &relocs[r]) )
6125                                 ++r; // skip next: a true result means relocs[r+1] is not processed on its own
6126                 }
6127                 catch (const char* msg) { // string errors are re-thrown with section name and reloc index added
6128                         throwf("in section %s,%s reloc %u: %s", sect->segname(), Section<A>::makeSectionName(sect), r, msg);
6129                 }
6130         }
6131
6132         // add follow-on fixups if .o file is missing .subsections_via_symbols
6133         if ( this->addFollowOnFixups() ) {
6134                 Atom<A>* end = &_endAtoms[-1]; // last atom: it has no successor, so the loop stops before it
6135                 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
6136                         typename Parser<A>::SourceLocation src(p, 0);
6137                         Atom<A>* nextAtom = &p[1];
6138                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
6139                 }
6140         }
6141         else if ( this->type() == ld::Section::typeCode ) {
6142                 // if FDE broke text not at a symbol, use followOn to keep code together
6143                 Atom<A>* end = &_endAtoms[-1];
6144                 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
6145                         typename Parser<A>::SourceLocation src(p, 0);
6146                         Atom<A>* nextAtom = &p[1];
6147                         if ( (p->symbolTableInclusion() == ld::Atom::symbolTableIn) && (nextAtom->symbolTableInclusion() == ld::Atom::symbolTableNotIn) ) {
6148                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
6149                         }
6150                 }
6151         }
6152
6153         // <rdar://problem/9218847> track data-in-code
6154         if ( parser.hasDataInCodeLabels() && (this->type() == ld::Section::typeCode) ) {
6155                 for (uint32_t i=0; i < parser.symbolCount(); ++i) {
6156                         const macho_nlist<P>& sym = parser.symbolFromIndex(i);
6157                         // ignore stabs
6158                         if ( (sym.n_type() & N_STAB) != 0 )
6159                                 continue;
6160                         // ignore non-definitions
6161                         if ( (sym.n_type() & N_TYPE) != N_SECT )
6162                                 continue;
6163
6164                         // 'L' labels do not denote atom breaks
6165                         const char* symbolName = parser.nameFromSymbol(sym);
6166                         if ( symbolName[0] == 'L' ) {
6167                                 if ( strncmp(symbolName, "L$start$", 8) == 0 ) {
6168                                         ld::Fixup::Kind kind = ld::Fixup::kindNone; // stays kindNone for an unrecognized tag
6169                                         if ( strncmp(&symbolName[8], "data$", 5) == 0 )
6170                                                 kind = ld::Fixup::kindDataInCodeStartData;
6171                                         else if ( strncmp(&symbolName[8], "code$", 5) == 0 )
6172                                                 kind = ld::Fixup::kindDataInCodeEnd;
6173                                         else if ( strncmp(&symbolName[8], "jt8$", 4) == 0 )
6174                                                 kind = ld::Fixup::kindDataInCodeStartJT8;
6175                                         else if ( strncmp(&symbolName[8], "jt16$", 5) == 0 ) // whole tag, including its '$' terminator
6176                                                 kind = ld::Fixup::kindDataInCodeStartJT16;
6177                                         else if ( strncmp(&symbolName[8], "jt32$", 5) == 0 ) // whole tag, including its '$' terminator
6178                                                 kind = ld::Fixup::kindDataInCodeStartJT32;
6179                                         else if ( strncmp(&symbolName[8], "jta32$", 6) == 0 ) // whole tag, including its '$' terminator
6180                                                 kind = ld::Fixup::kindDataInCodeStartJTA32;
6181                                         else
6182                                                 warning("unknown L$start$ label %s in file %s", symbolName, this->file().path());
6183                                         if ( kind != ld::Fixup::kindNone ) {
6184                                                 Atom<A>* inAtom = parser.findAtomByAddress(sym.n_value());
6185                                                 typename Parser<A>::SourceLocation src(inAtom, sym.n_value() - inAtom->objectAddress());
6186                                                 parser.addFixup(src, ld::Fixup::k1of1, kind);
6187                                         }
6188                                 }
6189                         }
6190                 }
6191         }
6192
6193         // add follow-on fixups for aliases
6194         if ( _hasAliases ) {
6195                 for(Atom<A>* p = _beginAtoms; p < _endAtoms; ++p) {
6196                         if ( p->isAlias() && ! this->addFollowOnFixups() ) { // otherwise the follow-on was already added above
6197                                 Atom<A>* targetOfAlias = &p[1];
6198                                 assert(p < &_endAtoms[-1]);
6199                                 assert(p->_objAddress == targetOfAlias->_objAddress);
6200                                 typename Parser<A>::SourceLocation src(p, 0);
6201                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, targetOfAlias);
6202                         }
6203                 }
6204         }
6205 }
6206
6207
6208
6209 //
6210 // main linker entry point: instantiates the ld::File for a relocatable object, or returns NULL
6211 // when the requested architecture is not compiled in or fileContent is not a valid object for it
6212 ld::relocatable::File* parse(const uint8_t* fileContent, uint64_t fileLength,
6213                              const char* path, time_t modTime, ld::File::Ordinal ordinal, const ParserOptions& opts)
6214 {
6215         // an architecture's parser is tried only when it is the requested one and its support is built in
6216 #if SUPPORT_ARCH_x86_64
6217         // x86_64
6218         if ( (opts.architecture == CPU_TYPE_X86_64) && mach_o::relocatable::Parser<x86_64>::validFile(fileContent) )
6219                 return mach_o::relocatable::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6220 #endif
6221
6222 #if SUPPORT_ARCH_i386
6223         // i386
6224         if ( (opts.architecture == CPU_TYPE_I386) && mach_o::relocatable::Parser<x86>::validFile(fileContent) )
6225                 return mach_o::relocatable::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6226 #endif
6227
6228 #if SUPPORT_ARCH_arm_any
6229         // arm: validation also receives the sub-type options
6230         if ( (opts.architecture == CPU_TYPE_ARM) && mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) )
6231                 return mach_o::relocatable::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6232 #endif
6233
6234         // architecture not handled, or the content failed validation
6235         return NULL;
6236 }
6237
6238 //
6239 // used by archive reader to validate a member: true when fileContent is a valid object file
6240 // for opts.architecture (fileLength is not consulted)
6241 bool isObjectFile(const uint8_t* fileContent, uint64_t fileLength, const ParserOptions& opts)
6242 {
6243         if ( opts.architecture == CPU_TYPE_X86_64 )
6244                 return ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) );
6245         if ( opts.architecture == CPU_TYPE_I386 )
6246                 return ( mach_o::relocatable::Parser<x86>::validFile(fileContent) );
6247         if ( opts.architecture == CPU_TYPE_ARM )
6248                 return ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) );
6249
6250         // any other architecture is never accepted
6251         return false;
6252 }
6253
6254 //
6255 // used by linker to infer architecture when no -arch is on command line
6256 //
6257 bool isObjectFile(const uint8_t* fileContent, cpu_type_t* result, cpu_subtype_t* subResult)
6258 {
6259         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6260                 *result = CPU_TYPE_X86_64;
6261                 *subResult = CPU_SUBTYPE_X86_64_ALL;
6262                 return true;
6263         }
6264         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
6265                 *result = CPU_TYPE_I386;
6266                 *subResult = CPU_SUBTYPE_X86_ALL;
6267                 return true;
6268         }
6269         if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6270                 *result = CPU_TYPE_ARM;
6271                 const macho_header<Pointer32<LittleEndian> >* header = (const macho_header<Pointer32<LittleEndian> >*)fileContent;
6272                 *subResult = header->cpusubtype();
6273                 return true;
6274         }
6275         return false;
6276 }
6277
6278 //
6279 // used by linker is error messages to describe bad .o file
6280 //
6281 const char* archName(const uint8_t* fileContent)
6282 {
6283         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6284                 return mach_o::relocatable::Parser<x86_64>::fileKind(fileContent);
6285         }
6286         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
6287                 return mach_o::relocatable::Parser<x86>::fileKind(fileContent);
6288         }
6289         if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6290                 return mach_o::relocatable::Parser<arm>::fileKind(fileContent);
6291         }
6292         return NULL;
6293 }
6294
6295 //
6296 // Used by archive reader when -ObjC option is specified
6297 //
6298 bool hasObjC2Categories(const uint8_t* fileContent)
6299 {
6300         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6301                 return mach_o::relocatable::Parser<x86_64>::hasObjC2Categories(fileContent);
6302         }
6303         else if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6304                 return mach_o::relocatable::Parser<arm>::hasObjC2Categories(fileContent);
6305         }
6306         else if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
6307                 return mach_o::relocatable::Parser<x86>::hasObjC2Categories(fileContent);
6308         }
6309         return false;
6310 }
6311
6312 //
6313 // Used by archive reader when -ObjC option is specified
6314 //
6315 bool hasObjC1Categories(const uint8_t* fileContent)
6316 {
6317         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
6318                 return mach_o::relocatable::Parser<x86>::hasObjC1Categories(fileContent);
6319         }
6320         return false;
6321 }
6322
6323
6324
6325 } // namespace relocatable
6326 } // namespace mach_o
6327
6328