src/ld/SymbolTable.cpp

   1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-*
   2  *
   3  * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
   4  *
   5  * @APPLE_LICENSE_HEADER_START@
   6  *
   7  * This file contains Original Code and/or Modifications of Original Code
   8  * as defined in and that are subject to the Apple Public Source License
   9  * Version 2.0 (the 'License'). You may not use this file except in
  10  * compliance with the License. Please obtain a copy of the License at
  11  * http://www.opensource.apple.com/apsl/ and read it before using this
  12  * file.
  13  *
  14  * The Original Code and all software distributed under the License are
  15  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  19  * Please see the License for the specific language governing rights and
  20  * limitations under the License.
  21  *
  22  * @APPLE_LICENSE_HEADER_END@
  23  */
  24
  25
  26 #include <stdlib.h>
  27 #include <sys/types.h>
  28 #include <sys/stat.h>
  29 #include <sys/mman.h>
  30 #include <sys/sysctl.h>
  31 #include <fcntl.h>
  32 #include <errno.h>
  33 #include <limits.h>
  34 #include <unistd.h>
  35 #include <assert.h>
  36
  37 #include <iostream>
  38 #include <sstream>
  39 #include <string>
  40 #include <map>
  41 #include <set>
  42 #include <vector>
  43 #include <algorithm>
  44
  45 #include "Options.h"
  46
  47 #include "ld.hpp"
  48 #include "InputFiles.h"
  49 #include "SymbolTable.h"
  50
  51
  52
  53 namespace ld {
  54 namespace tool {
  55
  56
  57 // HACK, I can't find a way to pass values in the compare classes (e.g. ContentFuncs)
  58 // so use global variable to pass info.
  59 static ld::IndirectBindingTable*        _s_indirectBindingTable = NULL;
  60
  61
  62 SymbolTable::SymbolTable(const Options& opts, std::vector<const ld::Atom*>& ibt)
  63         : _options(opts), _cstringTable(6151), _indirectBindingTable(ibt), _hasExternalTentativeDefinitions(false)
  64 {
  65         _s_indirectBindingTable = this;
  66 }
  67
  68
  69 size_t SymbolTable::ContentFuncs::operator()(const ld::Atom* atom) const
  70 {
  71         return atom->contentHash(*_s_indirectBindingTable);
  72 }
  73
  74 bool SymbolTable::ContentFuncs::operator()(const ld::Atom* left, const ld::Atom* right) const
  75 {
  76         return (memcmp(left->rawContentPointer(), right->rawContentPointer(), left->size()) == 0);
  77 }
  78
  79
  80
  81 size_t SymbolTable::CStringHashFuncs::operator()(const ld::Atom* atom) const
  82 {
  83         return atom->contentHash(*_s_indirectBindingTable);
  84 }
  85
  86 bool SymbolTable::CStringHashFuncs::operator()(const ld::Atom* left, const ld::Atom* right) const
  87 {
  88         return (strcmp((char*)left->rawContentPointer(), (char*)right->rawContentPointer()) == 0);
  89 }
  90
  91
  92 size_t SymbolTable::UTF16StringHashFuncs::operator()(const ld::Atom* atom) const
  93 {
  94         return atom->contentHash(*_s_indirectBindingTable);
  95 }
  96
  97 bool SymbolTable::UTF16StringHashFuncs::operator()(const ld::Atom* left, const ld::Atom* right) const
  98 {
  99         if ( left == right )
 100                 return true;
 101         const void* leftContent = left->rawContentPointer();
 102         const void* rightContent = right->rawContentPointer();
 103         unsigned int amount = left->size()-2;
 104         bool result = (memcmp(leftContent, rightContent, amount) == 0);
 105         return result;
 106 }
 107
 108
 109 size_t SymbolTable::ReferencesHashFuncs::operator()(const ld::Atom* atom) const
 110 {
 111         return atom->contentHash(*_s_indirectBindingTable);
 112 }
 113
 114 bool SymbolTable::ReferencesHashFuncs::operator()(const ld::Atom* left, const ld::Atom* right) const
 115 {
 116         return left->canCoalesceWith(*right, *_s_indirectBindingTable);
 117 }
 118
 119
 120 void SymbolTable::addDuplicateSymbol(const char *name, const ld::Atom *atom)
 121 {
 122     // Look up or create the file list for name.
 123     DuplicateSymbols::iterator symbolsIterator = _duplicateSymbols.find(name);
 124     DuplicatedSymbolAtomList *atoms = NULL;
 125     if (symbolsIterator != _duplicateSymbols.end()) {
 126         atoms = symbolsIterator->second;
 127     } else {
 128         atoms = new std::vector<const ld::Atom *>;
 129         _duplicateSymbols.insert(std::pair<const char *, DuplicatedSymbolAtomList *>(name, atoms));
 130     }
 131
 132     // check if file is already in the list, add it if not
 133     bool found = false;
 134     for (DuplicatedSymbolAtomList::iterator it = atoms->begin(); !found && it != atoms->end(); it++)
 135         if (strcmp((*it)->file()->path(), atom->file()->path()) == 0)
 136             found = true;
 137     if (!found)
 138         atoms->push_back(atom);
 139 }
 140
 141 void SymbolTable::checkDuplicateSymbols() const
 142 {
 143     bool foundDuplicate = false;
 144     for (DuplicateSymbols::const_iterator symbolIt = _duplicateSymbols.begin(); symbolIt != _duplicateSymbols.end(); symbolIt++) {
 145         DuplicatedSymbolAtomList *atoms = symbolIt->second;
 146         bool reportDuplicate;
 147         if (_options.deadCodeStrip()) {
 148             // search for a live atom
 149             reportDuplicate = false;
 150             for (DuplicatedSymbolAtomList::iterator it = atoms->begin(); !reportDuplicate && it != atoms->end(); it++) {
 151                 if ((*it)->live())
 152                     reportDuplicate = true;
 153             }
 154         } else {
 155             reportDuplicate = true;
 156         }
 157         if (reportDuplicate) {
 158             foundDuplicate = true;
 159             fprintf(stderr, "duplicate symbol %s in:\n", symbolIt->first);
 160             for (DuplicatedSymbolAtomList::iterator atomIt = atoms->begin(); atomIt != atoms->end(); atomIt++) {
 161                 fprintf(stderr, "    %s\n", (*atomIt)->file()->path());
 162             }
 163         }
 164     }
 165     if (foundDuplicate)
 166         throwf("%d duplicate symbol%s", (int)_duplicateSymbols.size(), _duplicateSymbols.size()==1?"":"s");
 167 }
 168
 169 // AtomPicker encapsulates the logic for picking which atom to use when adding an atom by name results in a collision
 170 class NameCollisionResolution {
 171 public:
 172         NameCollisionResolution(const ld::Atom& a, const ld::Atom& b, bool ignoreDuplicates, const Options& options) : _atomA(a), _atomB(b), _options(options), _reportDuplicate(false), _ignoreDuplicates(ignoreDuplicates) {
 173                 pickAtom();
 174         }
 175
 176         // Returns which atom to use
 177         const ld::Atom& chosen() { return *_chosen; }
 178         bool choseAtom(const ld::Atom& atom) { return _chosen == &atom; }
 179
 180         // Returns true if the two atoms should be reported as a duplicate symbol
 181         bool reportDuplicate()  { return _reportDuplicate; }
 182
 183 private:
 184         const ld::Atom& _atomA;
 185         const ld::Atom& _atomB;
 186         const Options& _options;
 187         const ld::Atom* _chosen;
 188         bool _reportDuplicate;
 189         bool _ignoreDuplicates;
 190
 191         void pickAtom(const ld::Atom& atom) { _chosen = &atom; } // primitive to set which atom is picked
 192         void pickAtomA() { pickAtom(_atomA); }  // primitive to pick atom A
 193         void pickAtomB() { pickAtom(_atomB); }  // primitive to pick atom B
 194
 195         // use atom A if pickA, otherwise use atom B
 196         void pickAOrB(bool pickA) { if (pickA) pickAtomA(); else pickAtomB(); }
 197
 198         void pickHigherOrdinal() {
 199                 pickAOrB(_atomA.file()->ordinal() < _atomB.file()->ordinal());
 200         }
 201
 202         void pickLowerOrdinal() {
 203                 pickAOrB(_atomA.file()->ordinal() > _atomB.file()->ordinal());
 204         }
 205
 206         void pickLargerSize() {
 207                 if (_atomA.size() == _atomB.size())
 208                         pickLowerOrdinal();
 209                 else
 210                         pickAOrB(_atomA.size() > _atomB.size());
 211         }
 212
 213         void pickGreaterAlignment() {
 214                 pickAOrB(_atomA.alignment().trailingZeros() > _atomB.alignment().trailingZeros());
 215         }
 216
 217         void pickBetweenRegularAtoms() {
 218                 if ( _atomA.combine() == ld::Atom::combineByName ) {
 219                         if ( _atomB.combine() == ld::Atom::combineByName ) {
 220                                 // <rdar://problem/9183821> always choose mach-o over llvm bit code, otherwise LTO may eliminate the llvm atom
 221                                 const bool aIsLTO = (_atomA.contentType() == ld::Atom::typeLTOtemporary);
 222                                 const bool bIsLTO = (_atomB.contentType() == ld::Atom::typeLTOtemporary);
 223                                 // <rdar://problem/9183821> always choose mach-o over llvm bit code, otherwise LTO may eliminate the llvm atom
 224                                 if ( aIsLTO != bIsLTO ) {
 225                                         pickAOrB(!aIsLTO);
 226                                 }
 227                                 else {
 228                                         // both weak, prefer non-auto-hide one
 229                                         if ( _atomA.autoHide() != _atomB.autoHide() ) {
 230                                                 // <rdar://problem/6783167> support auto hidden weak symbols: .weak_def_can_be_hidden
 231                                                 pickAOrB(!_atomA.autoHide());
 232                                         }
 233                                         else if ( _atomA.autoHide() && _atomB.autoHide() ) {
 234                                                 // both have auto-hide, so use one with greater alignment
 235                                                 pickGreaterAlignment();
 236                                         }
 237                                         else {
 238                                                 // neither auto-hide, check visibility
 239                                                 if ( _atomA.scope() != _atomB.scope() ) {
 240                                                         // <rdar://problem/8304984> use more visible weak def symbol
 241                                                         pickAOrB(_atomA.scope() == ld::Atom::scopeGlobal);
 242                                                 }
 243                                                 else {
 244                                                         // both have same visibility, use one with greater alignment
 245                                                         pickGreaterAlignment();
 246                                                 }
 247                                         }
 248                                 }
 249                         }
 250                         else {
 251                                 pickAtomB(); // pick not-weak
 252
 253                         }
 254                 }
 255                 else {
 256                         if ( _atomB.combine() == ld::Atom::combineByName ) {
 257                                 pickAtomA(); // pick not-weak
 258
 259                         }
 260                         else {
 261                                 // both are not-weak
 262                                 if ( _atomA.section().type() == ld::Section::typeMachHeader ) {
 263                                         pickAtomA();
 264                                 }
 265                                 else if ( _atomB.section().type() == ld::Section::typeMachHeader ) {
 266                                         pickAtomB();
 267                                 }
 268                                 else {
 269                                         if ( _ignoreDuplicates ) {
 270                                                 pickLowerOrdinal();
 271                                         }
 272                                         else {
 273                                                 _reportDuplicate = true;
 274                                         }
 275                                 }
 276                         }
 277                 }
 278         }
 279
 280         void pickCommonsMode(const ld::Atom& dylib, const ld::Atom& proxy) {
 281                 assert(dylib.definition() == ld::Atom::definitionTentative);
 282                 assert(proxy.definition() == ld::Atom::definitionProxy);
 283                 switch ( _options.commonsMode() ) {
 284                         case Options::kCommonsIgnoreDylibs:
 285                                 if ( _options.warnCommons() )
 286                                         warning("using common symbol %s from %s and ignoring defintion from dylib %s",
 287                                                         proxy.name(), proxy.file()->path(), dylib.file()->path());
 288                                 pickAtom(dylib);
 289                                 break;
 290                         case Options::kCommonsOverriddenByDylibs:
 291                                 if ( _options.warnCommons() )
 292                                         warning("replacing common symbol %s from %s with true definition from dylib %s",
 293                                                         proxy.name(), proxy.file()->path(), dylib.file()->path());
 294                                 pickAtom(proxy);
 295                                 break;
 296                         case Options::kCommonsConflictsDylibsError:
 297                                 throwf("common symbol %s from %s conflicts with defintion from dylib %s",
 298                                            proxy.name(), proxy.file()->path(), dylib.file()->path());
 299                 }
 300         }
 301
 302         void pickProxyAtom() {
 303                 // both atoms are definitionProxy
 304                 // <rdar://problem/5137732> ld should keep looking when it finds a weak definition in a dylib
 305                 if ( _atomA.combine() == ld::Atom::combineByName ) {
 306                         pickAtomB();
 307                 } else if ( _atomB.combine() == ld::Atom::combineByName ) {
 308                         pickAtomA();
 309                 } else {
 310                                 throwf("symbol %s exported from both %s and %s\n", _atomA.name(), _atomA.file()->path(), _atomB.file()->path());
 311                 }
 312         }
 313
 314         void pickAtom() {
 315                 //fprintf(stderr, "pickAtom(), a=%p, def=%d, b=%p, def=%d\n", &_atomA, _atomA.definition(), &_atomB, _atomB.definition());
 316                 // First, discriminate by definition
 317                 switch (_atomA.definition()) {
 318                         case ld::Atom::definitionRegular:
 319                                 switch (_atomB.definition()) {
 320                                         case ld::Atom::definitionRegular:
 321                                                 pickBetweenRegularAtoms();
 322                                                 break;
 323                                         case ld::Atom::definitionTentative:
 324                                                 if ( _atomB.size() > _atomA.size() ) {
 325                                                         const char* atomApath = (_atomA.file() != NULL) ? _atomA.file()->path() : "<internal>";
 326                                                         const char* atomBpath = (_atomB.file() != NULL) ? _atomB.file()->path() : "<internal>";
 327                                                         warning("tentative definition of '%s' with size %llu from '%s' is being replaced by real definition of smaller size %llu from '%s'",
 328                                                                         _atomA.name(), _atomB.size(), atomBpath, _atomA.size(), atomApath);
 329                                                 }
 330                                                 pickAtomA();
 331                                                 break;
 332                                         case ld::Atom::definitionAbsolute:
 333                                                 _reportDuplicate = true;
 334                                                 pickHigherOrdinal();
 335                                                 break;
 336                                         case ld::Atom::definitionProxy:
 337                                                 pickAtomA();
 338                                                 break;
 339                                 }
 340                                 break;
 341                         case ld::Atom::definitionTentative:
 342                                 switch (_atomB.definition()) {
 343                                         case ld::Atom::definitionRegular:
 344                                                 if ( _atomA.size() > _atomB.size() ) {
 345                                                         const char* atomApath = (_atomA.file() != NULL) ? _atomA.file()->path() : "<internal>";
 346                                                         const char* atomBpath = (_atomB.file() != NULL) ? _atomB.file()->path() : "<internal>";
 347                                                         warning("tentative definition of '%s' with size %llu from '%s' is being replaced by real definition of smaller size %llu from '%s'",
 348                                                                         _atomA.name(), _atomA.size(),atomApath, _atomB.size(), atomBpath);
 349                                                 }
 350                                                 pickAtomB();
 351                                                 break;
 352                                         case ld::Atom::definitionTentative:
 353                                                 pickLargerSize();
 354                                                 break;
 355                                         case ld::Atom::definitionAbsolute:
 356                                                 pickHigherOrdinal();
 357                                                 break;
 358                                         case ld::Atom::definitionProxy:
 359                                                 pickCommonsMode(_atomA, _atomB);
 360                                                 break;
 361                                 }
 362                                 break;
 363                         case ld::Atom::definitionAbsolute:
 364                                 switch (_atomB.definition()) {
 365                                         case ld::Atom::definitionRegular:
 366                                                 _reportDuplicate = true;
 367                                                 pickHigherOrdinal();
 368                                                 break;
 369                                         case ld::Atom::definitionTentative:
 370                                                 pickAtomA();
 371                                                 break;
 372                                         case ld::Atom::definitionAbsolute:
 373                                                 _reportDuplicate = true;
 374                                                 pickHigherOrdinal();
 375                                                 break;
 376                                         case ld::Atom::definitionProxy:
 377                                                 pickAtomA();
 378                                                 break;
 379                                 }
 380                                 break;
 381                         case ld::Atom::definitionProxy:
 382                                 switch (_atomB.definition()) {
 383                                         case ld::Atom::definitionRegular:
 384                                                 pickAtomB();
 385                                                 break;
 386                                         case ld::Atom::definitionTentative:
 387                                                 pickCommonsMode(_atomB, _atomA);
 388                                                 break;
 389                                         case ld::Atom::definitionAbsolute:
 390                                                 pickAtomB();
 391                                                 break;
 392                                         case ld::Atom::definitionProxy:
 393                                                 pickProxyAtom();
 394                                                 break;
 395                                 }
 396                                 break;
 397                 }
 398         }
 399 };
 400
 401 bool SymbolTable::addByName(const ld::Atom& newAtom, bool ignoreDuplicates)
 402 {
 403         bool useNew = true;
 404         assert(newAtom.name() != NULL);
 405         const char* name = newAtom.name();
 406         IndirectBindingSlot slot = this->findSlotForName(name);
 407         const ld::Atom* existingAtom = _indirectBindingTable[slot];
 408         //fprintf(stderr, "addByName(%p) name=%s, slot=%u, existing=%p\n", &newAtom, newAtom.name(), slot, existingAtom);
 409         if ( existingAtom != NULL ) {
 410                 assert(&newAtom != existingAtom);
 411                 NameCollisionResolution picker(newAtom, *existingAtom, ignoreDuplicates, _options);
 412                 if (picker.reportDuplicate()) {
 413                         addDuplicateSymbol(name, existingAtom);
 414                         addDuplicateSymbol(name, &newAtom);
 415                 }
 416                 useNew = picker.choseAtom(newAtom);
 417         }
 418         if ( useNew ) {
 419                 _indirectBindingTable[slot] = &newAtom;
 420                 if ( existingAtom != NULL ) {
 421                         markCoalescedAway(existingAtom);
 422                 }
 423                 if ( newAtom.scope() == ld::Atom::scopeGlobal ) {
 424                         if ( newAtom.definition() == ld::Atom::definitionTentative ) {
 425                                 _hasExternalTentativeDefinitions = true;
 426                         }
 427                 }
 428         }
 429         else {
 430                 markCoalescedAway(&newAtom);
 431         }
 432         // return if existing atom in symbol table was replaced
 433         return useNew && (existingAtom != NULL);
 434 }
 435
 436
 437 bool SymbolTable::addByContent(const ld::Atom& newAtom)
 438 {
 439         bool useNew = true;
 440         const ld::Atom* existingAtom;
 441         IndirectBindingSlot slot = this->findSlotForContent(&newAtom, &existingAtom);
 442         //fprintf(stderr, "addByContent(%p) name=%s, slot=%u, existing=%p\n", &newAtom, newAtom.name(), slot, existingAtom);
 443         if ( existingAtom != NULL ) {
 444                 // use existing unless new one has greater alignment requirements
 445                 useNew = ( newAtom.alignment().trailingZeros() > existingAtom->alignment().trailingZeros() );
 446         }
 447         if ( useNew ) {
 448                 _indirectBindingTable[slot] = &newAtom;
 449                 if ( existingAtom != NULL )
 450                         markCoalescedAway(existingAtom);
 451         }
 452         else {
 453                 _indirectBindingTable[slot] = existingAtom;
 454                 if ( existingAtom != &newAtom )
 455                         markCoalescedAway(&newAtom);
 456         }
 457         // return if existing atom in symbol table was replaced
 458         return useNew && (existingAtom != NULL);
 459 }
 460
 461 bool SymbolTable::addByReferences(const ld::Atom& newAtom)
 462 {
 463         bool useNew = true;
 464         const ld::Atom* existingAtom;
 465         IndirectBindingSlot slot = this->findSlotForReferences(&newAtom, &existingAtom);
 466         //fprintf(stderr, "addByReferences(%p) name=%s, slot=%u, existing=%p\n", &newAtom, newAtom.name(), slot, existingAtom);
 467         if ( existingAtom != NULL ) {
 468                 // use existing unless new one has greater alignment requirements
 469                 useNew = ( newAtom.alignment().trailingZeros() > existingAtom->alignment().trailingZeros() );
 470         }
 471         if ( useNew ) {
 472                 _indirectBindingTable[slot] = &newAtom;
 473                 if ( existingAtom != NULL )
 474                         markCoalescedAway(existingAtom);
 475         }
 476         else {
 477                 if ( existingAtom != &newAtom )
 478                         markCoalescedAway(&newAtom);
 479         }
 480         // return if existing atom in symbol table was replaced
 481         return useNew && (existingAtom != NULL);
 482 }
 483
 484
 485 bool SymbolTable::add(const ld::Atom& atom, bool ignoreDuplicates)
 486 {
 487         //fprintf(stderr, "SymbolTable::add(%p), name=%s\n", &atom, atom.name());
 488         assert(atom.scope() != ld::Atom::scopeTranslationUnit);
 489         switch ( atom.combine() ) {
 490                 case ld::Atom::combineNever:
 491                 case ld::Atom::combineByName:
 492                         return this->addByName(atom, ignoreDuplicates);
 493                         break;
 494                 case ld::Atom::combineByNameAndContent:
 495                         return this->addByContent(atom);
 496                         break;
 497                 case ld::Atom::combineByNameAndReferences:
 498                         return this->addByReferences(atom);
 499                         break;
 500         }
 501
 502         return false;
 503 }
 504
 505 void SymbolTable::markCoalescedAway(const ld::Atom* atom)
 506 {
 507         // remove this from list of all atoms used
 508         //fprintf(stderr, "markCoalescedAway(%p) from %s\n", atom, atom->file()->path());
 509         (const_cast<ld::Atom*>(atom))->setCoalescedAway();
 510
 511         //
 512         // The fixupNoneGroupSubordinate* fixup kind is used to model group comdat.
 513         // The "signature" atom in the group has a fixupNoneGroupSubordinate* fixup to
 514         // all other members of the group.  So, if the signature atom is
 515         // coalesced away, all other atoms in the group should also be removed.
 516         //
 517         for (ld::Fixup::iterator fit=atom->fixupsBegin(), fend=atom->fixupsEnd(); fit != fend; ++fit) {
 518                 switch ( fit->kind ) {
 519                         case ld::Fixup::kindNoneGroupSubordinate:
 520                         case ld::Fixup::kindNoneGroupSubordinateFDE:
 521                         case ld::Fixup::kindNoneGroupSubordinateLSDA:
 522                                 assert(fit->binding == ld::Fixup::bindingDirectlyBound);
 523                                 this->markCoalescedAway(fit->u.target);
 524                                 break;
 525                         default:
 526                                 break;
 527                 }
 528         }
 529
 530 }
 531
 532
 533 struct StrcmpSorter {
 534                 bool operator() (const char* i,const char* j) {
 535                         if (i==NULL)
 536                                 return true;
 537                         if (j==NULL)
 538                                 return false;
 539                         return strcmp(i, j)<0;}
 540 };
 541
 542 void SymbolTable::undefines(std::vector<const char*>& undefs)
 543 {
 544         // return all names in _byNameTable that have no associated atom
 545         for (NameToSlot::iterator it=_byNameTable.begin(); it != _byNameTable.end(); ++it) {
 546                 //fprintf(stderr, "  _byNameTable[%s] = slot %d which has atom %p\n", it->first, it->second, _indirectBindingTable[it->second]);
 547                 if ( _indirectBindingTable[it->second] == NULL )
 548                         undefs.push_back(it->first);
 549         }
 550         // sort so that undefines are in a stable order (not dependent on hashing functions)
 551         struct StrcmpSorter strcmpSorter;
 552         std::sort(undefs.begin(), undefs.end(), strcmpSorter);
 553 }
 554
 555
 556 void SymbolTable::tentativeDefs(std::vector<const char*>& tents)
 557 {
 558         // return all names in _byNameTable that have no associated atom
 559         for (NameToSlot::iterator it=_byNameTable.begin(); it != _byNameTable.end(); ++it) {
 560                 const char* name = it->first;
 561                 const ld::Atom* atom = _indirectBindingTable[it->second];
 562                 if ( (atom != NULL) && (atom->definition() == ld::Atom::definitionTentative) )
 563                         tents.push_back(name);
 564         }
 565         std::sort(tents.begin(), tents.end());
 566 }
 567
 568
 569 void SymbolTable::mustPreserveForBitcode(std::unordered_set<const char*>& syms)
 570 {
 571         // return all names in _byNameTable that have no associated atom
 572         for (const auto &entry: _byNameTable) {
 573                 const char* name = entry.first;
 574                 const ld::Atom* atom = _indirectBindingTable[entry.second];
 575                 if ( (atom == NULL) || (atom->definition() == ld::Atom::definitionProxy) )
 576                         syms.insert(name);
 577         }
 578 }
 579
 580
 581 bool SymbolTable::hasName(const char* name)
 582 {
 583         NameToSlot::iterator pos = _byNameTable.find(name);
 584         if ( pos == _byNameTable.end() )
 585                 return false;
 586         return (_indirectBindingTable[pos->second] != NULL);
 587 }
 588
 589 // find existing or create new slot
 590 SymbolTable::IndirectBindingSlot SymbolTable::findSlotForName(const char* name)
 591 {
 592         NameToSlot::iterator pos = _byNameTable.find(name);
 593         if ( pos != _byNameTable.end() )
 594                 return pos->second;
 595         // create new slot for this name
 596         SymbolTable::IndirectBindingSlot slot = _indirectBindingTable.size();
 597         _indirectBindingTable.push_back(NULL);
 598         _byNameTable[name] = slot;
 599         _byNameReverseTable[slot] = name;
 600         return slot;
 601 }
 602
 603 void SymbolTable::removeDeadAtoms()
 604 {
 605         // remove dead atoms from: _byNameTable, _byNameReverseTable, and _indirectBindingTable
 606         std::vector<const char*> namesToRemove;
 607         for (NameToSlot::iterator it=_byNameTable.begin(); it != _byNameTable.end(); ++it) {
 608                 IndirectBindingSlot slot = it->second;
 609                 const ld::Atom* atom = _indirectBindingTable[slot];
 610                 if ( atom != NULL ) {
 611                         if ( !atom->live() && !atom->dontDeadStrip() ) {
 612                                 //fprintf(stderr, "removing from symbolTable[%u] %s\n", slot, atom->name());
 613                                 _indirectBindingTable[slot] = NULL;
 614                                 // <rdar://problem/16025786> need to completely remove dead atoms from symbol table
 615                                 _byNameReverseTable.erase(slot);
 616                                 // can't remove while iterating, do it after iteration
 617                                 namesToRemove.push_back(it->first);
 618                         }
 619                 }
 620         }
 621         for (std::vector<const char*>::iterator it = namesToRemove.begin(); it != namesToRemove.end(); ++it) {
 622                 _byNameTable.erase(*it);
 623         }
 624
 625         // remove dead atoms from _nonLazyPointerTable
 626         for (ReferencesToSlot::iterator it=_nonLazyPointerTable.begin(); it != _nonLazyPointerTable.end(); ) {
 627                 const ld::Atom* atom = it->first;
 628                 assert(atom != NULL);
 629                 if ( !atom->live() && !atom->dontDeadStrip() )
 630                         it = _nonLazyPointerTable.erase(it);
 631                 else
 632                         ++it;
 633         }
 634
 635         // remove dead atoms from _cstringTable
 636         for (CStringToSlot::iterator it=_cstringTable.begin(); it != _cstringTable.end(); ) {
 637                 const ld::Atom* atom = it->first;
 638                 assert(atom != NULL);
 639                 if ( !atom->live() && !atom->dontDeadStrip() )
 640                         it = _cstringTable.erase(it);
 641                 else
 642                         ++it;
 643         }
 644
 645         // remove dead atoms from _utf16Table
 646         for (UTF16StringToSlot::iterator it=_utf16Table.begin(); it != _utf16Table.end(); ) {
 647                 const ld::Atom* atom = it->first;
 648                 assert(atom != NULL);
 649                 if ( !atom->live() && !atom->dontDeadStrip() )
 650                         it = _utf16Table.erase(it);
 651                 else
 652                         ++it;
 653         }
 654
 655         // remove dead atoms from _cfStringTable
 656         for (ReferencesToSlot::iterator it=_cfStringTable.begin(); it != _cfStringTable.end(); ) {
 657                 const ld::Atom* atom = it->first;
 658                 assert(atom != NULL);
 659                 if ( !atom->live() && !atom->dontDeadStrip() )
 660                         it = _cfStringTable.erase(it);
 661                 else
 662                         ++it;
 663         }
 664
 665         // remove dead atoms from _literal4Table
 666         for (ContentToSlot::iterator it=_literal4Table.begin(); it != _literal4Table.end(); ) {
 667                 const ld::Atom* atom = it->first;
 668                 assert(atom != NULL);
 669                 if ( !atom->live() && !atom->dontDeadStrip() )
 670                         it = _literal4Table.erase(it);
 671                 else
 672                         ++it;
 673         }
 674
 675         // remove dead atoms from _literal8Table
 676         for (ContentToSlot::iterator it=_literal8Table.begin(); it != _literal8Table.end(); ) {
 677                 const ld::Atom* atom = it->first;
 678                 assert(atom != NULL);
 679                 if ( !atom->live() && !atom->dontDeadStrip() )
 680                         it = _literal8Table.erase(it);
 681                 else
 682                         ++it;
 683         }
 684
 685         // remove dead atoms from _literal16Table
 686         for (ContentToSlot::iterator it=_literal16Table.begin(); it != _literal16Table.end(); ) {
 687                 const ld::Atom* atom = it->first;
 688                 assert(atom != NULL);
 689                 if ( !atom->live() && !atom->dontDeadStrip() )
 690                         it = _literal16Table.erase(it);
 691                 else
 692                         ++it;
 693         }
 694 }
 695
 696
 697 // find existing or create new slot
 698 SymbolTable::IndirectBindingSlot SymbolTable::findSlotForContent(const ld::Atom* atom, const ld::Atom** existingAtom)
 699 {
 700         //fprintf(stderr, "findSlotForContent(%p)\n", atom);
 701         SymbolTable::IndirectBindingSlot slot = 0;
 702         UTF16StringToSlot::iterator upos;
 703         CStringToSlot::iterator cspos;
 704         ContentToSlot::iterator pos;
 705         switch ( atom->section().type() ) {
 706                 case ld::Section::typeCString:
 707                         cspos = _cstringTable.find(atom);
 708                         if ( cspos != _cstringTable.end() ) {
 709                                 *existingAtom = _indirectBindingTable[cspos->second];
 710                                 return cspos->second;
 711                         }
 712                         slot = _indirectBindingTable.size();
 713                         _cstringTable[atom] = slot;
 714                         break;
 715                 case ld::Section::typeNonStdCString:
 716                         {
 717                                 // use seg/sect name is key to map to avoid coalescing across segments and sections
 718                                 char segsect[64];
 719                                 sprintf(segsect, "%s/%s", atom->section().segmentName(), atom->section().sectionName());
 720                                 NameToMap::iterator mpos = _nonStdCStringSectionToMap.find(segsect);
 721                                 CStringToSlot* map = NULL;
 722                                 if ( mpos == _nonStdCStringSectionToMap.end() ) {
 723                                         map = new CStringToSlot();
 724                                         _nonStdCStringSectionToMap[strdup(segsect)] = map;
 725                                 }
 726                                 else {
 727                                         map = mpos->second;
 728                                 }
 729                                 cspos = map->find(atom);
 730                                 if ( cspos != map->end() ) {
 731                                         *existingAtom = _indirectBindingTable[cspos->second];
 732                                         return cspos->second;
 733                                 }
 734                                 slot = _indirectBindingTable.size();
 735                                 map->operator[](atom) = slot;
 736                         }
 737                         break;
 738                 case ld::Section::typeUTF16Strings:
 739                         upos = _utf16Table.find(atom);
 740                         if ( upos != _utf16Table.end() ) {
 741                                 *existingAtom = _indirectBindingTable[upos->second];
 742                                 return upos->second;
 743                         }
 744                         slot = _indirectBindingTable.size();
 745                         _utf16Table[atom] = slot;
 746                         break;
 747                 case ld::Section::typeLiteral4:
 748                         pos = _literal4Table.find(atom);
 749                         if ( pos != _literal4Table.end() ) {
 750                                 *existingAtom = _indirectBindingTable[pos->second];
 751                                 return pos->second;
 752                         }
 753                         slot = _indirectBindingTable.size();
 754                         _literal4Table[atom] = slot;
 755                         break;
 756                 case ld::Section::typeLiteral8:
 757                         pos = _literal8Table.find(atom);
 758                         if ( pos != _literal8Table.end() ) {
 759                                 *existingAtom = _indirectBindingTable[pos->second];
 760                                 return pos->second;
 761                         }
 762                         slot = _indirectBindingTable.size();
 763                         _literal8Table[atom] = slot;
 764                         break;
 765                 case ld::Section::typeLiteral16:
 766                         pos = _literal16Table.find(atom);
 767                         if ( pos != _literal16Table.end() ) {
 768                                 *existingAtom = _indirectBindingTable[pos->second];
 769                                 return pos->second;
 770                         }
 771                         slot = _indirectBindingTable.size();
 772                         _literal16Table[atom] = slot;
 773                         break;
 774                 default:
 775                         assert(0 && "section type does not support coalescing by content");
 776         }
 777         _indirectBindingTable.push_back(atom);
 778         *existingAtom = NULL;
 779         return slot;
 780 }
 781
 782
 783
 784 // find existing or create new slot
 785 SymbolTable::IndirectBindingSlot SymbolTable::findSlotForReferences(const ld::Atom* atom, const ld::Atom** existingAtom)
 786 {
 787         //fprintf(stderr, "findSlotForReferences(%p)\n", atom);
 788
 789         SymbolTable::IndirectBindingSlot slot = 0;
 790         ReferencesToSlot::iterator pos;
 791         switch ( atom->section().type() ) {
 792                 case ld::Section::typeNonLazyPointer:
 793                         pos = _nonLazyPointerTable.find(atom);
 794                         if ( pos != _nonLazyPointerTable.end() ) {
 795                                 *existingAtom = _indirectBindingTable[pos->second];
 796                                 return pos->second;
 797                         }
 798                         slot = _indirectBindingTable.size();
 799                         _nonLazyPointerTable[atom] = slot;
 800                         break;
 801                 case ld::Section::typeCFString:
 802                         pos = _cfStringTable.find(atom);
 803                         if ( pos != _cfStringTable.end() ) {
 804                                 *existingAtom = _indirectBindingTable[pos->second];
 805                                 return pos->second;
 806                         }
 807                         slot = _indirectBindingTable.size();
 808                         _cfStringTable[atom] = slot;
 809                         break;
 810                 case ld::Section::typeObjCClassRefs:
 811                         pos = _objc2ClassRefTable.find(atom);
 812                         if ( pos != _objc2ClassRefTable.end() ) {
 813                                 *existingAtom = _indirectBindingTable[pos->second];
 814                                 return pos->second;
 815                         }
 816                         slot = _indirectBindingTable.size();
 817                         _objc2ClassRefTable[atom] = slot;
 818                         break;
 819                 case ld::Section::typeCStringPointer:
 820                         pos = _pointerToCStringTable.find(atom);
 821                         if ( pos != _pointerToCStringTable.end() ) {
 822                                 *existingAtom = _indirectBindingTable[pos->second];
 823                                 return pos->second;
 824                         }
 825                         slot = _indirectBindingTable.size();
 826                         _pointerToCStringTable[atom] = slot;
 827                         break;
 828                 case ld::Section::typeTLVPointers:
 829                         pos = _threadPointerTable.find(atom);
 830                         if ( pos != _threadPointerTable.end() ) {
 831                                 *existingAtom = _indirectBindingTable[pos->second];
 832                                 return pos->second;
 833                         }
 834                         slot = _indirectBindingTable.size();
 835                         _threadPointerTable[atom] = slot;
 836                         break;
 837                 default:
 838                         assert(0 && "section type does not support coalescing by references");
 839         }
 840         _indirectBindingTable.push_back(atom);
 841         *existingAtom = NULL;
 842         return slot;
 843 }
 844
 845
 846 const char*     SymbolTable::indirectName(IndirectBindingSlot slot) const
 847 {
 848         assert(slot < _indirectBindingTable.size());
 849         const ld::Atom* target = _indirectBindingTable[slot];
 850         if ( target != NULL )  {
 851                 return target->name();
 852         }
 853         // handle case when by-name reference is indirected and no atom yet in _byNameTable
 854         SlotToName::const_iterator pos = _byNameReverseTable.find(slot);
 855         if ( pos != _byNameReverseTable.end() )
 856                 return pos->second;
 857         assert(0);
 858         return NULL;
 859 }
 860
 861 const ld::Atom* SymbolTable::indirectAtom(IndirectBindingSlot slot) const
 862 {
 863         assert(slot < _indirectBindingTable.size());
 864         return _indirectBindingTable[slot];
 865 }
 866
 867 void SymbolTable::printStatistics()
 868 {
 869 //      fprintf(stderr, "cstring table size: %lu, bucket count: %lu, hash func called %u times\n",
 870 //                              _cstringTable.size(), _cstringTable.bucket_count(), cstringHashCount);
 871         int count[11];
 872         for(unsigned int b=0; b < 11; ++b) {
 873                 count[b] = 0;
 874         }
 875         for(unsigned int i=0; i < _cstringTable.bucket_count(); ++i) {
 876                 unsigned int n = _cstringTable.bucket_size(i);
 877                 if ( n < 10 )
 878                         count[n] += 1;
 879                 else
 880                         count[10] += 1;
 881         }
 882         fprintf(stderr, "cstring table distribution\n");
 883         for(unsigned int b=0; b < 11; ++b) {
 884                 fprintf(stderr, "%u buckets have %u elements\n", count[b], b);
 885         }
 886         fprintf(stderr, "indirect table size: %lu\n", _indirectBindingTable.size());
 887         fprintf(stderr, "by-name table size: %lu\n", _byNameTable.size());
 888 //      fprintf(stderr, "by-content table size: %lu, hash count: %u, equals count: %u, lookup count: %u\n",
 889 //                                              _byContentTable.size(), contentHashCount, contentEqualCount, contentLookupCount);
 890 //      fprintf(stderr, "by-ref table size: %lu, hashed count: %u, equals count: %u, lookup count: %u, insert count: %u\n",
 891 //                                              _byReferencesTable.size(), refHashCount, refEqualsCount, refLookupCount, refInsertCount);
 892
 893         //ReferencesHash obj;
 894         //for(ReferencesHashToSlot::iterator it=_byReferencesTable.begin(); it != _byReferencesTable.end(); ++it) {
 895         //      if ( obj.operator()(it->first) == 0x2F3AC0EAC744EA70 ) {
 896         //              fprintf(stderr, "hash=0x2F3AC0EAC744EA70 for %p %s from %s\n", it->first, it->first->name(), it->first->file()->path());
 897         //
 898         //      }
 899         //}
 900
 901 }
 902
 903 } // namespace tool
 904 } // namespace ld
 905