]>
Commit | Line | Data |
---|---|---|
a645023d A |
1 | /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- |
2 | * | |
3 | * Copyright (c) 2006-2010 Apple Inc. All rights reserved. | |
4 | * | |
5 | * @APPLE_LICENSE_HEADER_START@ | |
6 | * | |
7 | * This file contains Original Code and/or Modifications of Original Code | |
8 | * as defined in and that are subject to the Apple Public Source License | |
9 | * Version 2.0 (the 'License'). You may not use this file except in | |
10 | * compliance with the License. Please obtain a copy of the License at | |
11 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
12 | * file. | |
13 | * | |
14 | * The Original Code and all software distributed under the License are | |
15 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
16 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
17 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
19 | * Please see the License for the specific language governing rights and | |
20 | * limitations under the License. | |
21 | * | |
22 | * @APPLE_LICENSE_HEADER_END@ | |
23 | */ | |
24 | ||
25 | #ifndef __LTO_READER_H__ | |
26 | #define __LTO_READER_H__ | |
27 | ||
28 | #include <stdlib.h> | |
29 | #include <sys/param.h> | |
30 | #include <sys/fcntl.h> | |
31 | #include <sys/stat.h> | |
32 | #include <errno.h> | |
33 | #include <mach-o/dyld.h> | |
34 | #include <vector> | |
35 | #include <ext/hash_set> | |
36 | #include <ext/hash_map> | |
37 | ||
38 | #include "MachOFileAbstraction.hpp" | |
39 | #include "Architectures.hpp" | |
40 | #include "ld.hpp" | |
41 | #include "macho_relocatable_file.h" | |
42 | #include "lto_file.h" | |
43 | ||
44 | #include "llvm-c/lto.h" | |
45 | ||
46 | ||
47 | namespace lto { | |
48 | ||
49 | ||
50 | // | |
51 | // ld64 only tracks non-internal symbols from an llvm bitcode file. | |
52 | // We model this by having an InternalAtom which represents all internal functions and data. | |
53 | // All non-internal symbols from a bitcode file are represented by an Atom | |
54 | // and each Atom has a reference to the InternalAtom. The InternalAtom | |
55 | // also has references to each symbol external to the bitcode file. | |
56 | // | |
57 | class InternalAtom : public ld::Atom | |
58 | { | |
59 | public: | |
60 | InternalAtom(class File& f); | |
61 | // overrides of ld::Atom | |
62 | virtual ld::File* file() const { return &_file; } | |
63 | virtual bool translationUnitSource(const char** dir, const char** nm) const | |
64 | { return false; } | |
65 | virtual const char* name() const { return "import-atom"; } | |
66 | virtual uint64_t size() const { return 0; } | |
67 | virtual uint64_t objectAddress() const { return 0; } | |
68 | virtual void copyRawContent(uint8_t buffer[]) const { } | |
69 | virtual void setScope(Scope) { } | |
70 | virtual ld::Fixup::iterator fixupsBegin() const { return &_undefs[0]; } | |
71 | virtual ld::Fixup::iterator fixupsEnd() const { return &_undefs[_undefs.size()]; } | |
72 | ||
73 | // for adding references to symbols outside bitcode file | |
74 | void addReference(const char* nm) | |
75 | { _undefs.push_back(ld::Fixup(0, ld::Fixup::k1of1, | |
76 | ld::Fixup::kindNone, false, nm)); } | |
77 | private: | |
78 | ||
79 | ld::File& _file; | |
80 | mutable std::vector<ld::Fixup> _undefs; | |
81 | }; | |
82 | ||
83 | ||
84 | // | |
85 | // LLVM bitcode file | |
86 | // | |
87 | class File : public ld::relocatable::File | |
88 | { | |
89 | public: | |
90 | File(const char* path, time_t mTime, const uint8_t* content, | |
91 | uint32_t contentLength, uint32_t ordinal, cpu_type_t arch); | |
92 | virtual ~File(); | |
93 | ||
94 | // overrides of ld::File | |
95 | virtual bool forEachAtom(ld::File::AtomHandler&) const; | |
96 | virtual bool justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const | |
97 | { return false; } | |
98 | virtual uint32_t cpuSubType() const { return _cpuSubType; } | |
99 | ||
100 | // overrides of ld::relocatable::File | |
101 | virtual bool objcReplacementClasses() const { return false; } | |
102 | virtual DebugInfoKind debugInfo() const { return _debugInfo; } | |
103 | virtual const char* debugInfoPath() const { return _debugInfoPath; } | |
104 | virtual time_t debugInfoModificationTime() const | |
105 | { return _debugInfoModTime; } | |
106 | virtual const std::vector<ld::relocatable::File::Stab>* stabs() const { return NULL; } | |
107 | virtual bool canScatterAtoms() const { return true; } | |
108 | ||
109 | lto_module_t module() { return _module; } | |
110 | class InternalAtom& internalAtom() { return _internalAtom; } | |
111 | void setDebugInfo(ld::relocatable::File::DebugInfoKind k, | |
112 | const char* pth, time_t modTime, uint32_t subtype) | |
113 | { _debugInfo = k; | |
114 | _debugInfoPath = pth; | |
115 | _debugInfoModTime = modTime; | |
116 | _cpuSubType = subtype;} | |
117 | ||
118 | private: | |
119 | friend class Atom; | |
120 | friend class InternalAtom; | |
121 | friend class Parser; | |
122 | ||
123 | cpu_type_t _architecture; | |
124 | class InternalAtom _internalAtom; | |
125 | class Atom* _atomArray; | |
126 | uint32_t _atomArrayCount; | |
127 | lto_module_t _module; | |
128 | const char* _debugInfoPath; | |
129 | time_t _debugInfoModTime; | |
130 | ld::Section _section; | |
131 | ld::Fixup _fixupToInternal; | |
132 | ld::relocatable::File::DebugInfoKind _debugInfo; | |
133 | uint32_t _cpuSubType; | |
134 | }; | |
135 | ||
136 | // | |
137 | // Atom acts as a proxy Atom for the symbols that are exported by LLVM bitcode file. Initially, | |
138 | // Reader creates Atoms to allow the linker to proceed with the usual symbol resolution phase. After | |
139 | // optimization is performed, real Atoms are created for these symbols. However these real Atoms | |
140 | // are not inserted into global symbol table. Atom holds real Atom and forwards appropriate | |
141 | // methods to real atom. | |
142 | // | |
143 | class Atom : public ld::Atom | |
144 | { | |
145 | public: | |
146 | Atom(File& f, const char* name, ld::Atom::Scope s, | |
147 | ld::Atom::Definition d, ld::Atom::Combine c, ld::Atom::Alignment a); | |
148 | ||
149 | // overrides of ld::Atom | |
150 | virtual ld::File* file() const { return &_file; } | |
151 | virtual bool translationUnitSource(const char** dir, const char** nm) const | |
152 | { return (_compiledAtom ? _compiledAtom->translationUnitSource(dir, nm) : false); } | |
153 | virtual const char* name() const { return _name; } | |
154 | virtual uint64_t size() const { return (_compiledAtom ? _compiledAtom->size() : 0); } | |
155 | virtual uint64_t objectAddress() const { return (_compiledAtom ? _compiledAtom->objectAddress() : 0); } | |
156 | virtual void copyRawContent(uint8_t buffer[]) const | |
157 | { if (_compiledAtom) _compiledAtom->copyRawContent(buffer); } | |
158 | virtual const uint8_t* rawContentPointer() const | |
159 | { return (_compiledAtom ? _compiledAtom->rawContentPointer() : NULL); } | |
160 | virtual unsigned long contentHash(const class ld::IndirectBindingTable& ibt) const | |
161 | { return (_compiledAtom ? _compiledAtom->contentHash(ibt) : 0); } | |
162 | virtual bool canCoalesceWith(const ld::Atom& rhs, const class ld::IndirectBindingTable& ibt) const | |
163 | { return (_compiledAtom ? _compiledAtom->canCoalesceWith(rhs,ibt) : false); } | |
164 | virtual ld::Fixup::iterator fixupsBegin() const | |
165 | { return (_compiledAtom ? _compiledAtom->fixupsBegin() : (ld::Fixup*)&_file._fixupToInternal); } | |
166 | virtual ld::Fixup::iterator fixupsEnd() const | |
167 | { return (_compiledAtom ? _compiledAtom->fixupsEnd() : &((ld::Fixup*)&_file._fixupToInternal)[1]); } | |
168 | virtual ld::Atom::UnwindInfo::iterator beginUnwind() const | |
169 | { return (_compiledAtom ? _compiledAtom->beginUnwind() : NULL); } | |
170 | virtual ld::Atom::UnwindInfo::iterator endUnwind() const | |
171 | { return (_compiledAtom ? _compiledAtom->endUnwind() : NULL); } | |
172 | virtual ld::Atom::LineInfo::iterator beginLineInfo() const | |
173 | { return (_compiledAtom ? _compiledAtom->beginLineInfo() : NULL); } | |
174 | virtual ld::Atom::LineInfo::iterator endLineInfo() const | |
175 | { return (_compiledAtom ? _compiledAtom->endLineInfo() : NULL); } | |
176 | ||
177 | const ld::Atom* compiledAtom() { return _compiledAtom; } | |
178 | void setCompiledAtom(const ld::Atom& atom); | |
179 | ||
180 | private: | |
181 | ||
182 | File& _file; | |
183 | const char* _name; | |
184 | const ld::Atom* _compiledAtom; | |
185 | }; | |
186 | ||
187 | ||
188 | ||
189 | ||
190 | ||
191 | ||
192 | ||
193 | class Parser | |
194 | { | |
195 | public: | |
196 | static bool validFile(const uint8_t* fileContent, uint64_t fileLength, cpu_type_t architecture, cpu_subtype_t subarch); | |
197 | static const char* fileKind(const uint8_t* fileContent, uint64_t fileLength); | |
198 | static File* parse(const uint8_t* fileContent, uint64_t fileLength, const char* path, | |
199 | time_t modTime, uint32_t ordinal, cpu_type_t architecture, cpu_subtype_t subarch, bool logAllFiles); | |
200 | static bool libLTOisLoaded() { return (::lto_get_version() != NULL); } | |
201 | static bool optimize( const std::vector<const ld::Atom*>& allAtoms, | |
202 | ld::Internal& state, | |
203 | uint32_t nextInputOrdinal, | |
204 | const OptimizeOptions& options, | |
205 | ld::File::AtomHandler& handler, | |
206 | std::vector<const ld::Atom*>& newAtoms, | |
207 | std::vector<const char*>& additionalUndefines); | |
208 | ||
209 | static const char* ltoVersion() { return ::lto_get_version(); } | |
210 | ||
211 | private: | |
212 | static const char* tripletPrefixForArch(cpu_type_t arch); | |
213 | static ld::relocatable::File* parseMachOFile(const uint8_t* p, size_t len, uint32_t nextInputOrdinal, const OptimizeOptions& options); | |
214 | ||
215 | class CStringEquals | |
216 | { | |
217 | public: | |
218 | bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); } | |
219 | }; | |
220 | typedef __gnu_cxx::hash_set<const char*, __gnu_cxx::hash<const char*>, CStringEquals> CStringSet; | |
221 | typedef __gnu_cxx::hash_map<const char*, Atom*, __gnu_cxx::hash<const char*>, CStringEquals> CStringToAtom; | |
222 | ||
223 | class AtomSyncer : public ld::File::AtomHandler { | |
224 | public: | |
225 | AtomSyncer(std::vector<const char*>& a, std::vector<const ld::Atom*>&na, | |
226 | CStringToAtom la, CStringToAtom dla, const OptimizeOptions& options) : | |
227 | _options(options), _additionalUndefines(a), _newAtoms(na), _llvmAtoms(la), _deadllvmAtoms(dla) { } | |
228 | virtual void doAtom(const class ld::Atom&); | |
229 | virtual void doFile(const class ld::File&) { } | |
230 | ||
231 | const OptimizeOptions& _options; | |
232 | std::vector<const char*>& _additionalUndefines; | |
233 | std::vector<const ld::Atom*>& _newAtoms; | |
234 | CStringToAtom _llvmAtoms; | |
235 | CStringToAtom _deadllvmAtoms; | |
236 | }; | |
237 | ||
238 | static std::vector<File*> _s_files; | |
239 | }; | |
240 | ||
241 | std::vector<File*> Parser::_s_files; | |
242 | ||
243 | ||
244 | bool Parser::validFile(const uint8_t* fileContent, uint64_t fileLength, cpu_type_t architecture, cpu_subtype_t subarch) | |
245 | { | |
246 | switch (architecture) { | |
247 | case CPU_TYPE_I386: | |
248 | return ::lto_module_is_object_file_in_memory_for_target(fileContent, fileLength, "i386-"); | |
249 | case CPU_TYPE_X86_64: | |
250 | return ::lto_module_is_object_file_in_memory_for_target(fileContent, fileLength, "x86_64-"); | |
251 | case CPU_TYPE_ARM: | |
252 | switch ( subarch ) { | |
253 | case CPU_SUBTYPE_ARM_V6: | |
254 | return ::lto_module_is_object_file_in_memory_for_target(fileContent, fileLength, "armv6-"); | |
255 | case CPU_SUBTYPE_ARM_V7: | |
256 | return ::lto_module_is_object_file_in_memory_for_target(fileContent, fileLength, "thumbv7-"); | |
257 | } | |
258 | break; | |
259 | case CPU_TYPE_POWERPC: | |
260 | return ::lto_module_is_object_file_in_memory_for_target(fileContent, fileLength, "powerpc-"); | |
261 | } | |
262 | return false; | |
263 | } | |
264 | ||
265 | const char* Parser::fileKind(const uint8_t* p, uint64_t fileLength) | |
266 | { | |
267 | if ( (p[0] == 0xDE) && (p[1] == 0xC0) && (p[2] == 0x17) && (p[3] == 0x0B) ) { | |
268 | uint32_t arch = LittleEndian::get32(*((uint32_t*)(&p[16]))); | |
269 | switch (arch) { | |
270 | case CPU_TYPE_POWERPC: | |
271 | return "ppc"; | |
272 | case CPU_TYPE_I386: | |
273 | return "i386"; | |
274 | case CPU_TYPE_X86_64: | |
275 | return "x86_64"; | |
276 | case CPU_TYPE_ARM: | |
277 | if ( ::lto_module_is_object_file_in_memory_for_target(p, fileLength, "armv6-") ) | |
278 | return "armv6"; | |
279 | if ( ::lto_module_is_object_file_in_memory_for_target(p, fileLength, "thumbv7-") ) | |
280 | return "armv7"; | |
281 | return "arm"; | |
282 | } | |
283 | return "unknown bitcode architecture"; | |
284 | } | |
285 | return NULL; | |
286 | } | |
287 | ||
288 | File* Parser::parse(const uint8_t* fileContent, uint64_t fileLength, const char* path, time_t modTime, | |
289 | uint32_t ordinal, cpu_type_t architecture, cpu_subtype_t subarch, bool logAllFiles) | |
290 | { | |
291 | File* f = new File(path, modTime, fileContent, fileLength, ordinal, architecture); | |
292 | _s_files.push_back(f); | |
293 | if ( logAllFiles ) | |
294 | printf("%s\n", path); | |
295 | return f; | |
296 | } | |
297 | ||
298 | ||
299 | ld::relocatable::File* Parser::parseMachOFile(const uint8_t* p, size_t len, uint32_t nextInputOrdinal, const OptimizeOptions& options) | |
300 | { | |
301 | mach_o::relocatable::ParserOptions objOpts; | |
302 | objOpts.architecture = options.arch; | |
303 | objOpts.objSubtypeMustMatch = false; | |
304 | objOpts.logAllFiles = false; | |
305 | objOpts.convertUnwindInfo = true; | |
306 | objOpts.subType = 0; | |
307 | ||
308 | // mach-o parsing is done in-memory, but need path for debug notes | |
309 | const char* path = "/tmp/lto.o"; | |
310 | time_t modTime = 0; | |
311 | if ( options.tmpObjectFilePath != NULL ) { | |
312 | path = options.tmpObjectFilePath; | |
313 | struct stat statBuffer; | |
314 | if ( stat(options.tmpObjectFilePath, &statBuffer) == 0 ) | |
315 | modTime = statBuffer.st_mtime; | |
316 | } | |
317 | ||
318 | ld::relocatable::File* result = mach_o::relocatable::parse(p, len, path, modTime, nextInputOrdinal, objOpts); | |
319 | if ( result != NULL ) | |
320 | return result; | |
321 | throw "LLVM LTO, file is not of required architecture"; | |
322 | } | |
323 | ||
324 | ||
325 | ||
326 | File::File(const char* pth, time_t mTime, const uint8_t* content, uint32_t contentLength, uint32_t ord, cpu_type_t arch) | |
327 | : ld::relocatable::File(pth,mTime,ord), _architecture(arch), _internalAtom(*this), | |
328 | _atomArray(NULL), _atomArrayCount(0), _module(NULL), _debugInfoPath(pth), | |
329 | _section("__TEXT_", "__tmp_lto", ld::Section::typeTempLTO), | |
330 | _fixupToInternal(0, ld::Fixup::k1of1, ld::Fixup::kindNone, &_internalAtom), | |
331 | _debugInfo(ld::relocatable::File::kDebugInfoNone), _cpuSubType(0) | |
332 | { | |
333 | const bool log = false; | |
334 | ||
335 | // create llvm module | |
336 | _module = ::lto_module_create_from_memory(content, contentLength); | |
337 | if ( _module == NULL ) | |
338 | throwf("could not parse object file %s: %s", pth, lto_get_error_message()); | |
339 | ||
340 | if ( log ) fprintf(stderr, "bitcode file: %s\n", pth); | |
341 | ||
342 | // create atom for each global symbol in module | |
343 | uint32_t count = ::lto_module_get_num_symbols(_module); | |
344 | _atomArray = (Atom*)malloc(sizeof(Atom)*count); | |
345 | for (uint32_t i=0; i < count; ++i) { | |
346 | const char* name = ::lto_module_get_symbol_name(_module, i); | |
347 | lto_symbol_attributes attr = lto_module_get_symbol_attribute(_module, i); | |
348 | ||
349 | // <rdar://problem/6378110> LTO doesn't like dtrace symbols | |
350 | // ignore dtrace static probes for now | |
351 | // later when codegen is done and a mach-o file is produces the probes will be processed | |
352 | if ( (strncmp(name, "___dtrace_probe$", 16) == 0) || (strncmp(name, "___dtrace_isenabled$", 20) == 0) ) | |
353 | continue; | |
354 | ||
355 | ld::Atom::Definition def; | |
356 | ld::Atom::Combine combine = ld::Atom::combineNever; | |
357 | switch ( attr & LTO_SYMBOL_DEFINITION_MASK ) { | |
358 | case LTO_SYMBOL_DEFINITION_REGULAR: | |
359 | def = ld::Atom::definitionRegular; | |
360 | break; | |
361 | case LTO_SYMBOL_DEFINITION_TENTATIVE: | |
362 | def = ld::Atom::definitionTentative; | |
363 | break; | |
364 | case LTO_SYMBOL_DEFINITION_WEAK: | |
365 | def = ld::Atom::definitionRegular; | |
366 | combine = ld::Atom::combineByName; | |
367 | break; | |
368 | case LTO_SYMBOL_DEFINITION_UNDEFINED: | |
369 | case LTO_SYMBOL_DEFINITION_WEAKUNDEF: | |
370 | def = ld::Atom::definitionProxy; | |
371 | break; | |
372 | default: | |
373 | throwf("unknown definition kind for symbol %s in bitcode file %s", name, pth); | |
374 | } | |
375 | ||
376 | // make LLVM atoms for definitions and a reference for undefines | |
377 | if ( def != ld::Atom::definitionProxy ) { | |
378 | ld::Atom::Scope scope; | |
379 | switch ( attr & LTO_SYMBOL_SCOPE_MASK) { | |
380 | case LTO_SYMBOL_SCOPE_INTERNAL: | |
381 | scope = ld::Atom::scopeTranslationUnit; | |
382 | break; | |
383 | case LTO_SYMBOL_SCOPE_HIDDEN: | |
384 | scope = ld::Atom::scopeLinkageUnit; | |
385 | break; | |
386 | case LTO_SYMBOL_SCOPE_DEFAULT: | |
387 | scope = ld::Atom::scopeGlobal; | |
388 | break; | |
389 | default: | |
390 | throwf("unknown scope for symbol %s in bitcode file %s", name, pth); | |
391 | } | |
392 | // only make atoms for non-internal symbols | |
393 | if ( scope == ld::Atom::scopeTranslationUnit ) | |
394 | continue; | |
395 | uint8_t alignment = (attr & LTO_SYMBOL_ALIGNMENT_MASK); | |
396 | // make Atom using placement new operator | |
397 | new (&_atomArray[_atomArrayCount++]) Atom(*this, name, scope, def, combine, alignment); | |
398 | if ( scope == ld::Atom::scopeLinkageUnit ) | |
399 | _internalAtom.addReference(name); | |
400 | if ( log ) fprintf(stderr, "\t0x%08X %s\n", attr, name); | |
401 | } | |
402 | else { | |
403 | // add to list of external references | |
404 | _internalAtom.addReference(name); | |
405 | if ( log ) fprintf(stderr, "\t%s (undefined)\n", name); | |
406 | } | |
407 | } | |
408 | } | |
409 | ||
410 | File::~File() | |
411 | { | |
412 | if ( _module != NULL ) | |
413 | ::lto_module_dispose(_module); | |
414 | } | |
415 | ||
416 | bool File::forEachAtom(ld::File::AtomHandler& handler) const | |
417 | { | |
418 | handler.doAtom(_internalAtom); | |
419 | for(uint32_t i=0; i < _atomArrayCount; ++i) { | |
420 | handler.doAtom(_atomArray[i]); | |
421 | } | |
422 | return true; | |
423 | } | |
424 | ||
425 | InternalAtom::InternalAtom(File& f) | |
426 | : ld::Atom(f._section, ld::Atom::definitionRegular, ld::Atom::combineNever, ld::Atom::scopeTranslationUnit, | |
427 | ld::Atom::typeLTOtemporary, ld::Atom::symbolTableNotIn, true, false, false, ld::Atom::Alignment(0)), | |
428 | _file(f) | |
429 | { | |
430 | } | |
431 | ||
432 | Atom::Atom(File& f, const char* nm, ld::Atom::Scope s, ld::Atom::Definition d, ld::Atom::Combine c, ld::Atom::Alignment a) | |
433 | : ld::Atom(f._section, d, c, s, ld::Atom::typeLTOtemporary, | |
434 | ld::Atom::symbolTableIn, false, false, false, a), | |
435 | _file(f), _name(nm), _compiledAtom(NULL) | |
436 | { | |
437 | } | |
438 | ||
439 | void Atom::setCompiledAtom(const ld::Atom& atom) | |
440 | { | |
441 | // set delegate so virtual methods go to it | |
442 | _compiledAtom = &atom; | |
443 | ||
444 | //fprintf(stderr, "setting lto atom %p to delegate to mach-o atom %p (%s)\n", this, &atom, atom.name()); | |
445 | ||
446 | // update fields in ld::Atom to match newly constructed mach-o atom | |
447 | (const_cast<Atom*>(this))->setAttributesFromAtom(atom); | |
448 | } | |
449 | ||
450 | ||
451 | ||
452 | bool Parser::optimize( const std::vector<const ld::Atom*>& allAtoms, | |
453 | ld::Internal& state, | |
454 | uint32_t nextInputOrdinal, | |
455 | const OptimizeOptions& options, | |
456 | ld::File::AtomHandler& handler, | |
457 | std::vector<const ld::Atom*>& newAtoms, | |
458 | std::vector<const char*>& additionalUndefines) | |
459 | { | |
460 | const bool logMustPreserve = false; | |
461 | const bool logExtraOptions = false; | |
462 | const bool logBitcodeFiles = false; | |
463 | const bool logAtomsBeforeSync = false; | |
464 | ||
465 | // exit quickly if nothing to do | |
466 | if ( _s_files.size() == 0 ) | |
467 | return false; | |
468 | ||
469 | // print out LTO version string if -v was used | |
470 | if ( options.verbose ) | |
471 | fprintf(stderr, "%s\n", lto_get_version()); | |
472 | ||
473 | // create optimizer and add each Reader | |
474 | lto_code_gen_t generator = ::lto_codegen_create(); | |
475 | for (std::vector<File*>::iterator it=_s_files.begin(); it != _s_files.end(); ++it) { | |
476 | if ( logBitcodeFiles ) fprintf(stderr, "lto_codegen_add_module(%s)\n", (*it)->path()); | |
477 | if ( ::lto_codegen_add_module(generator, (*it)->module()) ) | |
478 | throwf("lto: could not merge in %s because %s", (*it)->path(), ::lto_get_error_message()); | |
479 | } | |
480 | ||
481 | // add any -mllvm command line options | |
482 | for (std::vector<const char*>::const_iterator it=options.llvmOptions->begin(); it != options.llvmOptions->end(); ++it) { | |
483 | if ( logExtraOptions ) fprintf(stderr, "passing option to llvm: %s\n", *it); | |
484 | ::lto_codegen_debug_options(generator, *it); | |
485 | } | |
486 | ||
487 | // The atom graph uses directed edges (references). Collect all references where | |
488 | // originating atom is not part of any LTO Reader. This allows optimizer to optimize an | |
489 | // external (i.e. not originated from same .o file) reference if all originating atoms are also | |
490 | // defined in llvm bitcode file. | |
491 | CStringSet nonLLVMRefs; | |
492 | CStringToAtom llvmAtoms; | |
493 | bool hasNonllvmAtoms = false; | |
494 | for (std::vector<const ld::Atom*>::const_iterator it = allAtoms.begin(); it != allAtoms.end(); ++it) { | |
495 | const ld::Atom* atom = *it; | |
496 | // only look at references that come from an atom that is not an llvm atom | |
497 | if ( atom->contentType() != ld::Atom::typeLTOtemporary ) { | |
498 | if ( (atom->section().type() != ld::Section::typeMachHeader) && (atom->definition() != ld::Atom::definitionProxy) ) { | |
499 | hasNonllvmAtoms = true; | |
500 | } | |
501 | const ld::Atom* target; | |
502 | for (ld::Fixup::iterator fit=atom->fixupsBegin(); fit != atom->fixupsEnd(); ++fit) { | |
503 | switch ( fit->binding ) { | |
504 | case ld::Fixup::bindingDirectlyBound: | |
505 | // that reference an llvm atom | |
506 | if ( fit->u.target->contentType() == ld::Atom::typeLTOtemporary ) | |
507 | nonLLVMRefs.insert(fit->u.target->name()); | |
508 | break; | |
509 | case ld::Fixup::bindingsIndirectlyBound: | |
510 | target = state.indirectBindingTable[fit->u.bindingIndex]; | |
511 | if ( target == NULL ) | |
512 | throwf("'%s' in %s contains undefined reference", atom->name(), atom->file()->path()); | |
513 | assert(target != NULL); | |
514 | if ( target->contentType() == ld::Atom::typeLTOtemporary ) | |
515 | nonLLVMRefs.insert(target->name()); | |
516 | default: | |
517 | break; | |
518 | } | |
519 | } | |
520 | } | |
521 | else { | |
522 | llvmAtoms[atom->name()] = (Atom*)atom; | |
523 | } | |
524 | } | |
525 | // if entry point is in a llvm bitcode file, it must be preserved by LTO | |
526 | if ( state.entryPoint!= NULL ) { | |
527 | if ( state.entryPoint->contentType() == ld::Atom::typeLTOtemporary ) | |
528 | nonLLVMRefs.insert(state.entryPoint->name()); | |
529 | } | |
530 | ||
531 | // deadAtoms are the atoms that the linker coalesced. For instance weak or tentative definitions | |
532 | // overriden by another atom. If any of these deadAtoms are llvm atoms and they were replaced | |
533 | // with a mach-o atom, we need to tell the lto engine to preserve (not optimize away) its dead | |
534 | // atom so that the linker can replace it with the mach-o one later. | |
535 | CStringToAtom deadllvmAtoms; | |
536 | for (std::vector<const ld::Atom*>::const_iterator it = allAtoms.begin(); it != allAtoms.end(); ++it) { | |
537 | const ld::Atom* atom = *it; | |
538 | if ( atom->coalescedAway() && (atom->contentType() == ld::Atom::typeLTOtemporary) ) { | |
539 | const char* name = atom->name(); | |
540 | if ( logMustPreserve ) fprintf(stderr, "lto_codegen_add_must_preserve_symbol(%s) because linker coalesce away and replace with a mach-o atom\n", name); | |
541 | ::lto_codegen_add_must_preserve_symbol(generator, name); | |
542 | deadllvmAtoms[name] = (Atom*)atom; | |
543 | } | |
544 | } | |
545 | for (std::vector<File*>::iterator it=_s_files.begin(); it != _s_files.end(); ++it) { | |
546 | File* file = *it; | |
547 | for(uint32_t i=0; i < file->_atomArrayCount; ++i) { | |
548 | Atom* llvmAtom = &file->_atomArray[i]; | |
549 | if ( llvmAtom->coalescedAway() ) { | |
550 | const char* name = llvmAtom->name(); | |
551 | if ( deadllvmAtoms.find(name) == deadllvmAtoms.end() ) { | |
552 | if ( logMustPreserve ) | |
553 | fprintf(stderr, "lto_codegen_add_must_preserve_symbol(%s) because linker coalesce away and replace with a mach-o atom\n", name); | |
554 | ::lto_codegen_add_must_preserve_symbol(generator, name); | |
555 | deadllvmAtoms[name] = (Atom*)llvmAtom; | |
556 | } | |
557 | } | |
558 | else if ( options.linkerDeadStripping && !llvmAtom->live() ) { | |
559 | const char* name = llvmAtom->name(); | |
560 | deadllvmAtoms[name] = (Atom*)llvmAtom; | |
561 | } | |
562 | } | |
563 | } | |
564 | ||
565 | // tell code generator about symbols that must be preserved | |
566 | for (CStringToAtom::iterator it = llvmAtoms.begin(); it != llvmAtoms.end(); ++it) { | |
567 | const char* name = it->first; | |
568 | Atom* atom = it->second; | |
569 | // Include llvm Symbol in export list if it meets one of following two conditions | |
570 | // 1 - atom scope is global (and not linkage unit). | |
571 | // 2 - included in nonLLVMRefs set. | |
572 | // If a symbol is not listed in exportList then LTO is free to optimize it away. | |
573 | if ( (atom->scope() == ld::Atom::scopeGlobal) ) { | |
574 | if ( logMustPreserve ) fprintf(stderr, "lto_codegen_add_must_preserve_symbol(%s) because global symbol\n", name); | |
575 | ::lto_codegen_add_must_preserve_symbol(generator, name); | |
576 | } | |
577 | else if ( nonLLVMRefs.find(name) != nonLLVMRefs.end() ) { | |
578 | if ( logMustPreserve ) fprintf(stderr, "lto_codegen_add_must_preserve_symbol(%s) because referenced by a mach-o atom\n", name); | |
579 | ::lto_codegen_add_must_preserve_symbol(generator, name); | |
580 | } | |
581 | } | |
582 | ||
583 | // special case running ld -r on all bitcode files to produce another bitcode file (instead of mach-o) | |
584 | if ( options.relocatable && !hasNonllvmAtoms ) { | |
585 | if ( ! ::lto_codegen_write_merged_modules(generator, options.outputFilePath) ) { | |
586 | // HACK, no good way to tell linker we are all done, so just quit | |
587 | exit(0); | |
588 | } | |
589 | warning("could not produce merged bitcode file"); | |
590 | } | |
591 | ||
592 | // set code-gen model | |
593 | lto_codegen_model model = LTO_CODEGEN_PIC_MODEL_DYNAMIC; | |
594 | if ( options.mainExecutable ) { | |
595 | if ( options.staticExecutable ) { | |
596 | // darwin x86_64 "static" code model is really dynamic code model | |
597 | if ( options.arch == CPU_TYPE_X86_64 ) | |
598 | model = LTO_CODEGEN_PIC_MODEL_DYNAMIC; | |
599 | else | |
600 | model = LTO_CODEGEN_PIC_MODEL_STATIC; | |
601 | } | |
602 | else { | |
603 | if ( options.pie ) | |
604 | model = LTO_CODEGEN_PIC_MODEL_DYNAMIC; | |
605 | else | |
606 | model = LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC; | |
607 | } | |
608 | } | |
609 | else { | |
610 | if ( options.allowTextRelocs ) | |
611 | model = LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC; | |
612 | else | |
613 | model = LTO_CODEGEN_PIC_MODEL_DYNAMIC; | |
614 | } | |
615 | if ( ::lto_codegen_set_pic_model(generator, model) ) | |
616 | throwf("could not create set codegen model: %s", lto_get_error_message()); | |
617 | ||
618 | // if requested, save off merged bitcode file | |
619 | if ( options.saveTemps ) { | |
620 | char tempBitcodePath[MAXPATHLEN]; | |
621 | strcpy(tempBitcodePath, options.outputFilePath); | |
622 | strcat(tempBitcodePath, ".lto.bc"); | |
623 | ::lto_codegen_write_merged_modules(generator, tempBitcodePath); | |
624 | } | |
625 | ||
626 | #if LTO_API_VERSION >= 3 | |
627 | // find assembler next to linker | |
628 | char path[PATH_MAX]; | |
629 | uint32_t bufSize = PATH_MAX; | |
630 | if ( _NSGetExecutablePath(path, &bufSize) != -1 ) { | |
631 | char* lastSlash = strrchr(path, '/'); | |
632 | if ( lastSlash != NULL ) { | |
633 | strcpy(lastSlash+1, "as"); | |
634 | struct stat statInfo; | |
635 | if ( stat(path, &statInfo) == 0 ) | |
636 | ::lto_codegen_set_assembler_path(generator, path); | |
637 | } | |
638 | } | |
639 | #endif | |
640 | // run code generator | |
641 | size_t machOFileLen; | |
642 | const uint8_t* machOFile = (uint8_t*)::lto_codegen_compile(generator, &machOFileLen); | |
643 | if ( machOFile == NULL ) | |
644 | throwf("could not do LTO codegen: %s", ::lto_get_error_message()); | |
645 | ||
646 | // if requested, save off temp mach-o file | |
647 | if ( options.saveTemps ) { | |
648 | char tempMachoPath[MAXPATHLEN]; | |
649 | strcpy(tempMachoPath, options.outputFilePath); | |
650 | strcat(tempMachoPath, ".lto.o"); | |
651 | int fd = ::open(tempMachoPath, O_CREAT | O_WRONLY | O_TRUNC, 0666); | |
652 | if ( fd != -1) { | |
653 | ::write(fd, machOFile, machOFileLen); | |
654 | ::close(fd); | |
655 | } | |
656 | // save off merged bitcode file | |
657 | char tempOptBitcodePath[MAXPATHLEN]; | |
658 | strcpy(tempOptBitcodePath, options.outputFilePath); | |
659 | strcat(tempOptBitcodePath, ".lto.opt.bc"); | |
660 | ::lto_codegen_write_merged_modules(generator, tempOptBitcodePath); | |
661 | } | |
662 | ||
663 | // if needed, save temp mach-o file to specific location | |
664 | if ( options.tmpObjectFilePath != NULL ) { | |
665 | int fd = ::open(options.tmpObjectFilePath, O_CREAT | O_WRONLY | O_TRUNC, 0666); | |
666 | if ( fd != -1) { | |
667 | ::write(fd, machOFile, machOFileLen); | |
668 | ::close(fd); | |
669 | } | |
670 | else { | |
671 | warning("could not write LTO temp file '%s', errno=%d", options.tmpObjectFilePath, errno); | |
672 | } | |
673 | } | |
674 | ||
675 | // parse generated mach-o file into a MachOReader | |
676 | ld::relocatable::File* machoFile = parseMachOFile(machOFile, machOFileLen, nextInputOrdinal, options); | |
677 | ||
678 | // sync generated mach-o atoms with existing atoms ld knows about | |
679 | if ( logAtomsBeforeSync ) { | |
680 | fprintf(stderr, "llvmAtoms:\n"); | |
681 | for (CStringToAtom::iterator it = llvmAtoms.begin(); it != llvmAtoms.end(); ++it) { | |
682 | const char* name = it->first; | |
683 | //Atom* atom = it->second; | |
684 | fprintf(stderr, "\t%s\n", name); | |
685 | } | |
686 | fprintf(stderr, "deadllvmAtoms:\n"); | |
687 | for (CStringToAtom::iterator it = deadllvmAtoms.begin(); it != deadllvmAtoms.end(); ++it) { | |
688 | const char* name = it->first; | |
689 | //Atom* atom = it->second; | |
690 | fprintf(stderr, "\t%s\n", name); | |
691 | } | |
692 | } | |
693 | AtomSyncer syncer(additionalUndefines, newAtoms, llvmAtoms, deadllvmAtoms, options); | |
694 | machoFile->forEachAtom(syncer); | |
695 | ||
696 | // Remove InternalAtoms from ld | |
697 | for (std::vector<File*>::iterator it=_s_files.begin(); it != _s_files.end(); ++it) { | |
698 | (*it)->internalAtom().setCoalescedAway(); | |
699 | } | |
700 | // Remove Atoms from ld if code generator optimized them away | |
701 | for (CStringToAtom::iterator li = llvmAtoms.begin(), le = llvmAtoms.end(); li != le; ++li) { | |
702 | // check if setRealAtom() called on this Atom | |
703 | if ( li->second->compiledAtom() == NULL ) { | |
704 | //fprintf(stderr, "llvm optimized away %p %s\n", li->second, li->second->name()); | |
705 | li->second->setCoalescedAway(); | |
706 | } | |
707 | } | |
708 | ||
709 | // notify about file level attributes | |
710 | handler.doFile(*machoFile); | |
711 | ||
712 | // if final mach-o file has debug info, update original bitcode files to match | |
713 | for (std::vector<File*>::iterator it=_s_files.begin(); it != _s_files.end(); ++it) { | |
714 | (*it)->setDebugInfo(machoFile->debugInfo(), machoFile->path(), | |
715 | machoFile->modificationTime(), machoFile->cpuSubType()); | |
716 | } | |
717 | ||
718 | return true; | |
719 | } | |
720 | ||
721 | ||
722 | void Parser::AtomSyncer::doAtom(const ld::Atom& machoAtom) | |
723 | { | |
724 | // update proxy atoms to point to real atoms and find new atoms | |
725 | const char* name = machoAtom.name(); | |
726 | if ( machoAtom.scope() >= ld::Atom::scopeLinkageUnit ) { | |
727 | CStringToAtom::iterator pos = _llvmAtoms.find(name); | |
728 | if ( pos != _llvmAtoms.end() ) { | |
729 | // turn Atom into a proxy for this mach-o atom | |
730 | pos->second->setCompiledAtom(machoAtom); | |
731 | } | |
732 | else { | |
733 | // an atom of this name was not in the allAtoms list the linker gave us | |
734 | if ( _deadllvmAtoms.find(name) != _deadllvmAtoms.end() ) { | |
735 | // this corresponding to an atom that the linker coalesced away or marked not-live | |
736 | if ( _options.linkerDeadStripping ) { | |
737 | // llvm seems to want this atom and -dead_strip is enabled, so it will be deleted if not needed, so add back | |
738 | Atom* llvmAtom = _deadllvmAtoms[name]; | |
739 | llvmAtom->setCompiledAtom(machoAtom); | |
740 | _newAtoms.push_back(&machoAtom); | |
741 | } | |
742 | else { | |
743 | // Don't pass it back as a new atom | |
744 | } | |
745 | } | |
746 | else | |
747 | { | |
748 | // this is something new that lto conjured up, tell ld its new | |
749 | _newAtoms.push_back(&machoAtom); | |
750 | } | |
751 | } | |
752 | } | |
753 | else { | |
754 | // ld only knew about non-static atoms, so this one must be new | |
755 | _newAtoms.push_back(&machoAtom); | |
756 | } | |
757 | ||
758 | // adjust fixups to go through proxy atoms | |
759 | //fprintf(stderr, "adjusting fixups in atom: %s\n", machoAtom.name()); | |
760 | for (ld::Fixup::iterator fit=machoAtom.fixupsBegin(); fit != machoAtom.fixupsEnd(); ++fit) { | |
761 | switch ( fit->binding ) { | |
762 | case ld::Fixup::bindingNone: | |
763 | break; | |
764 | case ld::Fixup::bindingByNameUnbound: | |
765 | // don't know if this target has been seen by linker before or if it is new | |
766 | // be conservative and tell linker it is new | |
767 | _additionalUndefines.push_back(fit->u.name); | |
768 | //fprintf(stderr, " by name ref to: %s\n", fit->u.name); | |
769 | break; | |
770 | case ld::Fixup::bindingDirectlyBound: | |
771 | // If mach-o atom is referencing another mach-o atom then | |
772 | // reference is not going through Atom proxy. Fix it here to ensure that all | |
773 | // llvm symbol references always go through Atom proxy. | |
774 | if ( fit->u.target->scope() != ld::Atom::scopeTranslationUnit ) { | |
775 | const char* targetName = fit->u.target->name(); | |
776 | CStringToAtom::iterator pos = _llvmAtoms.find(targetName); | |
777 | if ( pos != _llvmAtoms.end() ) { | |
778 | fit->u.target = pos->second; | |
779 | } | |
780 | else { | |
781 | if ( _deadllvmAtoms.find(targetName) != _deadllvmAtoms.end() ) { | |
782 | // target was coalesed away and replace by mach-o atom from a non llvm .o file | |
783 | fit->binding = ld::Fixup::bindingByNameUnbound; | |
784 | fit->u.name = targetName; | |
785 | } | |
786 | } | |
787 | } | |
788 | //fprintf(stderr, " direct ref to: %s (scope=%d)\n", fit->u.target->name(), fit->u.target->scope()); | |
789 | break; | |
790 | case ld::Fixup::bindingByContentBound: | |
791 | //fprintf(stderr, " direct by content to: %s\n", fit->u.target->name()); | |
792 | break; | |
793 | case ld::Fixup::bindingsIndirectlyBound: | |
794 | assert(0 && "indirect binding found in initial mach-o file?"); | |
795 | //fprintf(stderr, " indirect by content to: %u\n", fit->u.bindingIndex); | |
796 | break; | |
797 | } | |
798 | } | |
799 | ||
800 | } | |
801 | ||
802 | ||
803 | // | |
804 | // Used by archive reader to see if member is an llvm bitcode file | |
805 | // | |
806 | bool isObjectFile(const uint8_t* fileContent, uint64_t fileLength, cpu_type_t architecture, cpu_subtype_t subarch) | |
807 | { | |
808 | return Parser::validFile(fileContent, fileLength, architecture, subarch); | |
809 | } | |
810 | ||
811 | ||
812 | // | |
813 | // main function used by linker to instantiate ld::Files | |
814 | // | |
815 | ld::relocatable::File* parse(const uint8_t* fileContent, uint64_t fileLength, | |
816 | const char* path, time_t modTime, uint32_t ordinal, | |
817 | cpu_type_t architecture, cpu_subtype_t subarch, bool logAllFiles) | |
818 | { | |
819 | if ( Parser::validFile(fileContent, fileLength, architecture, subarch) ) | |
820 | return Parser::parse(fileContent, fileLength, path, modTime, ordinal, architecture, subarch, logAllFiles); | |
821 | else | |
822 | return NULL; | |
823 | } | |
824 | ||
825 | // | |
826 | // used by "ld -v" to report version of libLTO.dylib being used | |
827 | // | |
828 | const char* version() | |
829 | { | |
830 | return ::lto_get_version(); | |
831 | } | |
832 | ||
833 | ||
834 | // | |
835 | // used by ld for error reporting | |
836 | // | |
837 | bool libLTOisLoaded() | |
838 | { | |
839 | return (::lto_get_version() != NULL); | |
840 | } | |
841 | ||
842 | // | |
843 | // used by ld for error reporting | |
844 | // | |
845 | const char* archName(const uint8_t* fileContent, uint64_t fileLength) | |
846 | { | |
847 | return Parser::fileKind(fileContent, fileLength); | |
848 | } | |
849 | ||
850 | // | |
851 | // used by ld for doing link time optimization | |
852 | // | |
853 | bool optimize( const std::vector<const ld::Atom*>& allAtoms, | |
854 | ld::Internal& state, | |
855 | uint32_t nextInputOrdinal, | |
856 | const OptimizeOptions& options, | |
857 | ld::File::AtomHandler& handler, | |
858 | std::vector<const ld::Atom*>& newAtoms, | |
859 | std::vector<const char*>& additionalUndefines) | |
860 | { | |
861 | return Parser::optimize(allAtoms, state, nextInputOrdinal, options, handler, newAtoms, additionalUndefines); | |
862 | } | |
863 | ||
864 | ||
865 | ||
866 | }; // namespace lto | |
867 | ||
868 | ||
869 | #endif | |
870 |