]> git.saurik.com Git - apple/ld64.git/blob - src/abstraction/MachOTrie.hpp
ld64-123.2.tar.gz
[apple/ld64.git] / src / abstraction / MachOTrie.hpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24 #ifndef __MACH_O_TRIE__
25 #define __MACH_O_TRIE__
26
27 #include <algorithm>
28 #include <assert.h>
29
30 #include "MachOFileAbstraction.hpp"
31
32
33 namespace mach_o {
34 namespace trie {
35
36 struct Edge
37 {
38 Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { }
39 ~Edge() { }
40 const char* fSubString;
41 struct Node* fChild;
42
43 };
44
45 struct Node
46 {
47 Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0),
48 fOther(0), fImportedName(NULL), fOrdered(false),
49 fHaveExportInfo(false), fTrieOffset(0) {}
50 ~Node() { }
51 const char* fCummulativeString;
52 std::vector<Edge> fChildren;
53 uint64_t fAddress;
54 uint64_t fFlags;
55 uint64_t fOther;
56 const char* fImportedName;
57 bool fOrdered;
58 bool fHaveExportInfo;
59 uint32_t fTrieOffset;
60
61 void addSymbol(const char* fullStr, uint64_t address, uint64_t flags, uint64_t other, const char* importName) {
62 const char* partialStr = &fullStr[strlen(fCummulativeString)];
63 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
64 Edge& e = *it;
65 int subStringLen = strlen(e.fSubString);
66 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
67 // already have matching edge, go down that path
68 e.fChild->addSymbol(fullStr, address, flags, other, importName);
69 return;
70 }
71 else {
72 for (int i=subStringLen-1; i > 0; --i) {
73 if ( strncmp(e.fSubString, partialStr, i) == 0 ) {
74 // found a common substring, splice in new node
75 // was A -> C, now A -> B -> C
76 char* bNodeCummStr = strdup(e.fChild->fCummulativeString);
77 bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0';
78 //node* aNode = this;
79 Node* bNode = new Node(bNodeCummStr);
80 Node* cNode = e.fChild;
81 char* abEdgeStr = strdup(e.fSubString);
82 abEdgeStr[i] = '\0';
83 char* bcEdgeStr = strdup(&e.fSubString[i]);
84 Edge& abEdge = e;
85 abEdge.fSubString = abEdgeStr;
86 abEdge.fChild = bNode;
87 Edge bcEdge(bcEdgeStr, cNode);
88 bNode->fChildren.push_back(bcEdge);
89 bNode->addSymbol(fullStr, address, flags, other, importName);
90 return;
91 }
92 }
93 }
94 }
95 if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
96 assert(importName != NULL);
97 assert(other != 0);
98 }
99 if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
100 assert(other != 0);
101 }
102 // no commonality with any existing child, make a new edge that is this whole string
103 Node* newNode = new Node(strdup(fullStr));
104 Edge newEdge(strdup(partialStr), newNode);
105 fChildren.push_back(newEdge);
106 newNode->fAddress = address;
107 newNode->fFlags = flags;
108 newNode->fOther = other;
109 if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && (importName != NULL) && (strcmp(fullStr,importName) != 0) )
110 newNode->fImportedName = importName;
111 else
112 newNode->fImportedName = NULL;
113 newNode->fHaveExportInfo = true;
114 }
115
116 void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) {
117 if ( !fOrdered ) {
118 orderedNodes.push_back(this);
119 //fprintf(stderr, "ordered %p %s\n", this, fCummulativeString);
120 fOrdered = true;
121 }
122 const char* partialStr = &name[strlen(fCummulativeString)];
123 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
124 Edge& e = *it;
125 int subStringLen = strlen(e.fSubString);
126 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
127 // already have matching edge, go down that path
128 e.fChild->addOrderedNodes(name, orderedNodes);
129 return;
130 }
131 }
132 }
133
134 // byte for terminal node size in bytes, or 0x00 if not terminal node
135 // teminal node (uleb128 flags, uleb128 addr [uleb128 other])
136 // byte for child node count
137 // each child: zero terminated substring, uleb128 node offset
138 bool updateOffset(uint32_t& offset) {
139 uint32_t nodeSize = 1; // length of export info when no export info
140 if ( fHaveExportInfo ) {
141 if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
142 nodeSize = uleb128_size(fFlags) + uleb128_size(fOther); // ordinal
143 if ( fImportedName != NULL )
144 nodeSize += strlen(fImportedName);
145 ++nodeSize; // trailing zero in imported name
146 }
147 else {
148 nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
149 if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
150 nodeSize += uleb128_size(fOther);
151 }
152 // do have export info, overall node size so far is uleb128 of export info + export info
153 nodeSize += uleb128_size(nodeSize);
154 }
155 // add children
156 ++nodeSize; // byte for count of chidren
157 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
158 Edge& e = *it;
159 nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset);
160 }
161 bool result = (fTrieOffset != offset);
162 fTrieOffset = offset;
163 //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString);
164 offset += nodeSize;
165 // return true if fTrieOffset was changed
166 return result;
167 }
168
169 void appendToStream(std::vector<uint8_t>& out) {
170 if ( fHaveExportInfo ) {
171 if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
172 if ( fImportedName != NULL ) {
173 // nodes with re-export info: size, flags, ordinal, string
174 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + strlen(fImportedName) + 1;
175 out.push_back(nodeSize);
176 append_uleb128(fFlags, out);
177 append_uleb128(fOther, out);
178 append_string(fImportedName, out);
179 }
180 else {
181 // nodes with re-export info: size, flags, ordinal, empty-string
182 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + 1;
183 out.push_back(nodeSize);
184 append_uleb128(fFlags, out);
185 append_uleb128(fOther, out);
186 out.push_back(0);
187 }
188 }
189 else if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
190 // nodes with export info: size, flags, address, other
191 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress) + uleb128_size(fOther);
192 out.push_back(nodeSize);
193 append_uleb128(fFlags, out);
194 append_uleb128(fAddress, out);
195 append_uleb128(fOther, out);
196 }
197 else {
198 // nodes with export info: size, flags, address
199 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
200 out.push_back(nodeSize);
201 append_uleb128(fFlags, out);
202 append_uleb128(fAddress, out);
203 }
204 }
205 else {
206 // no export info uleb128 of zero is one byte of zero
207 out.push_back(0);
208 }
209 // write number of children
210 out.push_back(fChildren.size());
211 // write each child
212 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
213 Edge& e = *it;
214 append_string(e.fSubString, out);
215 append_uleb128(e.fChild->fTrieOffset, out);
216 }
217 }
218
219 private:
220 static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) {
221 uint8_t byte;
222 do {
223 byte = value & 0x7F;
224 value &= ~0x7F;
225 if ( value != 0 )
226 byte |= 0x80;
227 out.push_back(byte);
228 value = value >> 7;
229 } while( byte >= 0x80 );
230 }
231
232 static void append_string(const char* str, std::vector<uint8_t>& out) {
233 for (const char* s = str; *s != '\0'; ++s)
234 out.push_back(*s);
235 out.push_back('\0');
236 }
237
238 static unsigned int uleb128_size(uint64_t value) {
239 uint32_t result = 0;
240 do {
241 value = value >> 7;
242 ++result;
243 } while ( value != 0 );
244 return result;
245 }
246
247
248 };
249
250 inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) {
251 uint64_t result = 0;
252 int bit = 0;
253 do {
254 if (p == end)
255 throw "malformed uleb128 extends beyond trie";
256
257 uint64_t slice = *p & 0x7f;
258
259 if (bit >= 64 || slice << bit >> bit != slice)
260 throw "uleb128 too big for 64-bits";
261 else {
262 result |= (slice << bit);
263 bit += 7;
264 }
265 }
266 while (*p++ & 0x80);
267 return result;
268 }
269
270
271
272 struct Entry
273 {
274 const char* name;
275 uint64_t address;
276 uint64_t flags;
277 uint64_t other;
278 const char* importName;
279 };
280
281
282
283 inline void makeTrie(const std::vector<Entry>& entries, std::vector<uint8_t>& output)
284 {
285 Node start(strdup(""));
286
287 // make nodes for all exported symbols
288 for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
289 start.addSymbol(it->name, it->address, it->flags, it->other, it->importName);
290 }
291
292 // create vector of nodes
293 std::vector<Node*> orderedNodes;
294 orderedNodes.reserve(entries.size()*2);
295 for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
296 start.addOrderedNodes(it->name, orderedNodes);
297 }
298
299 // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized
300 bool more;
301 do {
302 uint32_t offset = 0;
303 more = false;
304 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
305 if ( (*it)->updateOffset(offset) )
306 more = true;
307 }
308 } while ( more );
309
310 // create trie stream
311 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
312 (*it)->appendToStream(output);
313 }
314 }
315
316 struct EntryWithOffset
317 {
318 uintptr_t nodeOffset;
319 Entry entry;
320
321 bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); }
322 };
323
324
325
326 static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
327 char* cummulativeString, int curStrOffset,
328 std::vector<EntryWithOffset>& output)
329 {
330 if ( p >= end )
331 throw "malformed trie, node past end";
332 const uint8_t terminalSize = read_uleb128(p, end);
333 const uint8_t* children = p + terminalSize;
334 if ( terminalSize != 0 ) {
335 EntryWithOffset e;
336 e.nodeOffset = p-start;
337 e.entry.name = strdup(cummulativeString);
338 e.entry.flags = read_uleb128(p, end);
339 if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
340 e.entry.address = 0;
341 e.entry.other = read_uleb128(p, end); // dylib ordinal
342 e.entry.importName = (char*)p;
343 }
344 else {
345 e.entry.address = read_uleb128(p, end);
346 if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
347 e.entry.other = read_uleb128(p, end);
348 else
349 e.entry.other = 0;
350 e.entry.importName = NULL;
351 }
352 output.push_back(e);
353 }
354 const uint8_t childrenCount = *children++;
355 const uint8_t* s = children;
356 for (uint8_t i=0; i < childrenCount; ++i) {
357 int edgeStrLen = 0;
358 while (*s != '\0') {
359 cummulativeString[curStrOffset+edgeStrLen] = *s++;
360 ++edgeStrLen;
361 }
362 cummulativeString[curStrOffset+edgeStrLen] = *s++;
363 uint32_t childNodeOffet = read_uleb128(s, end);
364 processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output);
365 }
366 }
367
368
369 inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output)
370 {
371 // empty trie has no entries
372 if ( start == end )
373 return;
374 char cummulativeString[4000];
375 std::vector<EntryWithOffset> entries;
376 processExportNode(start, start, end, cummulativeString, 0, entries);
377 // to preserve tie layout order, sort by node offset
378 std::sort(entries.begin(), entries.end());
379 // copy to output
380 output.reserve(entries.size());
381 for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it)
382 output.push_back(it->entry);
383 }
384
385
386
387
388 }; // namespace trie
389 }; // namespace mach_o
390
391
392 #endif // __MACH_O_TRIE__
393
394