]>
Commit | Line | Data |
---|---|---|
39a8cd10 A |
1 | /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- |
2 | * | |
412ebb8e | 3 | * Copyright (c) 2008-2010 Apple Inc. All rights reserved. |
39a8cd10 A |
4 | * |
5 | * @APPLE_LICENSE_HEADER_START@ | |
6 | * | |
7 | * This file contains Original Code and/or Modifications of Original Code | |
8 | * as defined in and that are subject to the Apple Public Source License | |
9 | * Version 2.0 (the 'License'). You may not use this file except in | |
10 | * compliance with the License. Please obtain a copy of the License at | |
11 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
12 | * file. | |
13 | * | |
14 | * The Original Code and all software distributed under the License are | |
15 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
16 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
17 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
19 | * Please see the License for the specific language governing rights and | |
20 | * limitations under the License. | |
21 | * | |
22 | * @APPLE_LICENSE_HEADER_END@ | |
23 | */ | |
24 | #ifndef __MACH_O_TRIE__ | |
25 | #define __MACH_O_TRIE__ | |
26 | ||
412ebb8e | 27 | #include <algorithm> |
19894a12 | 28 | #include <vector> |
39a8cd10 A |
29 | |
30 | #include "MachOFileAbstraction.hpp" | |
31 | ||
412ebb8e | 32 | |
39a8cd10 A |
33 | namespace mach_o { |
34 | namespace trie { | |
35 | ||
36 | struct Edge | |
37 | { | |
38 | Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { } | |
39 | ~Edge() { } | |
40 | const char* fSubString; | |
41 | struct Node* fChild; | |
42 | ||
43 | }; | |
44 | ||
45 | struct Node | |
46 | { | |
412ebb8e A |
47 | Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0), |
48 | fOther(0), fImportedName(NULL), fOrdered(false), | |
39a8cd10 A |
49 | fHaveExportInfo(false), fTrieOffset(0) {} |
50 | ~Node() { } | |
51 | const char* fCummulativeString; | |
52 | std::vector<Edge> fChildren; | |
53 | uint64_t fAddress; | |
412ebb8e A |
54 | uint64_t fFlags; |
55 | uint64_t fOther; | |
56 | const char* fImportedName; | |
39a8cd10 A |
57 | bool fOrdered; |
58 | bool fHaveExportInfo; | |
59 | uint32_t fTrieOffset; | |
60 | ||
412ebb8e | 61 | void addSymbol(const char* fullStr, uint64_t address, uint64_t flags, uint64_t other, const char* importName) { |
39a8cd10 A |
62 | const char* partialStr = &fullStr[strlen(fCummulativeString)]; |
63 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
64 | Edge& e = *it; | |
65 | int subStringLen = strlen(e.fSubString); | |
66 | if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) { | |
67 | // already have matching edge, go down that path | |
412ebb8e | 68 | e.fChild->addSymbol(fullStr, address, flags, other, importName); |
39a8cd10 A |
69 | return; |
70 | } | |
71 | else { | |
72 | for (int i=subStringLen-1; i > 0; --i) { | |
73 | if ( strncmp(e.fSubString, partialStr, i) == 0 ) { | |
74 | // found a common substring, splice in new node | |
75 | // was A -> C, now A -> B -> C | |
76 | char* bNodeCummStr = strdup(e.fChild->fCummulativeString); | |
77 | bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0'; | |
78 | //node* aNode = this; | |
79 | Node* bNode = new Node(bNodeCummStr); | |
80 | Node* cNode = e.fChild; | |
81 | char* abEdgeStr = strdup(e.fSubString); | |
82 | abEdgeStr[i] = '\0'; | |
83 | char* bcEdgeStr = strdup(&e.fSubString[i]); | |
84 | Edge& abEdge = e; | |
85 | abEdge.fSubString = abEdgeStr; | |
86 | abEdge.fChild = bNode; | |
87 | Edge bcEdge(bcEdgeStr, cNode); | |
88 | bNode->fChildren.push_back(bcEdge); | |
412ebb8e | 89 | bNode->addSymbol(fullStr, address, flags, other, importName); |
39a8cd10 A |
90 | return; |
91 | } | |
92 | } | |
93 | } | |
94 | } | |
412ebb8e | 95 | |
39a8cd10 A |
96 | // no commonality with any existing child, make a new edge that is this whole string |
97 | Node* newNode = new Node(strdup(fullStr)); | |
98 | Edge newEdge(strdup(partialStr), newNode); | |
99 | fChildren.push_back(newEdge); | |
100 | newNode->fAddress = address; | |
101 | newNode->fFlags = flags; | |
412ebb8e A |
102 | newNode->fOther = other; |
103 | if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && (importName != NULL) && (strcmp(fullStr,importName) != 0) ) | |
104 | newNode->fImportedName = importName; | |
105 | else | |
106 | newNode->fImportedName = NULL; | |
39a8cd10 A |
107 | newNode->fHaveExportInfo = true; |
108 | } | |
109 | ||
110 | void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) { | |
111 | if ( !fOrdered ) { | |
112 | orderedNodes.push_back(this); | |
113 | //fprintf(stderr, "ordered %p %s\n", this, fCummulativeString); | |
114 | fOrdered = true; | |
115 | } | |
116 | const char* partialStr = &name[strlen(fCummulativeString)]; | |
117 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
118 | Edge& e = *it; | |
119 | int subStringLen = strlen(e.fSubString); | |
120 | if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) { | |
121 | // already have matching edge, go down that path | |
122 | e.fChild->addOrderedNodes(name, orderedNodes); | |
123 | return; | |
124 | } | |
125 | } | |
126 | } | |
127 | ||
128 | // byte for terminal node size in bytes, or 0x00 if not terminal node | |
412ebb8e | 129 | // teminal node (uleb128 flags, uleb128 addr [uleb128 other]) |
39a8cd10 A |
130 | // byte for child node count |
131 | // each child: zero terminated substring, uleb128 node offset | |
132 | bool updateOffset(uint32_t& offset) { | |
412ebb8e A |
133 | uint32_t nodeSize = 1; // length of export info when no export info |
134 | if ( fHaveExportInfo ) { | |
135 | if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { | |
136 | nodeSize = uleb128_size(fFlags) + uleb128_size(fOther); // ordinal | |
137 | if ( fImportedName != NULL ) | |
138 | nodeSize += strlen(fImportedName); | |
139 | ++nodeSize; // trailing zero in imported name | |
140 | } | |
141 | else { | |
142 | nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress); | |
143 | if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) | |
144 | nodeSize += uleb128_size(fOther); | |
145 | } | |
146 | // do have export info, overall node size so far is uleb128 of export info + export info | |
147 | nodeSize += uleb128_size(nodeSize); | |
148 | } | |
39a8cd10 A |
149 | // add children |
150 | ++nodeSize; // byte for count of chidren | |
151 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
152 | Edge& e = *it; | |
153 | nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset); | |
154 | } | |
155 | bool result = (fTrieOffset != offset); | |
156 | fTrieOffset = offset; | |
157 | //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString); | |
158 | offset += nodeSize; | |
159 | // return true if fTrieOffset was changed | |
160 | return result; | |
161 | } | |
162 | ||
163 | void appendToStream(std::vector<uint8_t>& out) { | |
164 | if ( fHaveExportInfo ) { | |
412ebb8e A |
165 | if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { |
166 | if ( fImportedName != NULL ) { | |
167 | // nodes with re-export info: size, flags, ordinal, string | |
168 | uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + strlen(fImportedName) + 1; | |
169 | out.push_back(nodeSize); | |
170 | append_uleb128(fFlags, out); | |
171 | append_uleb128(fOther, out); | |
172 | append_string(fImportedName, out); | |
173 | } | |
174 | else { | |
175 | // nodes with re-export info: size, flags, ordinal, empty-string | |
176 | uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + 1; | |
177 | out.push_back(nodeSize); | |
178 | append_uleb128(fFlags, out); | |
179 | append_uleb128(fOther, out); | |
180 | out.push_back(0); | |
181 | } | |
182 | } | |
183 | else if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { | |
184 | // nodes with export info: size, flags, address, other | |
185 | uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress) + uleb128_size(fOther); | |
186 | out.push_back(nodeSize); | |
187 | append_uleb128(fFlags, out); | |
188 | append_uleb128(fAddress, out); | |
189 | append_uleb128(fOther, out); | |
190 | } | |
191 | else { | |
192 | // nodes with export info: size, flags, address | |
193 | uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress); | |
194 | out.push_back(nodeSize); | |
195 | append_uleb128(fFlags, out); | |
196 | append_uleb128(fAddress, out); | |
197 | } | |
39a8cd10 A |
198 | } |
199 | else { | |
412ebb8e | 200 | // no export info uleb128 of zero is one byte of zero |
39a8cd10 A |
201 | out.push_back(0); |
202 | } | |
203 | // write number of children | |
204 | out.push_back(fChildren.size()); | |
205 | // write each child | |
206 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
207 | Edge& e = *it; | |
208 | append_string(e.fSubString, out); | |
209 | append_uleb128(e.fChild->fTrieOffset, out); | |
210 | } | |
211 | } | |
212 | ||
213 | private: | |
214 | static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) { | |
215 | uint8_t byte; | |
216 | do { | |
217 | byte = value & 0x7F; | |
218 | value &= ~0x7F; | |
219 | if ( value != 0 ) | |
220 | byte |= 0x80; | |
221 | out.push_back(byte); | |
222 | value = value >> 7; | |
223 | } while( byte >= 0x80 ); | |
224 | } | |
225 | ||
226 | static void append_string(const char* str, std::vector<uint8_t>& out) { | |
227 | for (const char* s = str; *s != '\0'; ++s) | |
228 | out.push_back(*s); | |
229 | out.push_back('\0'); | |
230 | } | |
231 | ||
232 | static unsigned int uleb128_size(uint64_t value) { | |
233 | uint32_t result = 0; | |
234 | do { | |
235 | value = value >> 7; | |
236 | ++result; | |
237 | } while ( value != 0 ); | |
238 | return result; | |
239 | } | |
240 | ||
241 | ||
242 | }; | |
243 | ||
244 | inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) { | |
245 | uint64_t result = 0; | |
246 | int bit = 0; | |
247 | do { | |
248 | if (p == end) | |
19894a12 | 249 | #if __EXCEPTIONS |
39a8cd10 | 250 | throw "malformed uleb128 extends beyond trie"; |
19894a12 A |
251 | #else |
252 | return result; | |
253 | #endif | |
39a8cd10 A |
254 | uint64_t slice = *p & 0x7f; |
255 | ||
256 | if (bit >= 64 || slice << bit >> bit != slice) | |
19894a12 | 257 | #if __EXCEPTIONS |
39a8cd10 | 258 | throw "uleb128 too big for 64-bits"; |
19894a12 A |
259 | #else |
260 | return result; | |
261 | #endif | |
39a8cd10 A |
262 | else { |
263 | result |= (slice << bit); | |
264 | bit += 7; | |
265 | } | |
266 | } | |
267 | while (*p++ & 0x80); | |
268 | return result; | |
269 | } | |
270 | ||
271 | ||
272 | ||
273 | struct Entry | |
274 | { | |
275 | const char* name; | |
276 | uint64_t address; | |
277 | uint64_t flags; | |
412ebb8e A |
278 | uint64_t other; |
279 | const char* importName; | |
39a8cd10 A |
280 | }; |
281 | ||
282 | ||
412ebb8e A |
283 | |
284 | inline void makeTrie(const std::vector<Entry>& entries, std::vector<uint8_t>& output) | |
39a8cd10 A |
285 | { |
286 | Node start(strdup("")); | |
287 | ||
288 | // make nodes for all exported symbols | |
412ebb8e A |
289 | for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) { |
290 | start.addSymbol(it->name, it->address, it->flags, it->other, it->importName); | |
39a8cd10 A |
291 | } |
292 | ||
293 | // create vector of nodes | |
294 | std::vector<Node*> orderedNodes; | |
412ebb8e A |
295 | orderedNodes.reserve(entries.size()*2); |
296 | for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) { | |
39a8cd10 A |
297 | start.addOrderedNodes(it->name, orderedNodes); |
298 | } | |
299 | ||
300 | // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized | |
301 | bool more; | |
302 | do { | |
303 | uint32_t offset = 0; | |
304 | more = false; | |
305 | for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) { | |
306 | if ( (*it)->updateOffset(offset) ) | |
307 | more = true; | |
308 | } | |
309 | } while ( more ); | |
310 | ||
311 | // create trie stream | |
312 | for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) { | |
313 | (*it)->appendToStream(output); | |
314 | } | |
315 | } | |
316 | ||
317 | struct EntryWithOffset | |
318 | { | |
319 | uintptr_t nodeOffset; | |
320 | Entry entry; | |
321 | ||
322 | bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); } | |
323 | }; | |
324 | ||
325 | ||
326 | ||
327 | static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end, | |
412ebb8e A |
328 | char* cummulativeString, int curStrOffset, |
329 | std::vector<EntryWithOffset>& output) | |
39a8cd10 A |
330 | { |
331 | if ( p >= end ) | |
19894a12 | 332 | #if __EXCEPTIONS |
412ebb8e | 333 | throw "malformed trie, node past end"; |
19894a12 A |
334 | #else |
335 | return; | |
336 | #endif | |
412ebb8e | 337 | const uint8_t terminalSize = read_uleb128(p, end); |
39a8cd10 A |
338 | const uint8_t* children = p + terminalSize; |
339 | if ( terminalSize != 0 ) { | |
340 | EntryWithOffset e; | |
341 | e.nodeOffset = p-start; | |
342 | e.entry.name = strdup(cummulativeString); | |
343 | e.entry.flags = read_uleb128(p, end); | |
412ebb8e A |
344 | if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { |
345 | e.entry.address = 0; | |
346 | e.entry.other = read_uleb128(p, end); // dylib ordinal | |
347 | e.entry.importName = (char*)p; | |
348 | } | |
349 | else { | |
350 | e.entry.address = read_uleb128(p, end); | |
351 | if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) | |
352 | e.entry.other = read_uleb128(p, end); | |
353 | else | |
354 | e.entry.other = 0; | |
355 | e.entry.importName = NULL; | |
356 | } | |
39a8cd10 A |
357 | output.push_back(e); |
358 | } | |
359 | const uint8_t childrenCount = *children++; | |
360 | const uint8_t* s = children; | |
361 | for (uint8_t i=0; i < childrenCount; ++i) { | |
362 | int edgeStrLen = 0; | |
363 | while (*s != '\0') { | |
364 | cummulativeString[curStrOffset+edgeStrLen] = *s++; | |
365 | ++edgeStrLen; | |
366 | } | |
367 | cummulativeString[curStrOffset+edgeStrLen] = *s++; | |
368 | uint32_t childNodeOffet = read_uleb128(s, end); | |
369 | processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output); | |
370 | } | |
371 | } | |
372 | ||
373 | ||
374 | inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output) | |
375 | { | |
376 | // empty trie has no entries | |
377 | if ( start == end ) | |
378 | return; | |
39a8cd10 A |
379 | char cummulativeString[4000]; |
380 | std::vector<EntryWithOffset> entries; | |
381 | processExportNode(start, start, end, cummulativeString, 0, entries); | |
382 | // to preserve tie layout order, sort by node offset | |
383 | std::sort(entries.begin(), entries.end()); | |
384 | // copy to output | |
385 | output.reserve(entries.size()); | |
386 | for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it) | |
387 | output.push_back(it->entry); | |
388 | } | |
389 | ||
390 | ||
391 | ||
392 | ||
393 | }; // namespace trie | |
394 | }; // namespace mach_o | |
395 | ||
396 | ||
397 | #endif // __MACH_O_TRIE__ | |
398 | ||
399 |