]>
Commit | Line | Data |
---|---|---|
55e3d2f6 A |
1 | /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- |
2 | * | |
a645023d | 3 | * Copyright (c) 2008-2010 Apple Inc. All rights reserved. |
55e3d2f6 A |
4 | * |
5 | * @APPLE_LICENSE_HEADER_START@ | |
6 | * | |
7 | * This file contains Original Code and/or Modifications of Original Code | |
8 | * as defined in and that are subject to the Apple Public Source License | |
9 | * Version 2.0 (the 'License'). You may not use this file except in | |
10 | * compliance with the License. Please obtain a copy of the License at | |
11 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
12 | * file. | |
13 | * | |
14 | * The Original Code and all software distributed under the License are | |
15 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
16 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
17 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
19 | * Please see the License for the specific language governing rights and | |
20 | * limitations under the License. | |
21 | * | |
22 | * @APPLE_LICENSE_HEADER_END@ | |
23 | */ | |
24 | #ifndef __MACH_O_TRIE__ | |
25 | #define __MACH_O_TRIE__ | |
26 | ||
27 | #include <algorithm> | |
a645023d | 28 | #include <assert.h> |
55e3d2f6 A |
29 | |
30 | #include "MachOFileAbstraction.hpp" | |
31 | ||
a645023d | 32 | |
55e3d2f6 A |
33 | namespace mach_o { |
34 | namespace trie { | |
35 | ||
36 | struct Edge | |
37 | { | |
38 | Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { } | |
39 | ~Edge() { } | |
40 | const char* fSubString; | |
41 | struct Node* fChild; | |
42 | ||
43 | }; | |
44 | ||
45 | struct Node | |
46 | { | |
a645023d A |
47 | Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0), |
48 | fOther(0), fImportedName(NULL), fOrdered(false), | |
55e3d2f6 A |
49 | fHaveExportInfo(false), fTrieOffset(0) {} |
50 | ~Node() { } | |
51 | const char* fCummulativeString; | |
52 | std::vector<Edge> fChildren; | |
53 | uint64_t fAddress; | |
a645023d A |
54 | uint64_t fFlags; |
55 | uint64_t fOther; | |
56 | const char* fImportedName; | |
55e3d2f6 A |
57 | bool fOrdered; |
58 | bool fHaveExportInfo; | |
59 | uint32_t fTrieOffset; | |
60 | ||
a645023d | 61 | void addSymbol(const char* fullStr, uint64_t address, uint64_t flags, uint64_t other, const char* importName) { |
55e3d2f6 A |
62 | const char* partialStr = &fullStr[strlen(fCummulativeString)]; |
63 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
64 | Edge& e = *it; | |
65 | int subStringLen = strlen(e.fSubString); | |
66 | if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) { | |
67 | // already have matching edge, go down that path | |
a645023d | 68 | e.fChild->addSymbol(fullStr, address, flags, other, importName); |
55e3d2f6 A |
69 | return; |
70 | } | |
71 | else { | |
72 | for (int i=subStringLen-1; i > 0; --i) { | |
73 | if ( strncmp(e.fSubString, partialStr, i) == 0 ) { | |
74 | // found a common substring, splice in new node | |
75 | // was A -> C, now A -> B -> C | |
76 | char* bNodeCummStr = strdup(e.fChild->fCummulativeString); | |
77 | bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0'; | |
78 | //node* aNode = this; | |
79 | Node* bNode = new Node(bNodeCummStr); | |
80 | Node* cNode = e.fChild; | |
81 | char* abEdgeStr = strdup(e.fSubString); | |
82 | abEdgeStr[i] = '\0'; | |
83 | char* bcEdgeStr = strdup(&e.fSubString[i]); | |
84 | Edge& abEdge = e; | |
85 | abEdge.fSubString = abEdgeStr; | |
86 | abEdge.fChild = bNode; | |
87 | Edge bcEdge(bcEdgeStr, cNode); | |
88 | bNode->fChildren.push_back(bcEdge); | |
a645023d | 89 | bNode->addSymbol(fullStr, address, flags, other, importName); |
55e3d2f6 A |
90 | return; |
91 | } | |
92 | } | |
93 | } | |
94 | } | |
a645023d A |
95 | if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { |
96 | assert(importName != NULL); | |
97 | assert(other != 0); | |
98 | } | |
99 | if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { | |
100 | assert(other != 0); | |
101 | } | |
55e3d2f6 A |
102 | // no commonality with any existing child, make a new edge that is this whole string |
103 | Node* newNode = new Node(strdup(fullStr)); | |
104 | Edge newEdge(strdup(partialStr), newNode); | |
105 | fChildren.push_back(newEdge); | |
106 | newNode->fAddress = address; | |
107 | newNode->fFlags = flags; | |
a645023d A |
108 | newNode->fOther = other; |
109 | if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && (importName != NULL) && (strcmp(fullStr,importName) != 0) ) | |
110 | newNode->fImportedName = importName; | |
111 | else | |
112 | newNode->fImportedName = NULL; | |
55e3d2f6 A |
113 | newNode->fHaveExportInfo = true; |
114 | } | |
115 | ||
116 | void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) { | |
117 | if ( !fOrdered ) { | |
118 | orderedNodes.push_back(this); | |
119 | //fprintf(stderr, "ordered %p %s\n", this, fCummulativeString); | |
120 | fOrdered = true; | |
121 | } | |
122 | const char* partialStr = &name[strlen(fCummulativeString)]; | |
123 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
124 | Edge& e = *it; | |
125 | int subStringLen = strlen(e.fSubString); | |
126 | if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) { | |
127 | // already have matching edge, go down that path | |
128 | e.fChild->addOrderedNodes(name, orderedNodes); | |
129 | return; | |
130 | } | |
131 | } | |
132 | } | |
133 | ||
134 | // byte for terminal node size in bytes, or 0x00 if not terminal node | |
a645023d | 135 | // teminal node (uleb128 flags, uleb128 addr [uleb128 other]) |
55e3d2f6 A |
136 | // byte for child node count |
137 | // each child: zero terminated substring, uleb128 node offset | |
138 | bool updateOffset(uint32_t& offset) { | |
a645023d A |
139 | uint32_t nodeSize = 1; // length of export info when no export info |
140 | if ( fHaveExportInfo ) { | |
141 | if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { | |
142 | nodeSize = uleb128_size(fFlags) + uleb128_size(fOther); // ordinal | |
143 | if ( fImportedName != NULL ) | |
144 | nodeSize += strlen(fImportedName); | |
145 | ++nodeSize; // trailing zero in imported name | |
146 | } | |
147 | else { | |
148 | nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress); | |
149 | if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) | |
150 | nodeSize += uleb128_size(fOther); | |
151 | } | |
152 | // do have export info, overall node size so far is uleb128 of export info + export info | |
153 | nodeSize += uleb128_size(nodeSize); | |
154 | } | |
55e3d2f6 A |
155 | // add children |
156 | ++nodeSize; // byte for count of chidren | |
157 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
158 | Edge& e = *it; | |
159 | nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset); | |
160 | } | |
161 | bool result = (fTrieOffset != offset); | |
162 | fTrieOffset = offset; | |
163 | //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString); | |
164 | offset += nodeSize; | |
165 | // return true if fTrieOffset was changed | |
166 | return result; | |
167 | } | |
168 | ||
169 | void appendToStream(std::vector<uint8_t>& out) { | |
170 | if ( fHaveExportInfo ) { | |
a645023d A |
171 | if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { |
172 | if ( fImportedName != NULL ) { | |
173 | // nodes with re-export info: size, flags, ordinal, string | |
174 | uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + strlen(fImportedName) + 1; | |
175 | out.push_back(nodeSize); | |
176 | append_uleb128(fFlags, out); | |
177 | append_uleb128(fOther, out); | |
178 | append_string(fImportedName, out); | |
179 | } | |
180 | else { | |
181 | // nodes with re-export info: size, flags, ordinal, empty-string | |
182 | uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + 1; | |
183 | out.push_back(nodeSize); | |
184 | append_uleb128(fFlags, out); | |
185 | append_uleb128(fOther, out); | |
186 | out.push_back(0); | |
187 | } | |
188 | } | |
189 | else if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { | |
190 | // nodes with export info: size, flags, address, other | |
191 | uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress) + uleb128_size(fOther); | |
192 | out.push_back(nodeSize); | |
193 | append_uleb128(fFlags, out); | |
194 | append_uleb128(fAddress, out); | |
195 | append_uleb128(fOther, out); | |
196 | } | |
197 | else { | |
198 | // nodes with export info: size, flags, address | |
199 | uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress); | |
200 | out.push_back(nodeSize); | |
201 | append_uleb128(fFlags, out); | |
202 | append_uleb128(fAddress, out); | |
203 | } | |
55e3d2f6 A |
204 | } |
205 | else { | |
a645023d | 206 | // no export info uleb128 of zero is one byte of zero |
55e3d2f6 A |
207 | out.push_back(0); |
208 | } | |
209 | // write number of children | |
210 | out.push_back(fChildren.size()); | |
211 | // write each child | |
212 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
213 | Edge& e = *it; | |
214 | append_string(e.fSubString, out); | |
215 | append_uleb128(e.fChild->fTrieOffset, out); | |
216 | } | |
217 | } | |
218 | ||
219 | private: | |
220 | static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) { | |
221 | uint8_t byte; | |
222 | do { | |
223 | byte = value & 0x7F; | |
224 | value &= ~0x7F; | |
225 | if ( value != 0 ) | |
226 | byte |= 0x80; | |
227 | out.push_back(byte); | |
228 | value = value >> 7; | |
229 | } while( byte >= 0x80 ); | |
230 | } | |
231 | ||
232 | static void append_string(const char* str, std::vector<uint8_t>& out) { | |
233 | for (const char* s = str; *s != '\0'; ++s) | |
234 | out.push_back(*s); | |
235 | out.push_back('\0'); | |
236 | } | |
237 | ||
238 | static unsigned int uleb128_size(uint64_t value) { | |
239 | uint32_t result = 0; | |
240 | do { | |
241 | value = value >> 7; | |
242 | ++result; | |
243 | } while ( value != 0 ); | |
244 | return result; | |
245 | } | |
246 | ||
247 | ||
248 | }; | |
249 | ||
250 | inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) { | |
251 | uint64_t result = 0; | |
252 | int bit = 0; | |
253 | do { | |
254 | if (p == end) | |
255 | throw "malformed uleb128 extends beyond trie"; | |
256 | ||
257 | uint64_t slice = *p & 0x7f; | |
258 | ||
259 | if (bit >= 64 || slice << bit >> bit != slice) | |
260 | throw "uleb128 too big for 64-bits"; | |
261 | else { | |
262 | result |= (slice << bit); | |
263 | bit += 7; | |
264 | } | |
265 | } | |
266 | while (*p++ & 0x80); | |
267 | return result; | |
268 | } | |
269 | ||
270 | ||
271 | ||
272 | struct Entry | |
273 | { | |
274 | const char* name; | |
275 | uint64_t address; | |
276 | uint64_t flags; | |
a645023d A |
277 | uint64_t other; |
278 | const char* importName; | |
55e3d2f6 A |
279 | }; |
280 | ||
281 | ||
a645023d A |
282 | |
283 | inline void makeTrie(const std::vector<Entry>& entries, std::vector<uint8_t>& output) | |
55e3d2f6 A |
284 | { |
285 | Node start(strdup("")); | |
286 | ||
287 | // make nodes for all exported symbols | |
a645023d A |
288 | for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) { |
289 | start.addSymbol(it->name, it->address, it->flags, it->other, it->importName); | |
55e3d2f6 A |
290 | } |
291 | ||
292 | // create vector of nodes | |
293 | std::vector<Node*> orderedNodes; | |
a645023d A |
294 | orderedNodes.reserve(entries.size()*2); |
295 | for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) { | |
55e3d2f6 A |
296 | start.addOrderedNodes(it->name, orderedNodes); |
297 | } | |
298 | ||
299 | // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized | |
300 | bool more; | |
301 | do { | |
302 | uint32_t offset = 0; | |
303 | more = false; | |
304 | for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) { | |
305 | if ( (*it)->updateOffset(offset) ) | |
306 | more = true; | |
307 | } | |
308 | } while ( more ); | |
309 | ||
310 | // create trie stream | |
311 | for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) { | |
312 | (*it)->appendToStream(output); | |
313 | } | |
314 | } | |
315 | ||
316 | struct EntryWithOffset | |
317 | { | |
318 | uintptr_t nodeOffset; | |
319 | Entry entry; | |
320 | ||
321 | bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); } | |
322 | }; | |
323 | ||
324 | ||
325 | ||
326 | static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end, | |
a645023d A |
327 | char* cummulativeString, int curStrOffset, |
328 | std::vector<EntryWithOffset>& output) | |
55e3d2f6 A |
329 | { |
330 | if ( p >= end ) | |
331 | throw "malformed trie, node past end"; | |
a645023d | 332 | const uint8_t terminalSize = read_uleb128(p, end); |
55e3d2f6 A |
333 | const uint8_t* children = p + terminalSize; |
334 | if ( terminalSize != 0 ) { | |
335 | EntryWithOffset e; | |
336 | e.nodeOffset = p-start; | |
337 | e.entry.name = strdup(cummulativeString); | |
338 | e.entry.flags = read_uleb128(p, end); | |
a645023d A |
339 | if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { |
340 | e.entry.address = 0; | |
341 | e.entry.other = read_uleb128(p, end); // dylib ordinal | |
342 | e.entry.importName = (char*)p; | |
343 | } | |
344 | else { | |
345 | e.entry.address = read_uleb128(p, end); | |
346 | if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) | |
347 | e.entry.other = read_uleb128(p, end); | |
348 | else | |
349 | e.entry.other = 0; | |
350 | e.entry.importName = NULL; | |
351 | } | |
55e3d2f6 A |
352 | output.push_back(e); |
353 | } | |
354 | const uint8_t childrenCount = *children++; | |
355 | const uint8_t* s = children; | |
356 | for (uint8_t i=0; i < childrenCount; ++i) { | |
357 | int edgeStrLen = 0; | |
358 | while (*s != '\0') { | |
359 | cummulativeString[curStrOffset+edgeStrLen] = *s++; | |
360 | ++edgeStrLen; | |
361 | } | |
362 | cummulativeString[curStrOffset+edgeStrLen] = *s++; | |
363 | uint32_t childNodeOffet = read_uleb128(s, end); | |
364 | processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output); | |
365 | } | |
366 | } | |
367 | ||
368 | ||
369 | inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output) | |
a645023d A |
370 | { |
371 | // empty trie has no entries | |
55e3d2f6 A |
372 | if ( start == end ) |
373 | return; | |
374 | char cummulativeString[4000]; | |
375 | std::vector<EntryWithOffset> entries; | |
376 | processExportNode(start, start, end, cummulativeString, 0, entries); | |
377 | // to preserve tie layout order, sort by node offset | |
378 | std::sort(entries.begin(), entries.end()); | |
379 | // copy to output | |
380 | output.reserve(entries.size()); | |
381 | for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it) | |
382 | output.push_back(it->entry); | |
383 | } | |
384 | ||
385 | ||
386 | ||
387 | ||
388 | }; // namespace trie | |
389 | }; // namespace mach_o | |
390 | ||
391 | ||
392 | #endif // __MACH_O_TRIE__ | |
393 | ||
394 |