]>
Commit | Line | Data |
---|---|---|
55e3d2f6 A |
1 | /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- |
2 | * | |
3 | * Copyright (c) 2008 Apple Inc. All rights reserved. | |
4 | * | |
5 | * @APPLE_LICENSE_HEADER_START@ | |
6 | * | |
7 | * This file contains Original Code and/or Modifications of Original Code | |
8 | * as defined in and that are subject to the Apple Public Source License | |
9 | * Version 2.0 (the 'License'). You may not use this file except in | |
10 | * compliance with the License. Please obtain a copy of the License at | |
11 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
12 | * file. | |
13 | * | |
14 | * The Original Code and all software distributed under the License are | |
15 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
16 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
17 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
19 | * Please see the License for the specific language governing rights and | |
20 | * limitations under the License. | |
21 | * | |
22 | * @APPLE_LICENSE_HEADER_END@ | |
23 | */ | |
24 | #ifndef __MACH_O_TRIE__ | |
25 | #define __MACH_O_TRIE__ | |
26 | ||
27 | #include <algorithm> | |
28 | ||
29 | #include "MachOFileAbstraction.hpp" | |
30 | ||
31 | namespace mach_o { | |
32 | namespace trie { | |
33 | ||
34 | struct Edge | |
35 | { | |
36 | Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { } | |
37 | ~Edge() { } | |
38 | const char* fSubString; | |
39 | struct Node* fChild; | |
40 | ||
41 | }; | |
42 | ||
43 | struct Node | |
44 | { | |
45 | Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0), fOrdered(false), | |
46 | fHaveExportInfo(false), fTrieOffset(0) {} | |
47 | ~Node() { } | |
48 | const char* fCummulativeString; | |
49 | std::vector<Edge> fChildren; | |
50 | uint64_t fAddress; | |
51 | uint32_t fFlags; | |
52 | bool fOrdered; | |
53 | bool fHaveExportInfo; | |
54 | uint32_t fTrieOffset; | |
55 | ||
56 | void addSymbol(const char* fullStr, uint64_t address, uint32_t flags) { | |
57 | const char* partialStr = &fullStr[strlen(fCummulativeString)]; | |
58 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
59 | Edge& e = *it; | |
60 | int subStringLen = strlen(e.fSubString); | |
61 | if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) { | |
62 | // already have matching edge, go down that path | |
63 | e.fChild->addSymbol(fullStr, address, flags); | |
64 | return; | |
65 | } | |
66 | else { | |
67 | for (int i=subStringLen-1; i > 0; --i) { | |
68 | if ( strncmp(e.fSubString, partialStr, i) == 0 ) { | |
69 | // found a common substring, splice in new node | |
70 | // was A -> C, now A -> B -> C | |
71 | char* bNodeCummStr = strdup(e.fChild->fCummulativeString); | |
72 | bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0'; | |
73 | //node* aNode = this; | |
74 | Node* bNode = new Node(bNodeCummStr); | |
75 | Node* cNode = e.fChild; | |
76 | char* abEdgeStr = strdup(e.fSubString); | |
77 | abEdgeStr[i] = '\0'; | |
78 | char* bcEdgeStr = strdup(&e.fSubString[i]); | |
79 | Edge& abEdge = e; | |
80 | abEdge.fSubString = abEdgeStr; | |
81 | abEdge.fChild = bNode; | |
82 | Edge bcEdge(bcEdgeStr, cNode); | |
83 | bNode->fChildren.push_back(bcEdge); | |
84 | bNode->addSymbol(fullStr, address, flags); | |
85 | return; | |
86 | } | |
87 | } | |
88 | } | |
89 | } | |
90 | // no commonality with any existing child, make a new edge that is this whole string | |
91 | Node* newNode = new Node(strdup(fullStr)); | |
92 | Edge newEdge(strdup(partialStr), newNode); | |
93 | fChildren.push_back(newEdge); | |
94 | newNode->fAddress = address; | |
95 | newNode->fFlags = flags; | |
96 | newNode->fHaveExportInfo = true; | |
97 | } | |
98 | ||
99 | void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) { | |
100 | if ( !fOrdered ) { | |
101 | orderedNodes.push_back(this); | |
102 | //fprintf(stderr, "ordered %p %s\n", this, fCummulativeString); | |
103 | fOrdered = true; | |
104 | } | |
105 | const char* partialStr = &name[strlen(fCummulativeString)]; | |
106 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
107 | Edge& e = *it; | |
108 | int subStringLen = strlen(e.fSubString); | |
109 | if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) { | |
110 | // already have matching edge, go down that path | |
111 | e.fChild->addOrderedNodes(name, orderedNodes); | |
112 | return; | |
113 | } | |
114 | } | |
115 | } | |
116 | ||
117 | // byte for terminal node size in bytes, or 0x00 if not terminal node | |
118 | // teminal node (uleb128 flags, uleb128 addr) | |
119 | // byte for child node count | |
120 | // each child: zero terminated substring, uleb128 node offset | |
121 | bool updateOffset(uint32_t& offset) { | |
122 | uint32_t nodeSize = 1; // byte for length of export info | |
123 | if ( fHaveExportInfo ) | |
124 | nodeSize += uleb128_size(fFlags) + uleb128_size(fAddress); | |
125 | ||
126 | // add children | |
127 | ++nodeSize; // byte for count of chidren | |
128 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
129 | Edge& e = *it; | |
130 | nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset); | |
131 | } | |
132 | bool result = (fTrieOffset != offset); | |
133 | fTrieOffset = offset; | |
134 | //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString); | |
135 | offset += nodeSize; | |
136 | // return true if fTrieOffset was changed | |
137 | return result; | |
138 | } | |
139 | ||
140 | void appendToStream(std::vector<uint8_t>& out) { | |
141 | if ( fHaveExportInfo ) { | |
142 | // nodes with export info: size, flags, address | |
143 | out.push_back(uleb128_size(fFlags) + uleb128_size(fAddress)); | |
144 | append_uleb128(fFlags, out); | |
145 | append_uleb128(fAddress, out); | |
146 | } | |
147 | else { | |
148 | // no export info | |
149 | out.push_back(0); | |
150 | } | |
151 | // write number of children | |
152 | out.push_back(fChildren.size()); | |
153 | // write each child | |
154 | for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { | |
155 | Edge& e = *it; | |
156 | append_string(e.fSubString, out); | |
157 | append_uleb128(e.fChild->fTrieOffset, out); | |
158 | } | |
159 | } | |
160 | ||
161 | private: | |
162 | static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) { | |
163 | uint8_t byte; | |
164 | do { | |
165 | byte = value & 0x7F; | |
166 | value &= ~0x7F; | |
167 | if ( value != 0 ) | |
168 | byte |= 0x80; | |
169 | out.push_back(byte); | |
170 | value = value >> 7; | |
171 | } while( byte >= 0x80 ); | |
172 | } | |
173 | ||
174 | static void append_string(const char* str, std::vector<uint8_t>& out) { | |
175 | for (const char* s = str; *s != '\0'; ++s) | |
176 | out.push_back(*s); | |
177 | out.push_back('\0'); | |
178 | } | |
179 | ||
180 | static unsigned int uleb128_size(uint64_t value) { | |
181 | uint32_t result = 0; | |
182 | do { | |
183 | value = value >> 7; | |
184 | ++result; | |
185 | } while ( value != 0 ); | |
186 | return result; | |
187 | } | |
188 | ||
189 | ||
190 | }; | |
191 | ||
192 | inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) { | |
193 | uint64_t result = 0; | |
194 | int bit = 0; | |
195 | do { | |
196 | if (p == end) | |
197 | throw "malformed uleb128 extends beyond trie"; | |
198 | ||
199 | uint64_t slice = *p & 0x7f; | |
200 | ||
201 | if (bit >= 64 || slice << bit >> bit != slice) | |
202 | throw "uleb128 too big for 64-bits"; | |
203 | else { | |
204 | result |= (slice << bit); | |
205 | bit += 7; | |
206 | } | |
207 | } | |
208 | while (*p++ & 0x80); | |
209 | return result; | |
210 | } | |
211 | ||
212 | ||
213 | ||
214 | struct Entry | |
215 | { | |
216 | const char* name; | |
217 | uint64_t address; | |
218 | uint64_t flags; | |
219 | }; | |
220 | ||
221 | ||
222 | inline void makeTrie(const std::vector<Entry>& input, std::vector<uint8_t>& output) | |
223 | { | |
224 | Node start(strdup("")); | |
225 | ||
226 | // make nodes for all exported symbols | |
227 | for (std::vector<Entry>::const_iterator it = input.begin(); it != input.end(); ++it) { | |
228 | start.addSymbol(it->name, it->address, it->flags); | |
229 | } | |
230 | ||
231 | // create vector of nodes | |
232 | std::vector<Node*> orderedNodes; | |
233 | orderedNodes.reserve(input.size()*2); | |
234 | for (std::vector<Entry>::const_iterator it = input.begin(); it != input.end(); ++it) { | |
235 | start.addOrderedNodes(it->name, orderedNodes); | |
236 | } | |
237 | ||
238 | // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized | |
239 | bool more; | |
240 | do { | |
241 | uint32_t offset = 0; | |
242 | more = false; | |
243 | for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) { | |
244 | if ( (*it)->updateOffset(offset) ) | |
245 | more = true; | |
246 | } | |
247 | } while ( more ); | |
248 | ||
249 | // create trie stream | |
250 | for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) { | |
251 | (*it)->appendToStream(output); | |
252 | } | |
253 | } | |
254 | ||
255 | struct EntryWithOffset | |
256 | { | |
257 | uintptr_t nodeOffset; | |
258 | Entry entry; | |
259 | ||
260 | bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); } | |
261 | }; | |
262 | ||
263 | ||
264 | ||
265 | static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end, | |
266 | char* cummulativeString, int curStrOffset, std::vector<EntryWithOffset>& output) | |
267 | { | |
268 | if ( p >= end ) | |
269 | throw "malformed trie, node past end"; | |
270 | const uint8_t terminalSize = *p++; | |
271 | const uint8_t* children = p + terminalSize; | |
272 | if ( terminalSize != 0 ) { | |
273 | EntryWithOffset e; | |
274 | e.nodeOffset = p-start; | |
275 | e.entry.name = strdup(cummulativeString); | |
276 | e.entry.flags = read_uleb128(p, end); | |
277 | e.entry.address = read_uleb128(p, end); | |
278 | output.push_back(e); | |
279 | } | |
280 | const uint8_t childrenCount = *children++; | |
281 | const uint8_t* s = children; | |
282 | for (uint8_t i=0; i < childrenCount; ++i) { | |
283 | int edgeStrLen = 0; | |
284 | while (*s != '\0') { | |
285 | cummulativeString[curStrOffset+edgeStrLen] = *s++; | |
286 | ++edgeStrLen; | |
287 | } | |
288 | cummulativeString[curStrOffset+edgeStrLen] = *s++; | |
289 | uint32_t childNodeOffet = read_uleb128(s, end); | |
290 | processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output); | |
291 | } | |
292 | } | |
293 | ||
294 | ||
295 | inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output) | |
296 | { | |
297 | // empty tree has no entries | |
298 | if ( start == end ) | |
299 | return; | |
300 | char cummulativeString[4000]; | |
301 | std::vector<EntryWithOffset> entries; | |
302 | processExportNode(start, start, end, cummulativeString, 0, entries); | |
303 | // to preserve tie layout order, sort by node offset | |
304 | std::sort(entries.begin(), entries.end()); | |
305 | // copy to output | |
306 | output.reserve(entries.size()); | |
307 | for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it) | |
308 | output.push_back(it->entry); | |
309 | } | |
310 | ||
311 | ||
312 | ||
313 | ||
314 | }; // namespace trie | |
315 | }; // namespace mach_o | |
316 | ||
317 | ||
318 | #endif // __MACH_O_TRIE__ | |
319 | ||
320 |