]> git.saurik.com Git - apple/dyld.git/blob - launch-cache/MachOTrie.hpp
05a8e070387fbee0682a8f9ed5b9e39dcf9062e3
[apple/dyld.git] / launch-cache / MachOTrie.hpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24 #ifndef __MACH_O_TRIE__
25 #define __MACH_O_TRIE__
26
27 #include <algorithm>
28
29 #include "MachOFileAbstraction.hpp"
30
31
32 namespace mach_o {
33 namespace trie {
34
35 struct Edge
36 {
37 Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { }
38 ~Edge() { }
39 const char* fSubString;
40 struct Node* fChild;
41
42 };
43
44 struct Node
45 {
46 Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0),
47 fOther(0), fImportedName(NULL), fOrdered(false),
48 fHaveExportInfo(false), fTrieOffset(0) {}
49 ~Node() { }
50 const char* fCummulativeString;
51 std::vector<Edge> fChildren;
52 uint64_t fAddress;
53 uint64_t fFlags;
54 uint64_t fOther;
55 const char* fImportedName;
56 bool fOrdered;
57 bool fHaveExportInfo;
58 uint32_t fTrieOffset;
59
60 void addSymbol(const char* fullStr, uint64_t address, uint64_t flags, uint64_t other, const char* importName) {
61 const char* partialStr = &fullStr[strlen(fCummulativeString)];
62 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
63 Edge& e = *it;
64 int subStringLen = strlen(e.fSubString);
65 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
66 // already have matching edge, go down that path
67 e.fChild->addSymbol(fullStr, address, flags, other, importName);
68 return;
69 }
70 else {
71 for (int i=subStringLen-1; i > 0; --i) {
72 if ( strncmp(e.fSubString, partialStr, i) == 0 ) {
73 // found a common substring, splice in new node
74 // was A -> C, now A -> B -> C
75 char* bNodeCummStr = strdup(e.fChild->fCummulativeString);
76 bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0';
77 //node* aNode = this;
78 Node* bNode = new Node(bNodeCummStr);
79 Node* cNode = e.fChild;
80 char* abEdgeStr = strdup(e.fSubString);
81 abEdgeStr[i] = '\0';
82 char* bcEdgeStr = strdup(&e.fSubString[i]);
83 Edge& abEdge = e;
84 abEdge.fSubString = abEdgeStr;
85 abEdge.fChild = bNode;
86 Edge bcEdge(bcEdgeStr, cNode);
87 bNode->fChildren.push_back(bcEdge);
88 bNode->addSymbol(fullStr, address, flags, other, importName);
89 return;
90 }
91 }
92 }
93 }
94
95 // no commonality with any existing child, make a new edge that is this whole string
96 Node* newNode = new Node(strdup(fullStr));
97 Edge newEdge(strdup(partialStr), newNode);
98 fChildren.push_back(newEdge);
99 newNode->fAddress = address;
100 newNode->fFlags = flags;
101 newNode->fOther = other;
102 if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && (importName != NULL) && (strcmp(fullStr,importName) != 0) )
103 newNode->fImportedName = importName;
104 else
105 newNode->fImportedName = NULL;
106 newNode->fHaveExportInfo = true;
107 }
108
109 void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) {
110 if ( !fOrdered ) {
111 orderedNodes.push_back(this);
112 //fprintf(stderr, "ordered %p %s\n", this, fCummulativeString);
113 fOrdered = true;
114 }
115 const char* partialStr = &name[strlen(fCummulativeString)];
116 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
117 Edge& e = *it;
118 int subStringLen = strlen(e.fSubString);
119 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
120 // already have matching edge, go down that path
121 e.fChild->addOrderedNodes(name, orderedNodes);
122 return;
123 }
124 }
125 }
126
127 // byte for terminal node size in bytes, or 0x00 if not terminal node
128 // teminal node (uleb128 flags, uleb128 addr [uleb128 other])
129 // byte for child node count
130 // each child: zero terminated substring, uleb128 node offset
131 bool updateOffset(uint32_t& offset) {
132 uint32_t nodeSize = 1; // length of export info when no export info
133 if ( fHaveExportInfo ) {
134 if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
135 nodeSize = uleb128_size(fFlags) + uleb128_size(fOther); // ordinal
136 if ( fImportedName != NULL )
137 nodeSize += strlen(fImportedName);
138 ++nodeSize; // trailing zero in imported name
139 }
140 else {
141 nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
142 if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
143 nodeSize += uleb128_size(fOther);
144 }
145 // do have export info, overall node size so far is uleb128 of export info + export info
146 nodeSize += uleb128_size(nodeSize);
147 }
148 // add children
149 ++nodeSize; // byte for count of chidren
150 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
151 Edge& e = *it;
152 nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset);
153 }
154 bool result = (fTrieOffset != offset);
155 fTrieOffset = offset;
156 //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString);
157 offset += nodeSize;
158 // return true if fTrieOffset was changed
159 return result;
160 }
161
162 void appendToStream(std::vector<uint8_t>& out) {
163 if ( fHaveExportInfo ) {
164 if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
165 if ( fImportedName != NULL ) {
166 // nodes with re-export info: size, flags, ordinal, string
167 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + strlen(fImportedName) + 1;
168 out.push_back(nodeSize);
169 append_uleb128(fFlags, out);
170 append_uleb128(fOther, out);
171 append_string(fImportedName, out);
172 }
173 else {
174 // nodes with re-export info: size, flags, ordinal, empty-string
175 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + 1;
176 out.push_back(nodeSize);
177 append_uleb128(fFlags, out);
178 append_uleb128(fOther, out);
179 out.push_back(0);
180 }
181 }
182 else if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
183 // nodes with export info: size, flags, address, other
184 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress) + uleb128_size(fOther);
185 out.push_back(nodeSize);
186 append_uleb128(fFlags, out);
187 append_uleb128(fAddress, out);
188 append_uleb128(fOther, out);
189 }
190 else {
191 // nodes with export info: size, flags, address
192 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
193 out.push_back(nodeSize);
194 append_uleb128(fFlags, out);
195 append_uleb128(fAddress, out);
196 }
197 }
198 else {
199 // no export info uleb128 of zero is one byte of zero
200 out.push_back(0);
201 }
202 // write number of children
203 out.push_back(fChildren.size());
204 // write each child
205 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
206 Edge& e = *it;
207 append_string(e.fSubString, out);
208 append_uleb128(e.fChild->fTrieOffset, out);
209 }
210 }
211
212 private:
213 static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) {
214 uint8_t byte;
215 do {
216 byte = value & 0x7F;
217 value &= ~0x7F;
218 if ( value != 0 )
219 byte |= 0x80;
220 out.push_back(byte);
221 value = value >> 7;
222 } while( byte >= 0x80 );
223 }
224
225 static void append_string(const char* str, std::vector<uint8_t>& out) {
226 for (const char* s = str; *s != '\0'; ++s)
227 out.push_back(*s);
228 out.push_back('\0');
229 }
230
231 static unsigned int uleb128_size(uint64_t value) {
232 uint32_t result = 0;
233 do {
234 value = value >> 7;
235 ++result;
236 } while ( value != 0 );
237 return result;
238 }
239
240
241 };
242
243 inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) {
244 uint64_t result = 0;
245 int bit = 0;
246 do {
247 if (p == end)
248 throw "malformed uleb128 extends beyond trie";
249
250 uint64_t slice = *p & 0x7f;
251
252 if (bit >= 64 || slice << bit >> bit != slice)
253 throw "uleb128 too big for 64-bits";
254 else {
255 result |= (slice << bit);
256 bit += 7;
257 }
258 }
259 while (*p++ & 0x80);
260 return result;
261 }
262
263
264
265 struct Entry
266 {
267 const char* name;
268 uint64_t address;
269 uint64_t flags;
270 uint64_t other;
271 const char* importName;
272 };
273
274
275
276 inline void makeTrie(const std::vector<Entry>& entries, std::vector<uint8_t>& output)
277 {
278 Node start(strdup(""));
279
280 // make nodes for all exported symbols
281 for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
282 start.addSymbol(it->name, it->address, it->flags, it->other, it->importName);
283 }
284
285 // create vector of nodes
286 std::vector<Node*> orderedNodes;
287 orderedNodes.reserve(entries.size()*2);
288 for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
289 start.addOrderedNodes(it->name, orderedNodes);
290 }
291
292 // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized
293 bool more;
294 do {
295 uint32_t offset = 0;
296 more = false;
297 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
298 if ( (*it)->updateOffset(offset) )
299 more = true;
300 }
301 } while ( more );
302
303 // create trie stream
304 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
305 (*it)->appendToStream(output);
306 }
307 }
308
309 struct EntryWithOffset
310 {
311 uintptr_t nodeOffset;
312 Entry entry;
313
314 bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); }
315 };
316
317
318
319 static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
320 char* cummulativeString, int curStrOffset,
321 std::vector<EntryWithOffset>& output)
322 {
323 if ( p >= end )
324 throw "malformed trie, node past end";
325 const uint8_t terminalSize = read_uleb128(p, end);
326 const uint8_t* children = p + terminalSize;
327 if ( terminalSize != 0 ) {
328 EntryWithOffset e;
329 e.nodeOffset = p-start;
330 e.entry.name = strdup(cummulativeString);
331 e.entry.flags = read_uleb128(p, end);
332 if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
333 e.entry.address = 0;
334 e.entry.other = read_uleb128(p, end); // dylib ordinal
335 e.entry.importName = (char*)p;
336 }
337 else {
338 e.entry.address = read_uleb128(p, end);
339 if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
340 e.entry.other = read_uleb128(p, end);
341 else
342 e.entry.other = 0;
343 e.entry.importName = NULL;
344 }
345 output.push_back(e);
346 }
347 const uint8_t childrenCount = *children++;
348 const uint8_t* s = children;
349 for (uint8_t i=0; i < childrenCount; ++i) {
350 int edgeStrLen = 0;
351 while (*s != '\0') {
352 cummulativeString[curStrOffset+edgeStrLen] = *s++;
353 ++edgeStrLen;
354 }
355 cummulativeString[curStrOffset+edgeStrLen] = *s++;
356 uint32_t childNodeOffet = read_uleb128(s, end);
357 processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output);
358 }
359 }
360
361
362 inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output)
363 {
364 // empty trie has no entries
365 if ( start == end )
366 return;
367 char cummulativeString[4000];
368 std::vector<EntryWithOffset> entries;
369 processExportNode(start, start, end, cummulativeString, 0, entries);
370 // to preserve tie layout order, sort by node offset
371 std::sort(entries.begin(), entries.end());
372 // copy to output
373 output.reserve(entries.size());
374 for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it)
375 output.push_back(it->entry);
376 }
377
378
379
380
381 }; // namespace trie
382 }; // namespace mach_o
383
384
385 #endif // __MACH_O_TRIE__
386
387