]> git.saurik.com Git - apple/dyld.git/blob - launch-cache/MachOTrie.hpp
dyld-132.13.tar.gz
[apple/dyld.git] / launch-cache / MachOTrie.hpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2008 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24 #ifndef __MACH_O_TRIE__
25 #define __MACH_O_TRIE__
26
27
28 #include "MachOFileAbstraction.hpp"
29
30 namespace mach_o {
31 namespace trie {
32
33 struct Edge
34 {
35 Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { }
36 ~Edge() { }
37 const char* fSubString;
38 struct Node* fChild;
39
40 };
41
42 struct Node
43 {
44 Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0), fOrdered(false),
45 fHaveExportInfo(false), fTrieOffset(0) {}
46 ~Node() { }
47 const char* fCummulativeString;
48 std::vector<Edge> fChildren;
49 uint64_t fAddress;
50 uint32_t fFlags;
51 bool fOrdered;
52 bool fHaveExportInfo;
53 uint32_t fTrieOffset;
54
55 void addSymbol(const char* fullStr, uint64_t address, uint32_t flags) {
56 const char* partialStr = &fullStr[strlen(fCummulativeString)];
57 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
58 Edge& e = *it;
59 int subStringLen = strlen(e.fSubString);
60 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
61 // already have matching edge, go down that path
62 e.fChild->addSymbol(fullStr, address, flags);
63 return;
64 }
65 else {
66 for (int i=subStringLen-1; i > 0; --i) {
67 if ( strncmp(e.fSubString, partialStr, i) == 0 ) {
68 // found a common substring, splice in new node
69 // was A -> C, now A -> B -> C
70 char* bNodeCummStr = strdup(e.fChild->fCummulativeString);
71 bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0';
72 //node* aNode = this;
73 Node* bNode = new Node(bNodeCummStr);
74 Node* cNode = e.fChild;
75 char* abEdgeStr = strdup(e.fSubString);
76 abEdgeStr[i] = '\0';
77 char* bcEdgeStr = strdup(&e.fSubString[i]);
78 Edge& abEdge = e;
79 abEdge.fSubString = abEdgeStr;
80 abEdge.fChild = bNode;
81 Edge bcEdge(bcEdgeStr, cNode);
82 bNode->fChildren.push_back(bcEdge);
83 bNode->addSymbol(fullStr, address, flags);
84 return;
85 }
86 }
87 }
88 }
89 // no commonality with any existing child, make a new edge that is this whole string
90 Node* newNode = new Node(strdup(fullStr));
91 Edge newEdge(strdup(partialStr), newNode);
92 fChildren.push_back(newEdge);
93 newNode->fAddress = address;
94 newNode->fFlags = flags;
95 newNode->fHaveExportInfo = true;
96 }
97
98 void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) {
99 if ( !fOrdered ) {
100 orderedNodes.push_back(this);
101 //fprintf(stderr, "ordered %p %s\n", this, fCummulativeString);
102 fOrdered = true;
103 }
104 const char* partialStr = &name[strlen(fCummulativeString)];
105 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
106 Edge& e = *it;
107 int subStringLen = strlen(e.fSubString);
108 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
109 // already have matching edge, go down that path
110 e.fChild->addOrderedNodes(name, orderedNodes);
111 return;
112 }
113 }
114 }
115
116 // byte for terminal node size in bytes, or 0x00 if not terminal node
117 // teminal node (uleb128 flags, uleb128 addr)
118 // byte for child node count
119 // each child: zero terminated substring, uleb128 node offset
120 bool updateOffset(uint32_t& offset) {
121 uint32_t nodeSize = 1; // byte for length of export info
122 if ( fHaveExportInfo )
123 nodeSize += uleb128_size(fFlags) + uleb128_size(fAddress);
124
125 // add children
126 ++nodeSize; // byte for count of chidren
127 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
128 Edge& e = *it;
129 nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset);
130 }
131 bool result = (fTrieOffset != offset);
132 fTrieOffset = offset;
133 //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString);
134 offset += nodeSize;
135 // return true if fTrieOffset was changed
136 return result;
137 }
138
139 void appendToStream(std::vector<uint8_t>& out) {
140 if ( fHaveExportInfo ) {
141 // nodes with export info: size, flags, address
142 out.push_back(uleb128_size(fFlags) + uleb128_size(fAddress));
143 append_uleb128(fFlags, out);
144 append_uleb128(fAddress, out);
145 }
146 else {
147 // no export info
148 out.push_back(0);
149 }
150 // write number of children
151 out.push_back(fChildren.size());
152 // write each child
153 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
154 Edge& e = *it;
155 append_string(e.fSubString, out);
156 append_uleb128(e.fChild->fTrieOffset, out);
157 }
158 }
159
160 private:
161 static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) {
162 uint8_t byte;
163 do {
164 byte = value & 0x7F;
165 value &= ~0x7F;
166 if ( value != 0 )
167 byte |= 0x80;
168 out.push_back(byte);
169 value = value >> 7;
170 } while( byte >= 0x80 );
171 }
172
173 static void append_string(const char* str, std::vector<uint8_t>& out) {
174 for (const char* s = str; *s != '\0'; ++s)
175 out.push_back(*s);
176 out.push_back('\0');
177 }
178
179 static unsigned int uleb128_size(uint64_t value) {
180 uint32_t result = 0;
181 do {
182 value = value >> 7;
183 ++result;
184 } while ( value != 0 );
185 return result;
186 }
187
188
189 };
190
191 inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) {
192 uint64_t result = 0;
193 int bit = 0;
194 do {
195 if (p == end)
196 throw "malformed uleb128 extends beyond trie";
197
198 uint64_t slice = *p & 0x7f;
199
200 if (bit >= 64 || slice << bit >> bit != slice)
201 throw "uleb128 too big for 64-bits";
202 else {
203 result |= (slice << bit);
204 bit += 7;
205 }
206 }
207 while (*p++ & 0x80);
208 return result;
209 }
210
211
212
213 struct Entry
214 {
215 const char* name;
216 uint64_t address;
217 uint64_t flags;
218 };
219
220
221 inline void makeTrie(const std::vector<Entry>& input, std::vector<uint8_t>& output)
222 {
223 Node start(strdup(""));
224
225 // make nodes for all exported symbols
226 for (std::vector<Entry>::const_iterator it = input.begin(); it != input.end(); ++it) {
227 start.addSymbol(it->name, it->address, it->flags);
228 }
229
230 // create vector of nodes
231 std::vector<Node*> orderedNodes;
232 orderedNodes.reserve(input.size()*2);
233 for (std::vector<Entry>::const_iterator it = input.begin(); it != input.end(); ++it) {
234 start.addOrderedNodes(it->name, orderedNodes);
235 }
236
237 // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized
238 bool more;
239 do {
240 uint32_t offset = 0;
241 more = false;
242 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
243 if ( (*it)->updateOffset(offset) )
244 more = true;
245 }
246 } while ( more );
247
248 // create trie stream
249 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
250 (*it)->appendToStream(output);
251 }
252 }
253
254 struct EntryWithOffset
255 {
256 uintptr_t nodeOffset;
257 Entry entry;
258
259 bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); }
260 };
261
262
263
264 static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
265 char* cummulativeString, int curStrOffset, std::vector<EntryWithOffset>& output)
266 {
267 if ( p >= end )
268 throwf("malformed trie, node %p outside trie (%p -> %p)", p, start, end);
269 const uint8_t terminalSize = *p++;
270 const uint8_t* children = p + terminalSize;
271 if ( terminalSize != 0 ) {
272 EntryWithOffset e;
273 e.nodeOffset = p-start;
274 e.entry.name = strdup(cummulativeString);
275 e.entry.flags = read_uleb128(p, end);
276 e.entry.address = read_uleb128(p, end);
277 output.push_back(e);
278 }
279 const uint8_t childrenCount = *children++;
280 const uint8_t* s = children;
281 for (uint8_t i=0; i < childrenCount; ++i) {
282 int edgeStrLen = 0;
283 while (*s != '\0') {
284 cummulativeString[curStrOffset+edgeStrLen] = *s++;
285 ++edgeStrLen;
286 }
287 cummulativeString[curStrOffset+edgeStrLen] = *s++;
288 uint32_t childNodeOffet = read_uleb128(s, end);
289 processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output);
290 }
291 }
292
293
294 inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output)
295 {
296 // empty trie has no entries
297 if ( start == end )
298 return;
299
300 char cummulativeString[4000];
301 std::vector<EntryWithOffset> entries;
302 processExportNode(start, start, end, cummulativeString, 0, entries);
303 // to preserve tie layout order, sort by node offset
304 std::sort(entries.begin(), entries.end());
305 // copy to output
306 output.reserve(entries.size());
307 for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it)
308 output.push_back(it->entry);
309 }
310
311
312
313
314 }; // namespace trie
315 }; // namespace mach_o
316
317
318 #endif // __MACH_O_TRIE__
319
320