dyld-750.6.tar.gz
[apple/dyld.git] / launch-cache / MachOTrie.hpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- 
2  *
3  * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
4  *
5  * @APPLE_LICENSE_HEADER_START@
6  * 
7  * This file contains Original Code and/or Modifications of Original Code
8  * as defined in and that are subject to the Apple Public Source License
9  * Version 2.0 (the 'License'). You may not use this file except in
10  * compliance with the License. Please obtain a copy of the License at
11  * http://www.opensource.apple.com/apsl/ and read it before using this
12  * file.
13  * 
14  * The Original Code and all software distributed under the License are
15  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19  * Please see the License for the specific language governing rights and
20  * limitations under the License.
21  * 
22  * @APPLE_LICENSE_HEADER_END@
23 */
24 #ifndef __MACH_O_TRIE__
25 #define __MACH_O_TRIE__
26
27 #include <algorithm>
28 #include <vector>
29
30 #include "MachOFileAbstraction.hpp"
31
32
33 namespace mach_o {
34 namespace trie {
35
36 struct Edge
37 {
38                                         Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { }
39                                         ~Edge() {  }
40         const char*             fSubString;
41         struct Node*    fChild;
42         
43 };
44
45 struct Node
46 {
47                                                 Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0), 
48                                                                                         fOther(0), fImportedName(NULL), fOrdered(false), 
49                                                                                         fHaveExportInfo(false), fTrieOffset(0) {}
50                                                 ~Node() { }
51         const char*                     fCummulativeString;
52         std::vector<Edge>       fChildren;
53         uint64_t                        fAddress;
54         uint64_t                        fFlags;
55         uint64_t                        fOther;
56         const char*                     fImportedName;
57         bool                            fOrdered;
58         bool                            fHaveExportInfo;
59         uint32_t                        fTrieOffset;
60         
61         void addSymbol(const char* fullStr, uint64_t address, uint64_t flags, uint64_t other, const char* importName) {
62                 const char* partialStr = &fullStr[strlen(fCummulativeString)];
63                 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
64                         Edge& e = *it;
65                         long subStringLen = strlen(e.fSubString);
66                         if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
67                                 // already have matching edge, go down that path
68                                 e.fChild->addSymbol(fullStr, address, flags, other, importName);
69                                 return;
70                         }
71                         else {
72                                 for (long i=subStringLen-1; i > 0; --i) {
73                                         if ( strncmp(e.fSubString, partialStr, i) == 0 ) {
74                                                 // found a common substring, splice in new node
75                                                 //  was A -> C,  now A -> B -> C
76                                                 char* bNodeCummStr = strdup(e.fChild->fCummulativeString);
77                                                 bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0';
78                                                 //node* aNode = this;
79                                                 Node* bNode = new Node(bNodeCummStr);
80                                                 Node* cNode = e.fChild;
81                                                 char* abEdgeStr = strdup(e.fSubString);
82                                                 abEdgeStr[i] = '\0';
83                                                 char* bcEdgeStr = strdup(&e.fSubString[i]);
84                                                 Edge& abEdge = e;
85                                                 abEdge.fSubString = abEdgeStr;
86                                                 abEdge.fChild = bNode;
87                                                 Edge bcEdge(bcEdgeStr, cNode);
88                                                 bNode->fChildren.push_back(bcEdge);
89                                                 bNode->addSymbol(fullStr, address, flags, other, importName);
90                                                 return;
91                                         }
92                                 }
93                         }
94                 }
95
96                 // no commonality with any existing child, make a new edge that is this whole string
97                 Node* newNode = new Node(strdup(fullStr));
98                 Edge newEdge(strdup(partialStr), newNode);
99                 fChildren.push_back(newEdge);
100                 newNode->fAddress = address;
101                 newNode->fFlags = flags;
102                 newNode->fOther = other;
103                 if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && (importName != NULL) && (strcmp(fullStr,importName) != 0) )
104                         newNode->fImportedName = importName;
105                 else
106                         newNode->fImportedName = NULL;
107                 newNode->fHaveExportInfo = true;
108         }
109         
110         void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) {
111                 if ( !fOrdered ) {
112                         orderedNodes.push_back(this);
113                         //fprintf(stderr, "ordered %p %s\n", this, fCummulativeString);
114                         fOrdered = true;
115                 }
116                 const char* partialStr = &name[strlen(fCummulativeString)];
117                 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
118                         Edge& e = *it;
119                         long subStringLen = strlen(e.fSubString);
120                         if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
121                                 // already have matching edge, go down that path
122                                 e.fChild->addOrderedNodes(name, orderedNodes);
123                                 return;
124                         }
125                 }
126         }
127
128         // byte for terminal node size in bytes, or 0x00 if not terminal node
129         // teminal node (uleb128 flags, uleb128 addr [uleb128 other])
130         // byte for child node count
131         //  each child: zero terminated substring, uleb128 node offset
132         bool updateOffset(uint32_t& offset) {
133                 uint32_t nodeSize = 1; // length of export info when no export info
134                 if ( fHaveExportInfo ) {
135                         if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
136                                 nodeSize = uleb128_size(fFlags) + uleb128_size(fOther); // ordinal
137                                 if ( fImportedName != NULL )
138                                         nodeSize += strlen(fImportedName);
139                                 ++nodeSize; // trailing zero in imported name
140                         }
141                         else {
142                                 nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
143                                 if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
144                                         nodeSize += uleb128_size(fOther);
145                         }
146                         // do have export info, overall node size so far is uleb128 of export info + export info
147                         nodeSize += uleb128_size(nodeSize); 
148                 }
149                 // add children
150                 ++nodeSize; // byte for count of chidren
151                 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
152                         Edge& e = *it;
153                         nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset);
154                 }
155                 bool result = (fTrieOffset != offset);
156                 fTrieOffset = offset;
157                 //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString);
158                 offset += nodeSize;
159                 // return true if fTrieOffset was changed
160                 return result;
161         }
162
163         void appendToStream(std::vector<uint8_t>& out) {
164                 if ( fHaveExportInfo ) {
165                         if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
166                                 if ( fImportedName != NULL ) {
167                                         // nodes with re-export info: size, flags, ordinal, string
168                                         uint32_t nodeSize = (uint32_t)(uleb128_size(fFlags) + uleb128_size(fOther) + strlen(fImportedName) + 1);
169                                         out.push_back(nodeSize);
170                                         append_uleb128(fFlags, out);
171                                         append_uleb128(fOther, out);
172                                         append_string(fImportedName, out);
173                                 }
174                                 else {
175                                         // nodes with re-export info: size, flags, ordinal, empty-string
176                                         uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + 1;
177                                         out.push_back(nodeSize);
178                                         append_uleb128(fFlags, out);
179                                         append_uleb128(fOther, out);
180                                         out.push_back(0);
181                                 }
182                         }
183                         else if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
184                                 // nodes with export info: size, flags, address, other
185                                 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress) + uleb128_size(fOther);
186                                 out.push_back(nodeSize);
187                                 append_uleb128(fFlags, out);
188                                 append_uleb128(fAddress, out);
189                                 append_uleb128(fOther, out);
190                         }
191                         else {
192                                 // nodes with export info: size, flags, address
193                                 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
194                                 out.push_back(nodeSize);
195                                 append_uleb128(fFlags, out);
196                                 append_uleb128(fAddress, out);
197                         }
198                 }
199                 else {
200                         // no export info uleb128 of zero is one byte of zero
201                         out.push_back(0);
202                 }
203                 // write number of children
204                 out.push_back(fChildren.size());
205                 // write each child
206                 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
207                         Edge& e = *it;
208                         append_string(e.fSubString, out);
209                         append_uleb128(e.fChild->fTrieOffset, out);
210                 }
211         }
212         
213 private:
214         static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) {
215                 uint8_t byte;
216                 do {
217                         byte = value & 0x7F;
218                         value &= ~0x7F;
219                         if ( value != 0 )
220                                 byte |= 0x80;
221                         out.push_back(byte);
222                         value = value >> 7;
223                 } while( byte >= 0x80 );
224         }
225         
226         static void append_string(const char* str, std::vector<uint8_t>& out) {
227                 for (const char* s = str; *s != '\0'; ++s)
228                         out.push_back(*s);
229                 out.push_back('\0');
230         }
231         
232         static unsigned int     uleb128_size(uint64_t value) {
233                 uint32_t result = 0;
234                 do {
235                         value = value >> 7;
236                         ++result;
237                 } while ( value != 0 );
238                 return result;
239         }
240         
241
242 };
243
244 inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) {
245         uint64_t result = 0;
246         int              bit = 0;
247         do {
248                 if (p == end)
249 #if __EXCEPTIONS
250                         throw "malformed uleb128 extends beyond trie";
251 #else
252                         return result;
253 #endif
254                 uint64_t slice = *p & 0x7f;
255
256                 if (bit >= 64 || slice << bit >> bit != slice)
257 #if __EXCEPTIONS
258                         throw "uleb128 too big for 64-bits";
259 #else
260                         return result;
261 #endif
262                 else {
263                         result |= (slice << bit);
264                         bit += 7;
265                 }
266         } 
267         while (*p++ & 0x80);
268         return result;
269 }
270         
271
272
273 struct Entry
274 {
275         const char*             name;
276         uint64_t                address;
277         uint64_t                flags;
278         uint64_t                other;
279         const char*             importName;
280 };
281
282
283
284 inline void makeTrie(const std::vector<Entry>& entries, std::vector<uint8_t>& output)
285 {
286         Node start(strdup(""));
287         
288         // make nodes for all exported symbols
289         for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
290                 start.addSymbol(it->name, it->address, it->flags, it->other, it->importName);
291         }
292
293         // create vector of nodes
294         std::vector<Node*> orderedNodes;
295         orderedNodes.reserve(entries.size()*2);
296         for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
297                 start.addOrderedNodes(it->name, orderedNodes);
298         }
299         
300         // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized
301         bool more;
302         do {
303                 uint32_t offset = 0;
304                 more = false;
305                 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
306                         if ( (*it)->updateOffset(offset) )
307                                 more = true;
308                 }
309         } while ( more );
310         
311         // create trie stream
312         for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
313                 (*it)->appendToStream(output);
314         }
315 }
316
317 struct EntryWithOffset
318 {
319         uintptr_t               nodeOffset;
320         Entry                   entry;
321         
322         bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); }
323 };
324
325
326
327 static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end, 
328                                                                         char* cummulativeString, int curStrOffset, 
329                                                                         std::vector<EntryWithOffset>& output) 
330 {
331         if ( p >= end )
332 #if __EXCEPTIONS
333                 throw "malformed trie, node past end";
334 #else
335                 return;
336 #endif
337         const uint8_t terminalSize = read_uleb128(p, end);
338         const uint8_t* children = p + terminalSize;
339         if ( terminalSize != 0 ) {
340                 EntryWithOffset e;
341                 e.nodeOffset = p-start;
342                 e.entry.name = strdup(cummulativeString);
343                 e.entry.flags = read_uleb128(p, end);
344                 if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
345                         e.entry.address = 0;
346                         e.entry.other = read_uleb128(p, end); // dylib ordinal
347                         e.entry.importName = (char*)p;
348                 }
349                 else {
350                         e.entry.address = read_uleb128(p, end); 
351                         if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
352                                 e.entry.other = read_uleb128(p, end); 
353                         else
354                                 e.entry.other = 0;
355                         e.entry.importName = NULL;
356                 }
357                 output.push_back(e);
358         }
359         const uint8_t childrenCount = *children++;
360         const uint8_t* s = children;
361         for (uint8_t i=0; i < childrenCount; ++i) {
362                 int edgeStrLen = 0;
363                 while (*s != '\0') {
364                         cummulativeString[curStrOffset+edgeStrLen] = *s++;
365                         ++edgeStrLen;
366                 }
367                 cummulativeString[curStrOffset+edgeStrLen] = *s++;
368                 uint32_t childNodeOffet = (uint32_t)read_uleb128(s, end);
369                 processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output);        
370         }
371 }
372
373
374 inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output)
375 {
376         // empty trie has no entries
377         if ( start == end )
378                 return;
379         char cummulativeString[32000];
380         std::vector<EntryWithOffset> entries;
381         processExportNode(start, start, end, cummulativeString, 0, entries);
382         // to preserve tie layout order, sort by node offset
383         std::sort(entries.begin(), entries.end());
384         // copy to output
385         output.reserve(entries.size());
386         for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it)
387                 output.push_back(it->entry);
388 }
389
390
391
392
393 }; // namespace trie
394 }; // namespace mach_o
395
396
397 #endif  // __MACH_O_TRIE__
398
399