1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
3 * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
24 #ifndef __MACH_O_TRIE__
25 #define __MACH_O_TRIE__
30 #include "MachOFileAbstraction.hpp"
// Edge: a labelled link from a trie node to one child node.
// Stores the label pointer s and the child pointer n as given (no copies are made here).
// NOTE(review): the enclosing struct header and the fChild declaration are not visible in this listing.
38 Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { }
// Label of this edge; addSymbol builds it from the part of a symbol name that follows the parent's cumulative string.
40 const char* fSubString;
// Creates a node whose cumulative string is s. The pointer is stored as-is (not copied);
// address, flags, other and trie offset start at 0, the imported name at NULL, and both booleans at false.
47 Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0),
48 fOther(0), fImportedName(NULL), fOrdered(false),
49 fHaveExportInfo(false), fTrieOffset(0) {}
// Prefix represented by this node; addSymbol and addOrderedNodes skip strlen(fCummulativeString)
// characters of a symbol name to find the part still to be matched against child edges.
51 const char* fCummulativeString;
// Outgoing edges to child nodes.
52 std::vector<Edge> fChildren;
// Set by addSymbol only for re-exports whose import name differs from the symbol name; NULL otherwise.
56 const char* fImportedName;
// Inserts the symbol fullStr into the sub-trie rooted at this node.
//   fullStr    - complete symbol name; the part past strlen(fCummulativeString) is matched against child edges
//   address, flags, other - values recorded on the node created for the symbol
//   importName - recorded only for re-exports whose import name differs from fullStr
// NOTE(review): this listing omits some lines of the method (braces, returns, a few statements), so the exact
// control flow between the three cases below should be confirmed against the full file.
// NOTE(review): the strdup'd strings and new'd nodes created here are not released anywhere in the visible code.
61 void addSymbol(const char* fullStr, uint64_t address, uint64_t flags, uint64_t other, const char* importName) {
// Remainder of the name once this node's cumulative prefix is skipped.
62 const char* partialStr = &fullStr[strlen(fCummulativeString)];
63 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
65 long subStringLen = strlen(e.fSubString);
// Case 1: the whole edge label is a prefix of the remainder.
66 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
67 // already have matching edge, go down that path
68 e.fChild->addSymbol(fullStr, address, flags, other, importName);
// Case 2: try shorter prefixes of the edge label, from subStringLen-1 characters down to 1.
72 for (long i=subStringLen-1; i > 0; --i) {
73 if ( strncmp(e.fSubString, partialStr, i) == 0 ) {
74 // found a common substring, splice in new node
75 // was A -> C, now A -> B -> C
76 char* bNodeCummStr = strdup(e.fChild->fCummulativeString);
// Cut the copy so that B's cumulative string stops after the first i characters of the edge label
// (assumes the child's cumulative string ends with that label -- TODO confirm).
77 bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0';
79 Node* bNode = new Node(bNodeCummStr);
80 Node* cNode = e.fChild;
81 char* abEdgeStr = strdup(e.fSubString);
// Label for the B -> C edge: the part of the old label that follows the shared prefix.
83 char* bcEdgeStr = strdup(&e.fSubString[i]);
85 abEdge.fSubString = abEdgeStr;
86 abEdge.fChild = bNode;
87 Edge bcEdge(bcEdgeStr, cNode);
88 bNode->fChildren.push_back(bcEdge);
// Continue the insertion below the new intermediate node.
89 bNode->addSymbol(fullStr, address, flags, other, importName);
// Case 3: nothing shared with any child edge.
96 // no commonality with any existing child, make a new edge that is this whole string
97 Node* newNode = new Node(strdup(fullStr));
98 Edge newEdge(strdup(partialStr), newNode);
99 fChildren.push_back(newEdge);
100 newNode->fAddress = address;
101 newNode->fFlags = flags;
102 newNode->fOther = other;
// The node keeps the caller's importName pointer itself, not a copy of the string.
103 if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && (importName != NULL) && (strcmp(fullStr,importName) != 0) )
104 newNode->fImportedName = importName;
106 newNode->fImportedName = NULL;
107 newNode->fHaveExportInfo = true;
// Records the nodes on the path to the symbol `name`: this node is appended to orderedNodes, then the
// child whose edge label is a prefix of the rest of the name is visited recursively.
// NOTE(review): the condition guarding the push_back is not visible in this listing; presumably fOrdered
// (initialized to false by the constructor) keeps a node from being appended twice -- confirm.
110 void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) {
112 orderedNodes.push_back(this);
113 //fprintf(stderr, "ordered %p %s\n", this, fCummulativeString);
// Part of the name that follows this node's cumulative prefix.
116 const char* partialStr = &name[strlen(fCummulativeString)];
117 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
119 long subStringLen = strlen(e.fSubString);
120 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
121 // already have matching edge, go down that path
122 e.fChild->addOrderedNodes(name, orderedNodes);
// Computes the encoded size of this node and records `offset` as its position in the trie stream.
// Encoded node layout:
128 // byte for terminal node size in bytes, or 0x00 if not terminal node
129 // terminal node (uleb128 flags, uleb128 addr [uleb128 other])
130 // byte for child node count
131 // each child: zero terminated substring, uleb128 node offset
// NOTE(review): the statement that advances `offset` past this node is not visible in this listing
// (presumably offset += nodeSize) -- confirm against the full file.
132 bool updateOffset(uint32_t& offset) {
133 uint32_t nodeSize = 1; // length of export info when no export info
134 if ( fHaveExportInfo ) {
135 if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
136 nodeSize = uleb128_size(fFlags) + uleb128_size(fOther); // ordinal
137 if ( fImportedName != NULL )
138 nodeSize += strlen(fImportedName);
139 ++nodeSize; // trailing zero in imported name
142 nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
143 if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
144 nodeSize += uleb128_size(fOther);
146 // do have export info, overall node size so far is uleb128 of export info + export info
// NOTE(review): the size prefix is counted as a uleb128 here, but appendToStream writes it with a single
// push_back; the two disagree once the export info is 128 bytes or longer.
147 nodeSize += uleb128_size(nodeSize);
150 ++nodeSize; // byte for count of children
151 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
// Each child costs its label, one more byte, and the uleb128 of the child's current offset; because that
// offset can itself change size, makeTrie repeats this pass until the sizes have stabilized.
153 nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset);
155 bool result = (fTrieOffset != offset);
156 fTrieOffset = offset;
157 //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString);
159 // return true if fTrieOffset was changed
// Serializes this node onto `out`: the export info (when present) preceded by its size, then the number of
// children, then for every child edge its label followed by the child's fTrieOffset as a uleb128.
// NOTE(review): every nodeSize below is written with one push_back into a vector of uint8_t, so a value
// above 255 is truncated, and a value of 128..255 is a single byte where updateOffset budgeted a
// multi-byte uleb128. Long re-export import names are the likely trigger -- confirm and consider
// append_uleb128(nodeSize, out).
163 void appendToStream(std::vector<uint8_t>& out) {
164 if ( fHaveExportInfo ) {
165 if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
166 if ( fImportedName != NULL ) {
167 // nodes with re-export info: size, flags, ordinal, string
168 uint32_t nodeSize = (uint32_t)(uleb128_size(fFlags) + uleb128_size(fOther) + strlen(fImportedName) + 1);
169 out.push_back(nodeSize);
170 append_uleb128(fFlags, out);
171 append_uleb128(fOther, out);
172 append_string(fImportedName, out);
175 // nodes with re-export info: size, flags, ordinal, empty-string
176 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + 1;
177 out.push_back(nodeSize);
178 append_uleb128(fFlags, out);
179 append_uleb128(fOther, out);
183 else if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
184 // nodes with export info: size, flags, address, other
185 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress) + uleb128_size(fOther);
186 out.push_back(nodeSize);
187 append_uleb128(fFlags, out);
188 append_uleb128(fAddress, out);
189 append_uleb128(fOther, out);
192 // nodes with export info: size, flags, address
193 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
194 out.push_back(nodeSize);
195 append_uleb128(fFlags, out);
196 append_uleb128(fAddress, out);
200 // no export info uleb128 of zero is one byte of zero
203 // write number of children
// NOTE(review): the count is stored in a single byte, so a node with more than 255 children would be
// written with a truncated count.
204 out.push_back(fChildren.size());
206 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
208 append_string(e.fSubString, out);
209 append_uleb128(e.fChild->fTrieOffset, out);
// Appends `value` to `out`, presumably in ULEB128 form as the name says.
// NOTE(review): most of the body is not visible in this listing; only the loop condition is shown, which
// keeps iterating while the byte just produced is 0x80 or greater.
214 static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) {
223 } while( byte >= 0x80 );
// Walks the characters of the C string `str` up to, but not including, its terminating zero.
// NOTE(review): the loop body and anything after the loop are not visible in this listing; presumably each
// character is pushed onto `out` and a zero byte follows -- confirm.
226 static void append_string(const char* str, std::vector<uint8_t>& out) {
227 for (const char* s = str; *s != '\0'; ++s)
// Returns a byte count for `value`; callers add it to node sizes as the length of value's uleb128 encoding.
// NOTE(review): the loop body is not visible in this listing; the visible condition shows a loop that runs
// at least once and stops when value reaches zero.
232 static unsigned int uleb128_size(uint64_t value) {
237 } while ( value != 0 );
// Decodes one uleb128 value starting at p, never reading at or beyond `end`.
//   p   - taken by reference; presumably left pointing past the decoded bytes (the increment is not visible here)
//   end - one past the last readable byte
// Throws a string literal (const char*) when the encoding runs past `end` or does not fit in 64 bits.
244 inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) {
250 throw "malformed uleb128 extends beyond trie";
// Low seven bits of the current byte carry the payload.
254 uint64_t slice = *p & 0x7f;
// Reject a shift of 64 or more, and any slice whose bits would be lost when shifted into place.
256 if (bit >= 64 || slice << bit >> bit != slice)
258 throw "uleb128 too big for 64-bits";
263 result |= (slice << bit);
// Import name of a re-exported symbol. makeTrie passes it to Node::addSymbol; processExportNode sets it to
// a pointer into the trie buffer for re-export entries and to NULL in another branch.
// NOTE(review): the rest of struct Entry is not visible in this listing.
279 const char* importName;
// Builds an export trie from `entries` and appends its encoded bytes to `output`.
//   entries - symbols to export; name, address, flags, other and importName of each are used
//   output  - byte vector the encoded nodes are appended to
// NOTE(review): the root's strdup'd string and the nodes allocated by addSymbol are not released in the
// visible code.
284 inline void makeTrie(const std::vector<Entry>& entries, std::vector<uint8_t>& output)
// Root node: empty cumulative string.
286 Node start(strdup(""));
288 // make nodes for all exported symbols
289 for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
290 start.addSymbol(it->name, it->address, it->flags, it->other, it->importName);
293 // create vector of nodes
294 std::vector<Node*> orderedNodes;
295 orderedNodes.reserve(entries.size()*2);
// Walk the path of every symbol in entry order; this fixes the order in which nodes are laid out.
296 for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
297 start.addOrderedNodes(it->name, orderedNodes);
300 // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized
305 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
// updateOffset returns true when the node's offset changed.
// NOTE(review): the statement controlled by this if is not visible here; presumably it requests another pass.
306 if ( (*it)->updateOffset(offset) )
311 // create trie stream
312 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
313 (*it)->appendToStream(output);
// Pairs a parsed entry with the offset of its node inside the trie so that parseTrie can sort entries
// back into layout order.
// NOTE(review): the `entry` member used by processExportNode and parseTrie is not visible in this listing.
317 struct EntryWithOffset
// Offset recorded by processExportNode as (p - start).
319 uintptr_t nodeOffset;
// Orders by nodeOffset only.
322 bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); }
// Recursively decodes the trie node at p and all nodes reachable from it, collecting terminal nodes.
//   start, end        - bounds of the encoded trie
//   p                 - node to decode
//   cummulativeString - scratch buffer holding the symbol name built so far
//   curStrOffset      - number of characters of cummulativeString already filled in
//   output            - receives one EntryWithOffset per terminal node
// Throws a string literal (const char*) for a node past the end, plus whatever read_uleb128 throws.
327 static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
328 char* cummulativeString, int curStrOffset,
329 std::vector<EntryWithOffset>& output)
333 throw "malformed trie, node past end";
// NOTE(review): read_uleb128 returns uint64_t; storing it in a uint8_t silently truncates terminal sizes
// above 255.
337 const uint8_t terminalSize = read_uleb128(p, end);
// NOTE(review): no check that `children` stays below `end` is visible before it is dereferenced below.
338 const uint8_t* children = p + terminalSize;
339 if ( terminalSize != 0 ) {
341 e.nodeOffset = p-start;
// The name is a heap copy of the scratch buffer; nothing visible here frees it.
342 e.entry.name = strdup(cummulativeString);
343 e.entry.flags = read_uleb128(p, end);
344 if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
346 e.entry.other = read_uleb128(p, end); // dylib ordinal
// Points directly into the trie buffer (no copy), so it is only usable while that buffer is alive.
347 e.entry.importName = (char*)p;
350 e.entry.address = read_uleb128(p, end);
351 if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
352 e.entry.other = read_uleb128(p, end);
355 e.entry.importName = NULL;
// Child count is a single byte; the child records follow it.
359 const uint8_t childrenCount = *children++;
360 const uint8_t* s = children;
361 for (uint8_t i=0; i < childrenCount; ++i) {
// Copy the edge label after the characters already in the scratch buffer.
// NOTE(review): no bound on these writes, and no check of `s` against `end`, is visible in this listing;
// parseTrie supplies a 32000-byte buffer.
364 cummulativeString[curStrOffset+edgeStrLen] = *s++;
367 cummulativeString[curStrOffset+edgeStrLen] = *s++;
368 uint32_t childNodeOffet = (uint32_t)read_uleb128(s, end);
// Descend into the child; its offset is relative to `start`.
369 processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output);
// Decodes the export trie stored in [start, end) and appends its entries to `output`, ordered by the
// offset of each entry's node within the trie.
// Malformed input is reported by the string-literal exceptions thrown from processExportNode and
// read_uleb128.
374 inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output)
376 // empty trie has no entries
// Scratch buffer for the symbol name being assembled during the walk.
// NOTE(review): fixed 32000-byte stack buffer; no length check on writes into it is visible in
// processExportNode.
379 char cummulativeString[32000];
380 std::vector<EntryWithOffset> entries;
381 processExportNode(start, start, end, cummulativeString, 0, entries);
382 // to preserve trie layout order, sort by node offset
383 std::sort(entries.begin(), entries.end());
385 output.reserve(entries.size());
386 for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it)
387 output.push_back(it->entry);
394 }; // namespace mach_o
397 #endif // __MACH_O_TRIE__