]> git.saurik.com Git - apple/dyld.git/blob - dyld3/shared-cache/Trie.hpp
dyld-750.5.tar.gz
[apple/dyld.git] / dyld3 / shared-cache / Trie.hpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 /*
26 //This is the exposed iterface for the Trie template
27 //TODO: add erase methods
28 //TODO: re-enable iterators
29
30 template <typename V>
31 struct Trie {
32 struct Entry
33 {
34 std::string name;
35 V info;
36
37 Entry(void) {}
38 Entry(const std::string& N, V I) : name(N), info(I) {}
39 };
40
41 struct const_iterator : std::iterator<std::bidirectional_iterator_tag, const Entry>;
42
43 const_iterator begin() const;
44 const_iterator end() const;
45
46 Trie(void);
47 Trie(const uint8_t* start, const uint8_t* end);
48 Trie(const std::vector<Entry>& entries);
49
50 void emit(std::vector<uint8_t>& output);
51 */
52
53
54 #ifndef __TRIE__
55 #define __TRIE__
56 #define TRIE_DEBUG (0)
57
58 #include <algorithm>
59 #include <vector>
60 #include <memory>
61 #include <string>
62 #include <map>
63 #include <iterator>
64
65 #include <mach-o/loader.h>
66
67 #if __cplusplus <= 201103L
68 namespace std {
69 template<typename T, typename... Args>
70 std::unique_ptr<T> make_unique(Args&&... args)
71 {
72 return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
73 }
74 }
75 #endif
76
77 namespace TrieUtils {
78
79 static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) {
80 uint8_t byte;
81 do {
82 byte = value & 0x7F;
83 value &= ~0x7F;
84 if ( value != 0 )
85 byte |= 0x80;
86 out.push_back(byte);
87 value = value >> 7;
88 } while( byte >= 0x80 );
89 }
90
91 static void append_string(std::string str, std::vector<uint8_t>& out) {
92 for(char& c : str)
93 out.push_back(c);
94 out.push_back('\0');
95 }
96
97 static unsigned int uleb128_size(uint64_t value) {
98 uint32_t result = 0;
99 do {
100 value = value >> 7;
101 ++result;
102 } while ( value != 0 );
103 return result;
104 }
105
106 static
107 inline bool parse_uleb128(const uint8_t*& p, const uint8_t* end, uint64_t& result) {
108 result = 0;
109 int bit = 0;
110 do {
111 if (p == end)
112 return false; // malformed uleb128 extends beyond input
113 uint64_t slice = *p & 0x7f;
114
115 if (bit >= 64 || slice << bit >> bit != slice)
116 return false; // malformed
117 else {
118 result |= (slice << bit);
119 bit += 7;
120 }
121 }
122 while (*p++ & 0x80);
123 return true;
124 }
125 };
126
127 template <typename V>
128 struct Trie {
129 uint32_t count;
130 uint32_t nodeCount;
131
132 struct Entry
133 {
134 std::string name;
135 V info;
136
137 Entry(void) {}
138 Entry(const std::string& N, V I) : name(N), info(I) {}
139 };
140
141 Trie(const std::vector<Entry>& entries) : count(0), nodeCount(1) {
142 // make nodes for all exported symbols
143 for (auto& entry : entries) {
144 addEntry(entry);
145 }
146 }
147
148 void emit(std::vector<uint8_t>& output) {
149 // create vector of nodes
150 std::vector<Node*> orderedNodes;
151 orderedNodes.reserve(nodeCount);
152 orderTrie(&root, orderedNodes);
153
154 // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized
155 bool more;
156 do {
157 uint32_t offset = 0;
158 more = false;
159 for (auto& node : orderedNodes) {
160 if (node->updateOffset(offset)) {
161 more = true;
162 }
163 }
164 } while ( more );
165
166 // create trie stream
167 for (auto& node : orderedNodes) {
168 node->appendToStream(output);
169 }
170 }
171
172 static
173 inline bool parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output)
174 {
175 // empty trie has no entries
176 if ( start == end )
177 return false;
178 char cummulativeString[32768];
179 std::vector<EntryWithOffset> entries;
180 if ( !processExportNode(start, start, end, cummulativeString, 0, entries) )
181 return false;
182 // to preserve tie layout order, sort by node offset
183 std::sort(entries.begin(), entries.end());
184 // copy to output
185 output.reserve(entries.size());
186 for (auto& entryWithOffset : entries) {
187 output.push_back(entryWithOffset.entry);
188 }
189 return true;
190 }
191
192 private:
193 struct Node
194 {
195 //This needs to be a map to unsure deterministic ordering of tries.
196 std::map<std::string,std::unique_ptr<Node> > fChildren;
197 bool fIsTerminal;
198 uint32_t fTrieOffset;
199 V fInfo;
200
201 Node(void) : fIsTerminal(false), fTrieOffset(0) {}
202 Node(V v) :fInfo(v), fIsTerminal(true), fTrieOffset(0) {}
203 Node(Node&&) = default;
204
205 // byte for terminal node size in bytes, or 0x00 if not terminal node
206 // teminal node (uleb128 flags, uleb128 addr [uleb128 other])
207 // byte for child node count
208 // each child: zero terminated substring, uleb128 node offset
209 bool updateOffset(uint32_t& offset) {
210 uint32_t nodeSize = 1; // length of export info when no export info
211 if ( fIsTerminal ) {
212 nodeSize = fInfo.encodedSize();
213 // do have export info, overall node size so far is uleb128 of export info + export info
214 nodeSize += TrieUtils::uleb128_size(nodeSize);
215 }
216 // add children
217 ++nodeSize; // byte for count of chidren
218
219 for (auto &edge : fChildren) {
220 nodeSize += edge.first.length() + 1 + TrieUtils::uleb128_size(edge.second->fTrieOffset);
221 }
222
223 bool result = (fTrieOffset != offset);
224 fTrieOffset = offset;
225 //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString);
226 offset += nodeSize;
227 // return true if fTrieOffset was changed
228 return result;
229 }
230
231 void appendToStream(std::vector<uint8_t>& out) {
232 if ( fIsTerminal ) {
233 fInfo.appendToStream(out);
234 }
235 else {
236 // no export info uleb128 of zero is one byte of zero
237 out.push_back(0);
238 }
239 // write number of children
240 out.push_back(fChildren.size());
241 // write each child
242 for (auto &edge : fChildren) {
243 TrieUtils::append_string(edge.first, out);
244 TrieUtils::append_uleb128(edge.second->fTrieOffset, out);
245 }
246 }
247 };
248
249 Node root;
250
251 struct EntryWithOffset
252 {
253 uintptr_t nodeOffset;
254 Entry entry;
255
256 bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); }
257 };
258
259 void addEntry(const std::string& fullStr, std::string::const_iterator start, V v) {
260 Node *currentNode = &root;
261 bool done = false;
262
263 while (!done && !currentNode->fChildren.empty() ) {
264 done = true;
265
266 for (auto &entry : currentNode->fChildren) {
267 auto res = std::mismatch(entry.first.begin(), entry.first.end(), start);
268
269 if (res.first == entry.first.end()) {
270 //Matched a full edge, go down it
271 done = false;
272 currentNode = entry.second.get();
273 start = res.second;
274 break;
275 } else if (res.first != entry.first.begin()) {
276 // found a common substring, splice in new node
277 // was A -> C, now A -> B -> C
278
279 //Build the new strings
280 std::string abEdgeStr(entry.first.begin(), res.first);
281 std::string bcEdgeStr(res.first, entry.first.end());
282
283 //Copy out the exist node and delete it from the currentNode
284 std::unique_ptr<Node> nodeC;
285 std::swap(nodeC, entry.second);
286 currentNode->fChildren.erase(entry.first);
287
288 //Build the new node and insert it
289 std::unique_ptr<Node> nodeB = std::make_unique<Node>();
290 Node *newNode = nodeB.get();
291
292 nodeB->fChildren.insert(std::make_pair(bcEdgeStr, std::move(nodeC)));
293 currentNode->fChildren.insert(std::make_pair(abEdgeStr, std::move(nodeB)));
294
295 currentNode = newNode;
296 start = res.second;
297 ++nodeCount;
298 break;
299 }
300 }
301 }
302
303 // no commonality with any existing child, make a new edge that is this whole string
304 std::string edgeStr(start, fullStr.end());
305 v.willInsertAs(fullStr);
306
307 if (edgeStr.empty()) {
308 currentNode->fIsTerminal = true;
309 currentNode->fInfo = v;
310 } else {
311 currentNode->fChildren.emplace(edgeStr, std::make_unique<Node>(v));
312 ++nodeCount;
313 }
314 ++count;
315 }
316
317 void addEntry(Entry entry) {
318 addEntry(entry.name, entry.name.begin(), entry.info);
319 }
320
321 #if TRIE_DEBUG
322 void printTrie(Node& node, std::string cummulativeString) {
323 if (node.fTerminal) {
324 printf("%s: \n", cummulativeString.c_str());
325 }
326 for (auto &edge : node.fChildren) {
327 printTrie(*edge.second, cummulativeString+edge.first);
328 }
329 }
330
331 public:
332 void printTrie(void) {
333 printTrie(root, "");
334 }
335 private:
336 #endif
337
338 static inline bool processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
339 char* cummulativeString, int curStrOffset,
340 std::vector<EntryWithOffset>& output)
341 {
342 if ( p >= end )
343 return false;
344 uint64_t terminalSize;
345 if ( !TrieUtils::parse_uleb128(p, end, terminalSize) )
346 return false;
347 const uint8_t* children = p + terminalSize;
348 if ( children >= end )
349 return false;
350 if ( terminalSize != 0 ) {
351 EntryWithOffset e;
352 e.nodeOffset = p-start;
353 e.entry.name = cummulativeString;
354 e.entry.info.loadData(p,end);
355 output.push_back(e);
356 }
357 const uint8_t childrenCount = *children++;
358 const uint8_t* s = children;
359 for (uint8_t i=0; i < childrenCount; ++i) {
360 int edgeStrLen = 0;
361 while (*s != '\0') {
362 cummulativeString[curStrOffset+edgeStrLen] = *s++;
363 ++edgeStrLen;
364 }
365 cummulativeString[curStrOffset+edgeStrLen] = *s++;
366 uint64_t childNodeOffet;
367 if ( !TrieUtils::parse_uleb128(s, end, childNodeOffet) )
368 return false;
369 if ( !processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output) )
370 return false;
371 }
372 return true;
373 }
374
375 void orderTrie(Node* node, std::vector<Node*>& orderedNodes) {
376 orderedNodes.push_back(node);
377 for (auto &edge : node->fChildren) {
378 orderTrie(edge.second.get(), orderedNodes);
379 }
380 }
381 }; // struct Trie
382
383 struct ExportInfo {
384 uint64_t address;
385 uint64_t flags;
386 uint64_t other;
387 std::string importName;
388
389 ExportInfo() : address(0), flags(0), other(0) { }
390
391 uint32_t encodedSize(void) {
392 uint32_t size = 0;
393 if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
394 size = TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(other); // ordinal
395 if ( !importName.empty() )
396 size += importName.length();
397 ++size; // trailing zero in imported name
398 }
399 else {
400 size = TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(address);
401 if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
402 size += TrieUtils::uleb128_size(other);
403 }
404 return size;
405 }
406
407 void appendToStream(std::vector<uint8_t>& out) {
408 if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
409 if ( !importName.empty() ) {
410 // nodes with re-export info: size, flags, ordinal, string
411 uint32_t nodeSize = (uint32_t)(TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(other) + importName.length() + 1);
412 out.push_back(nodeSize);
413 TrieUtils::append_uleb128(flags, out);
414 TrieUtils::append_uleb128(other, out);
415 TrieUtils::append_string(importName, out);
416 }
417 else {
418 // nodes with re-export info: size, flags, ordinal, empty-string
419 uint32_t nodeSize = TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(other) + 1;
420 out.push_back(nodeSize);
421 TrieUtils::append_uleb128(flags, out);
422 TrieUtils::append_uleb128(other, out);
423 out.push_back(0);
424 }
425 }
426 else if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
427 // nodes with export info: size, flags, address, other
428 uint32_t nodeSize = TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(address) + TrieUtils::uleb128_size(other);
429 out.push_back(nodeSize);
430 TrieUtils::append_uleb128(flags, out);
431 TrieUtils::append_uleb128(address, out);
432 TrieUtils::append_uleb128(other, out);
433 }
434 else {
435 // nodes with export info: size, flags, address
436 uint32_t nodeSize = TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(address);
437 out.push_back(nodeSize);
438 TrieUtils::append_uleb128(flags, out);
439 TrieUtils::append_uleb128(address, out);
440 }
441 }
442
443 void loadData(const uint8_t* p, const uint8_t* const end) {
444 TrieUtils::parse_uleb128(p, end, flags);
445 if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
446 TrieUtils::parse_uleb128(p, end, other); // dylib ordinal
447 importName = (char*)p;
448 }
449 else {
450 TrieUtils::parse_uleb128(p, end, address);
451 if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
452 TrieUtils::parse_uleb128(p, end, other);
453 }
454 }
455
456 void willInsertAs(const std::string& name) {
457 // Symbols re-exported under the same name do not need an explict import name, delete it to save space
458 if ((name == importName) ) {
459 importName = "";
460 }
461 }
462 };
463
464 typedef Trie<ExportInfo> ExportInfoTrie;
465
466
467 // Used by accelerator tables in dyld shared cache
468 struct DylibIndex {
469 uint32_t index;
470
471 DylibIndex() : index(0) {}
472 DylibIndex(uint32_t i) : index(i) {}
473
474 uint32_t encodedSize(void) {
475 return TrieUtils::uleb128_size(index);
476 }
477
478 void appendToStream(std::vector<uint8_t>& out) {
479 uint32_t nodeSize = TrieUtils::uleb128_size(index);
480 out.push_back(nodeSize);
481 TrieUtils::append_uleb128(index, out);
482 }
483
484 void loadData(const uint8_t* p, const uint8_t* const end) {
485 uint64_t temp;
486 TrieUtils::parse_uleb128(p, end, temp);
487 index = (uint32_t)temp;
488 }
489
490 void willInsertAs(const std::string& name) {
491 }
492 };
493 typedef Trie<DylibIndex> DylibIndexTrie;
494
495
496 #endif // __TRIE__
497
498