dyld-832.7.1.tar.gz
[apple/dyld.git] / dyld3 / shared-cache / Trie.hpp
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2  *
3  * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
4  *
5  * @APPLE_LICENSE_HEADER_START@
6  *
7  * This file contains Original Code and/or Modifications of Original Code
8  * as defined in and that are subject to the Apple Public Source License
9  * Version 2.0 (the 'License'). You may not use this file except in
10  * compliance with the License. Please obtain a copy of the License at
11  * http://www.opensource.apple.com/apsl/ and read it before using this
12  * file.
13  *
14  * The Original Code and all software distributed under the License are
15  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19  * Please see the License for the specific language governing rights and
20  * limitations under the License.
21  *
22  * @APPLE_LICENSE_HEADER_END@
23  */
24
25 /*
26  //This is the exposed iterface for the Trie template
27  //TODO: add erase methods
28  //TODO: re-enable iterators
29
30  template <typename V>
31  struct Trie {
32         struct Entry
33         {
34     std::string         name;
35     V                           info;
36
37     Entry(void) {}
38     Entry(const std::string& N, V I) : name(N), info(I) {}
39   };
40
41  struct const_iterator : std::iterator<std::bidirectional_iterator_tag, const Entry>;
42
43  const_iterator begin() const;
44  const_iterator end() const;
45
46  Trie(void);
47  Trie(const uint8_t* start, const uint8_t* end);
48  Trie(const std::vector<Entry>& entries);
49
50  void emit(std::vector<uint8_t>& output);
51  */
52
53
54 #ifndef __TRIE__
55 #define __TRIE__
56 #define TRIE_DEBUG (0)
57
58 #include <algorithm>
59 #include <vector>
60 #include <memory>
61 #include <string>
62 #include <map>
63 #include <iterator>
64
65 #include <mach-o/loader.h>
66
67 #if __cplusplus <= 201103L
68 namespace std {
69         template<typename T, typename... Args>
70         std::unique_ptr<T> make_unique(Args&&... args)
71         {
72                 return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
73         }
74 }
75 #endif
76
77 namespace TrieUtils {
78
79 static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) {
80         uint8_t byte;
81         do {
82                 byte = value & 0x7F;
83                 value &= ~0x7F;
84                 if ( value != 0 )
85                         byte |= 0x80;
86                 out.push_back(byte);
87                 value = value >> 7;
88         } while( byte >= 0x80 );
89 }
90
91 static void append_string(std::string str, std::vector<uint8_t>& out) {
92         for(char& c : str)
93                 out.push_back(c);
94         out.push_back('\0');
95 }
96
97 static unsigned int     uleb128_size(uint64_t value) {
98         uint32_t result = 0;
99         do {
100                 value = value >> 7;
101                 ++result;
102         } while ( value != 0 );
103         return result;
104 }
105
106 static
107 inline bool parse_uleb128(const uint8_t*& p, const uint8_t* end, uint64_t& result) {
108         result = 0;
109         int              bit = 0;
110         do {
111                 if (p == end)
112                         return false; // malformed uleb128 extends beyond input
113                 uint64_t slice = *p & 0x7f;
114
115                 if (bit >= 64 || slice << bit >> bit != slice)
116                         return false; // malformed
117                 else {
118                         result |= (slice << bit);
119                         bit += 7;
120                 }
121         }
122         while (*p++ & 0x80);
123         return true;
124 }
125 };
126
127 template <typename V>
128 struct Trie {
129         uint32_t count;
130         uint32_t nodeCount;
131
132         struct Entry
133         {
134                 std::string             name;
135                 V                               info;
136
137                 Entry(void) {}
138                 Entry(const std::string& N, V I) : name(N), info(I) {}
139         };
140
141         Trie(const std::vector<Entry>& entries) : count(0), nodeCount(1) {
142                 // make nodes for all exported symbols
143                 for (auto& entry : entries) {
144                         addEntry(entry);
145                 }
146         }
147
148         void emit(std::vector<uint8_t>& output) {
149                 // create vector of nodes
150                 std::vector<Node*> orderedNodes;
151                 orderedNodes.reserve(nodeCount);
152                 orderTrie(&root, orderedNodes);
153
154                 // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized
155                 bool more;
156                 do {
157                         uint32_t offset = 0;
158                         more = false;
159                         for (auto& node : orderedNodes) {
160                                 if (node->updateOffset(offset)) {
161                                         more = true;
162                                 }
163                         }
164                 } while ( more );
165
166                 // create trie stream
167                 for (auto& node : orderedNodes) {
168                         node->appendToStream(output);
169                 }
170         }
171
172         static
173         inline bool parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output)
174         {
175                 // empty trie has no entries
176                 if ( start == end )
177                         return false;
178                 char cummulativeString[32768];
179                 std::vector<EntryWithOffset> entries;
180                 if ( !processExportNode(start, start, end, cummulativeString, 0, entries) )
181                         return false;
182                 // to preserve tie layout order, sort by node offset
183                 std::sort(entries.begin(), entries.end());
184                 // copy to output
185                 output.reserve(entries.size());
186                 for (auto& entryWithOffset : entries) {
187                         output.push_back(entryWithOffset.entry);
188                 }
189                 return true;
190         }
191
192 private:
193         struct Node
194         {
195                 //This needs to be a map to unsure deterministic ordering of tries.
196                 std::map<std::string,std::unique_ptr<Node> > fChildren;
197                 bool                            fIsTerminal;
198                 uint32_t                        fTrieOffset;
199                 V                                       fInfo;
200
201                 Node(void) : fIsTerminal(false), fTrieOffset(0) {}
202                 Node(V v) :fInfo(v), fIsTerminal(true), fTrieOffset(0) {}
203                 Node(Node&&) = default;
204
205                 // byte for terminal node size in bytes, or 0x00 if not terminal node
206                 // teminal node (uleb128 flags, uleb128 addr [uleb128 other])
207                 // byte for child node count
208                 //  each child: zero terminated substring, uleb128 node offset
209                 bool updateOffset(uint32_t& offset) {
210                         uint32_t nodeSize = 1; // length of export info when no export info
211                         if ( fIsTerminal ) {
212                                 nodeSize = fInfo.encodedSize();
213                                 // do have export info, overall node size so far is uleb128 of export info + export info
214                                 nodeSize += TrieUtils::uleb128_size(nodeSize);
215                         }
216                         // add children
217                         ++nodeSize; // byte for count of chidren
218
219                         for (auto &edge : fChildren) {
220                                 nodeSize += edge.first.length() + 1 + TrieUtils::uleb128_size(edge.second->fTrieOffset);
221                         }
222
223                         bool result = (fTrieOffset != offset);
224                         fTrieOffset = offset;
225                         //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString);
226                         offset += nodeSize;
227                         // return true if fTrieOffset was changed
228                         return result;
229                 }
230
231                 void appendToStream(std::vector<uint8_t>& out) {
232                         if ( fIsTerminal ) {
233                                 fInfo.appendToStream(out);
234                         }
235                         else {
236                                 // no export info uleb128 of zero is one byte of zero
237                                 out.push_back(0);
238                         }
239                         // write number of children
240                         out.push_back(fChildren.size());
241                         // write each child
242                         for (auto &edge : fChildren) {
243                                 TrieUtils::append_string(edge.first, out);
244                                 TrieUtils::append_uleb128(edge.second->fTrieOffset, out);
245                         }
246                 }
247         };
248
249         Node root;
250
251         struct EntryWithOffset
252         {
253                 uintptr_t               nodeOffset;
254                 Entry                   entry;
255
256                 bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); }
257         };
258
259         void addEntry(const std::string& fullStr, std::string::const_iterator start, V v) {
260                 Node *currentNode = &root;
261                 bool done = false;
262
263                 while (!done && !currentNode->fChildren.empty() ) {
264                         done = true;
265
266                         for (auto &entry : currentNode->fChildren) {
267                                 auto res = std::mismatch(entry.first.begin(), entry.first.end(), start);
268
269                                 if (res.first ==  entry.first.end()) {
270                                         //Matched a full edge, go down it
271                                         done = false;
272                                         currentNode = entry.second.get();
273                                         start = res.second;
274                                         break;
275                                 } else if (res.first != entry.first.begin()) {
276                                         // found a common substring, splice in new node
277                                         //  was A -> C,  now A -> B -> C
278
279                                         //Build the new strings
280                                         std::string abEdgeStr(entry.first.begin(), res.first);
281                                         std::string bcEdgeStr(res.first, entry.first.end());
282
283                                         //Copy out the exist node and delete it from the currentNode
284                                         std::unique_ptr<Node> nodeC;
285                                         std::swap(nodeC, entry.second);
286                                         currentNode->fChildren.erase(entry.first);
287
288                                         //Build the new node and insert it
289                                         std::unique_ptr<Node> nodeB = std::make_unique<Node>();
290                                         Node *newNode = nodeB.get();
291
292                                         nodeB->fChildren.insert(std::make_pair(bcEdgeStr, std::move(nodeC)));
293                                         currentNode->fChildren.insert(std::make_pair(abEdgeStr, std::move(nodeB)));
294
295                                         currentNode = newNode;
296                                         start = res.second;
297                                         ++nodeCount;
298                                         break;
299                                 }
300                         }
301                 }
302
303                 // no commonality with any existing child, make a new edge that is this whole string
304                 std::string edgeStr(start, fullStr.end());
305                 v.willInsertAs(fullStr);
306
307                 if (edgeStr.empty()) {
308                         currentNode->fIsTerminal = true;
309                         currentNode->fInfo = v;
310                 } else {
311                         currentNode->fChildren.emplace(edgeStr, std::make_unique<Node>(v));
312                         ++nodeCount;
313                 }
314                 ++count;
315         }
316
317         void addEntry(Entry entry) {
318                 addEntry(entry.name, entry.name.begin(), entry.info);
319         }
320
321 #if TRIE_DEBUG
322         void printTrie(Node& node, std::string cummulativeString) {
323                 if (node.fTerminal) {
324                         printf("%s: \n", cummulativeString.c_str());
325                 }
326                 for (auto &edge : node.fChildren) {
327                         printTrie(*edge.second, cummulativeString+edge.first);
328                 }
329         }
330
331 public:
332         void printTrie(void) {
333                 printTrie(root, "");
334         }
335 private:
336 #endif
337
338         static inline bool processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
339                                                                                  char* cummulativeString, int curStrOffset,
340                                                                                  std::vector<EntryWithOffset>& output)
341         {
342                 if ( p >= end )
343                         return false;
344                 uint64_t terminalSize;
345                 if  ( !TrieUtils::parse_uleb128(p, end, terminalSize) )
346                         return false;
347                 const uint8_t* children = p + terminalSize;
348                 if ( children >= end )
349                         return false;
350                 if ( terminalSize != 0 ) {
351                         EntryWithOffset e;
352                         e.nodeOffset = p-start;
353                         e.entry.name = cummulativeString;
354                         e.entry.info.loadData(p,end);
355                         output.push_back(e);
356                 }
357                 const uint8_t childrenCount = *children++;
358                 const uint8_t* s = children;
359                 for (uint8_t i=0; i < childrenCount; ++i) {
360                         int edgeStrLen = 0;
361                         while (*s != '\0') {
362                                 cummulativeString[curStrOffset+edgeStrLen] = *s++;
363                                 ++edgeStrLen;
364                         }
365                         cummulativeString[curStrOffset+edgeStrLen] = *s++;
366                         uint64_t childNodeOffet;
367                         if ( !TrieUtils::parse_uleb128(s, end, childNodeOffet) )
368                                 return false;
369                         if ( !processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output) )
370                                 return false;
371                 }
372                 return true;
373         }
374
375         void orderTrie(Node* node, std::vector<Node*>& orderedNodes) {
376                 orderedNodes.push_back(node);
377                 for (auto &edge : node->fChildren) {
378                         orderTrie(edge.second.get(), orderedNodes);
379                 }
380         }
381 }; // struct Trie
382
383 struct ExportInfo {
384         uint64_t                address;
385         uint64_t                flags;
386         uint64_t                other;
387         std::string             importName;
388
389         ExportInfo() : address(0), flags(0), other(0)  { }
390
391         uint32_t encodedSize(void) {
392                 uint32_t size = 0;
393                 if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
394                         size = TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(other); // ordinal
395                         if ( !importName.empty() )
396                                 size += importName.length();
397                         ++size; // trailing zero in imported name
398                 }
399                 else {
400                         size = TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(address);
401                         if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
402                                 size += TrieUtils::uleb128_size(other);
403                 }
404                 return size;
405         }
406
407         void appendToStream(std::vector<uint8_t>& out) {
408                 if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
409                         if ( !importName.empty() ) {
410                                 // nodes with re-export info: size, flags, ordinal, string
411                                 uint32_t nodeSize = (uint32_t)(TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(other) + importName.length() + 1);
412                                 out.push_back(nodeSize);
413                                 TrieUtils::append_uleb128(flags, out);
414                                 TrieUtils::append_uleb128(other, out);
415                                 TrieUtils::append_string(importName, out);
416                         }
417                         else {
418                                 // nodes with re-export info: size, flags, ordinal, empty-string
419                                 uint32_t nodeSize = TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(other) + 1;
420                                 out.push_back(nodeSize);
421                                 TrieUtils::append_uleb128(flags, out);
422                                 TrieUtils::append_uleb128(other, out);
423                                 out.push_back(0);
424                         }
425                 }
426                 else if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
427                         // nodes with export info: size, flags, address, other
428                         uint32_t nodeSize = TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(address) + TrieUtils::uleb128_size(other);
429                         out.push_back(nodeSize);
430                         TrieUtils::append_uleb128(flags, out);
431                         TrieUtils::append_uleb128(address, out);
432                         TrieUtils::append_uleb128(other, out);
433                 }
434                 else {
435                         // nodes with export info: size, flags, address
436                         uint32_t nodeSize = TrieUtils::uleb128_size(flags) + TrieUtils::uleb128_size(address);
437                         out.push_back(nodeSize);
438                         TrieUtils::append_uleb128(flags, out);
439                         TrieUtils::append_uleb128(address, out);
440                 }
441         }
442
443         void loadData(const uint8_t* p, const uint8_t* const end) {
444                 TrieUtils::parse_uleb128(p, end, flags);
445                 if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
446                         TrieUtils::parse_uleb128(p, end, other); // dylib ordinal
447                         importName = (char*)p;
448                 }
449                 else {
450                         TrieUtils::parse_uleb128(p, end, address);
451                         if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
452                                 TrieUtils::parse_uleb128(p, end, other);
453                 }
454         }
455
456         void willInsertAs(const std::string& name) {
457                 // Symbols re-exported under the same name do not need an explict import name, delete it to save space
458                 if ((name == importName) ) {
459                         importName = "";
460                 }
461         }
462 };
463
464 typedef Trie<ExportInfo> ExportInfoTrie;
465
466
467 // Used by accelerator tables in dyld shared cache
468 struct DylibIndex {
469         uint32_t                index;
470
471         DylibIndex() : index(0) {}
472         DylibIndex(uint32_t i) : index(i) {}
473
474         uint32_t encodedSize(void) {
475                 return TrieUtils::uleb128_size(index);
476         }
477         
478         void appendToStream(std::vector<uint8_t>& out) {
479         uint32_t nodeSize = TrieUtils::uleb128_size(index);
480         out.push_back(nodeSize);
481         TrieUtils::append_uleb128(index, out);
482         }
483         
484         void loadData(const uint8_t* p, const uint8_t* const end) {
485                 uint64_t temp;
486                 TrieUtils::parse_uleb128(p, end, temp);
487                 index = (uint32_t)temp;
488         }
489         
490         void willInsertAs(const std::string& name) {
491         }
492 };
493 typedef Trie<DylibIndex> DylibIndexTrie;
494
495
496 #endif  // __TRIE__
497
498