]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | // |
4 | // rbbitblb.h | |
5 | // | |
6 | ||
7 | /* | |
8 | ********************************************************************** | |
2ca993e8 | 9 | * Copyright (c) 2002-2016, International Business Machines |
b75a7d8f A |
10 | * Corporation and others. All Rights Reserved. |
11 | ********************************************************************** | |
12 | */ | |
13 | ||
14 | #ifndef RBBITBLB_H | |
15 | #define RBBITBLB_H | |
16 | ||
17 | #include "unicode/utypes.h" | |
18 | #include "unicode/uobject.h" | |
19 | #include "unicode/rbbi.h" | |
0f5d89e8 | 20 | #include "rbbirb.h" |
b75a7d8f A |
21 | #include "rbbinode.h" |
22 | ||
23 | ||
24 | U_NAMESPACE_BEGIN | |
25 | ||
26 | class RBBIRuleScanner; | |
27 | class RBBIRuleBuilder; | |
0f5d89e8 | 28 | class UVector32; |
b75a7d8f A |
29 | |
30 | // | |
31 | // class RBBITableBuilder is part of the RBBI rule compiler. | |
32 | // It builds the state transition table used by the RBBI runtime | |
33 | // from the expression syntax tree generated by the rule scanner. | |
34 | // | |
35 | // This class is part of the RBBI implementation only. | |
36 | // There is no user-visible public API here. | |
37 | // | |
38 | ||
39 | class RBBITableBuilder : public UMemory { | |
40 | public: | |
0f5d89e8 | 41 | RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status); |
b75a7d8f A |
42 | ~RBBITableBuilder(); |
43 | ||
0f5d89e8 A |
44 | void buildForwardTable(); |
45 | ||
46 | /** Return the runtime size in bytes of the built state table. */ | |
47 | int32_t getTableSize() const; | |
48 | ||
49 | /** Fill in the runtime state table. Sufficient memory must exist at the specified location. | |
50 | */ | |
51 | void exportTable(void *where); | |
52 | ||
53 | /** | |
54 | * Find duplicate (redundant) character classes. Begin looking with categories.first. | |
55 | * Duplicate, if found are returned in the categories parameter. | |
56 | * This is an iterator-like function, used to identify character classes | |
57 | * (state table columns) that can be eliminated. | |
58 | * @param categories in/out parameter, specifies where to start looking for duplicates, | |
59 | * and returns the first pair of duplicates found, if any. | |
60 | * @return true if duplicate char classes were found, false otherwise. | |
61 | */ | |
62 | bool findDuplCharClassFrom(IntPair *categories); | |
63 | ||
64 | /** Remove a column from the state table. Used when two character categories | |
65 | * have been found equivalent, and merged together, to eliminate the uneeded table column. | |
66 | */ | |
67 | void removeColumn(int32_t column); | |
68 | ||
69 | /** Check for, and remove dupicate states (table rows). */ | |
70 | void removeDuplicateStates(); | |
71 | ||
72 | /** Build the safe reverse table from the already-constructed forward table. */ | |
73 | void buildSafeReverseTable(UErrorCode &status); | |
74 | ||
75 | /** Return the runtime size in bytes of the built safe reverse state table. */ | |
76 | int32_t getSafeTableSize() const; | |
77 | ||
78 | /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location. | |
79 | */ | |
80 | void exportSafeTable(void *where); | |
b75a7d8f | 81 | |
374ca955 | 82 | |
b75a7d8f A |
83 | private: |
84 | void calcNullable(RBBINode *n); | |
85 | void calcFirstPos(RBBINode *n); | |
86 | void calcLastPos(RBBINode *n); | |
87 | void calcFollowPos(RBBINode *n); | |
374ca955 | 88 | void calcChainedFollowPos(RBBINode *n); |
73c04bcf | 89 | void bofFixup(); |
b75a7d8f A |
90 | void buildStateTable(); |
91 | void flagAcceptingStates(); | |
92 | void flagLookAheadStates(); | |
93 | void flagTaggedStates(); | |
374ca955 | 94 | void mergeRuleStatusVals(); |
b75a7d8f | 95 | |
0f5d89e8 A |
96 | /** |
97 | * Merge redundant state table columns, eliminating character classes with identical behavior. | |
98 | * Done after the state tables are generated, just before converting to their run-time format. | |
99 | */ | |
100 | int32_t mergeColumns(); | |
101 | ||
2ca993e8 A |
102 | void addRuleRootNodes(UVector *dest, RBBINode *node); |
103 | ||
0f5d89e8 A |
104 | /** |
105 | * Find duplicate (redundant) states, beginning at the specified pair, | |
106 | * within this state table. This is an iterator-like function, used to | |
107 | * identify states (state table rows) that can be eliminated. | |
108 | * @param states in/out parameter, specifies where to start looking for duplicates, | |
109 | * and returns the first pair of duplicates found, if any. | |
110 | * @return true if duplicate states were found, false otherwise. | |
111 | */ | |
112 | bool findDuplicateState(IntPair *states); | |
113 | ||
114 | /** Remove a duplicate state. | |
115 | * @param duplStates The duplicate states. The first is kept, the second is removed. | |
116 | * All references to the second in the state table are retargeted | |
117 | * to the first. | |
118 | */ | |
119 | void removeState(IntPair duplStates); | |
120 | ||
121 | /** Find the next duplicate state in the safe reverse table. An iterator function. | |
122 | * @param states in/out parameter, specifies where to start looking for duplicates, | |
123 | * and returns the first pair of duplicates found, if any. | |
124 | * @return true if a duplicate pair of states was found. | |
125 | */ | |
126 | bool findDuplicateSafeState(IntPair *states); | |
127 | ||
128 | /** Remove a duplicate state from the safe table. | |
129 | * @param duplStates The duplicate states. The first is kept, the second is removed. | |
130 | * All references to the second in the state table are retargeted | |
131 | * to the first. | |
132 | */ | |
133 | void removeSafeState(IntPair duplStates); | |
134 | ||
b75a7d8f A |
135 | // Set functions for UVector. |
136 | // TODO: make a USet subclass of UVector | |
137 | ||
138 | void setAdd(UVector *dest, UVector *source); | |
139 | UBool setEquals(UVector *a, UVector *b); | |
140 | ||
374ca955 A |
141 | void sortedAdd(UVector **dest, int32_t val); |
142 | ||
143 | public: | |
144 | #ifdef RBBI_DEBUG | |
b75a7d8f | 145 | void printSet(UVector *s); |
374ca955 | 146 | void printPosSets(RBBINode *n /* = NULL*/); |
b75a7d8f | 147 | void printStates(); |
374ca955 | 148 | void printRuleStatusTable(); |
0f5d89e8 | 149 | void printReverseTable(); |
374ca955 A |
150 | #else |
151 | #define printSet(s) | |
152 | #define printPosSets(n) | |
153 | #define printStates() | |
154 | #define printRuleStatusTable() | |
0f5d89e8 | 155 | #define printReverseTable() |
374ca955 | 156 | #endif |
b75a7d8f A |
157 | |
158 | private: | |
159 | RBBIRuleBuilder *fRB; | |
160 | RBBINode *&fTree; // The root node of the parse tree to build a | |
161 | // table for. | |
162 | UErrorCode *fStatus; | |
163 | ||
0f5d89e8 | 164 | /** State Descriptors, UVector<RBBIStateDescriptor> */ |
b75a7d8f A |
165 | UVector *fDStates; // D states (Aho's terminology) |
166 | // Index is state number | |
167 | // Contents are RBBIStateDescriptor pointers. | |
168 | ||
0f5d89e8 A |
169 | /** Synthesized safe table, UVector of UnicodeString, one string per table row. */ |
170 | UVector *fSafeTable; | |
171 | ||
374ca955 | 172 | |
b75a7d8f A |
173 | RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class |
174 | RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class | |
175 | }; | |
176 | ||
177 | // | |
178 | // RBBIStateDescriptor - The DFA is constructed as a set of these descriptors, | |
179 | // one for each state. | |
180 | class RBBIStateDescriptor : public UMemory { | |
181 | public: | |
182 | UBool fMarked; | |
183 | int32_t fAccepting; | |
184 | int32_t fLookAhead; | |
374ca955 A |
185 | UVector *fTagVals; |
186 | int32_t fTagsIdx; | |
b75a7d8f A |
187 | UVector *fPositions; // Set of parse tree positions associated |
188 | // with this state. Unordered (it's a set). | |
189 | // UVector contents are RBBINode * | |
190 | ||
0f5d89e8 | 191 | UVector32 *fDtran; // Transitions out of this state. |
b75a7d8f A |
192 | // indexed by input character |
193 | // contents is int index of dest state | |
194 | // in RBBITableBuilder.fDStates | |
195 | ||
196 | RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus); | |
197 | ~RBBIStateDescriptor(); | |
198 | ||
199 | private: | |
200 | RBBIStateDescriptor(const RBBIStateDescriptor &other); // forbid copying of this class | |
201 | RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other); // forbid copying of this class | |
202 | }; | |
203 | ||
204 | ||
205 | ||
206 | U_NAMESPACE_END | |
207 | #endif |