]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | // |
4 | // rbbitblb.h | |
5 | // | |
6 | ||
7 | /* | |
8 | ********************************************************************** | |
2ca993e8 | 9 | * Copyright (c) 2002-2016, International Business Machines |
b75a7d8f A |
10 | * Corporation and others. All Rights Reserved. |
11 | ********************************************************************** | |
12 | */ | |
13 | ||
14 | #ifndef RBBITBLB_H | |
15 | #define RBBITBLB_H | |
16 | ||
17 | #include "unicode/utypes.h" | |
3d1f044b A |
18 | |
19 | #if !UCONFIG_NO_BREAK_ITERATION | |
20 | ||
b75a7d8f A |
21 | #include "unicode/uobject.h" |
22 | #include "unicode/rbbi.h" | |
0f5d89e8 | 23 | #include "rbbirb.h" |
b75a7d8f A |
24 | #include "rbbinode.h" |
25 | ||
26 | ||
27 | U_NAMESPACE_BEGIN | |
28 | ||
29 | class RBBIRuleScanner; | |
30 | class RBBIRuleBuilder; | |
0f5d89e8 | 31 | class UVector32; |
b75a7d8f A |
32 | |
33 | // | |
34 | // class RBBITableBuilder is part of the RBBI rule compiler. | |
35 | // It builds the state transition table used by the RBBI runtime | |
36 | // from the expression syntax tree generated by the rule scanner. | |
37 | // | |
38 | // This class is part of the RBBI implementation only. | |
39 | // There is no user-visible public API here. | |
40 | // | |
41 | ||
42 | class RBBITableBuilder : public UMemory { | |
43 | public: | |
0f5d89e8 | 44 | RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status); |
b75a7d8f A |
45 | ~RBBITableBuilder(); |
46 | ||
0f5d89e8 A |
47 | void buildForwardTable(); |
48 | ||
49 | /** Return the runtime size in bytes of the built state table. */ | |
50 | int32_t getTableSize() const; | |
51 | ||
52 | /** Fill in the runtime state table. Sufficient memory must exist at the specified location. | |
53 | */ | |
54 | void exportTable(void *where); | |
55 | ||
56 | /** | |
57 | * Find duplicate (redundant) character classes. Begin looking with categories.first. | |
58 | * Duplicate, if found are returned in the categories parameter. | |
59 | * This is an iterator-like function, used to identify character classes | |
60 | * (state table columns) that can be eliminated. | |
61 | * @param categories in/out parameter, specifies where to start looking for duplicates, | |
62 | * and returns the first pair of duplicates found, if any. | |
63 | * @return true if duplicate char classes were found, false otherwise. | |
64 | */ | |
65 | bool findDuplCharClassFrom(IntPair *categories); | |
66 | ||
67 | /** Remove a column from the state table. Used when two character categories | |
68 | * have been found equivalent, and merged together, to eliminate the uneeded table column. | |
69 | */ | |
70 | void removeColumn(int32_t column); | |
71 | ||
3d1f044b A |
72 | /** |
73 | * Check for, and remove dupicate states (table rows). | |
74 | * @return the number of states removed. | |
75 | */ | |
76 | int32_t removeDuplicateStates(); | |
0f5d89e8 A |
77 | |
78 | /** Build the safe reverse table from the already-constructed forward table. */ | |
79 | void buildSafeReverseTable(UErrorCode &status); | |
80 | ||
81 | /** Return the runtime size in bytes of the built safe reverse state table. */ | |
82 | int32_t getSafeTableSize() const; | |
83 | ||
84 | /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location. | |
85 | */ | |
86 | void exportSafeTable(void *where); | |
b75a7d8f | 87 | |
374ca955 | 88 | |
b75a7d8f A |
89 | private: |
90 | void calcNullable(RBBINode *n); | |
91 | void calcFirstPos(RBBINode *n); | |
92 | void calcLastPos(RBBINode *n); | |
93 | void calcFollowPos(RBBINode *n); | |
374ca955 | 94 | void calcChainedFollowPos(RBBINode *n); |
73c04bcf | 95 | void bofFixup(); |
b75a7d8f A |
96 | void buildStateTable(); |
97 | void flagAcceptingStates(); | |
98 | void flagLookAheadStates(); | |
99 | void flagTaggedStates(); | |
374ca955 | 100 | void mergeRuleStatusVals(); |
b75a7d8f | 101 | |
0f5d89e8 A |
102 | /** |
103 | * Merge redundant state table columns, eliminating character classes with identical behavior. | |
104 | * Done after the state tables are generated, just before converting to their run-time format. | |
105 | */ | |
106 | int32_t mergeColumns(); | |
107 | ||
2ca993e8 A |
108 | void addRuleRootNodes(UVector *dest, RBBINode *node); |
109 | ||
0f5d89e8 A |
110 | /** |
111 | * Find duplicate (redundant) states, beginning at the specified pair, | |
112 | * within this state table. This is an iterator-like function, used to | |
113 | * identify states (state table rows) that can be eliminated. | |
114 | * @param states in/out parameter, specifies where to start looking for duplicates, | |
115 | * and returns the first pair of duplicates found, if any. | |
116 | * @return true if duplicate states were found, false otherwise. | |
117 | */ | |
118 | bool findDuplicateState(IntPair *states); | |
119 | ||
120 | /** Remove a duplicate state. | |
121 | * @param duplStates The duplicate states. The first is kept, the second is removed. | |
122 | * All references to the second in the state table are retargeted | |
123 | * to the first. | |
124 | */ | |
125 | void removeState(IntPair duplStates); | |
126 | ||
127 | /** Find the next duplicate state in the safe reverse table. An iterator function. | |
128 | * @param states in/out parameter, specifies where to start looking for duplicates, | |
129 | * and returns the first pair of duplicates found, if any. | |
130 | * @return true if a duplicate pair of states was found. | |
131 | */ | |
132 | bool findDuplicateSafeState(IntPair *states); | |
133 | ||
134 | /** Remove a duplicate state from the safe table. | |
135 | * @param duplStates The duplicate states. The first is kept, the second is removed. | |
136 | * All references to the second in the state table are retargeted | |
137 | * to the first. | |
138 | */ | |
139 | void removeSafeState(IntPair duplStates); | |
140 | ||
b75a7d8f A |
141 | // Set functions for UVector. |
142 | // TODO: make a USet subclass of UVector | |
143 | ||
144 | void setAdd(UVector *dest, UVector *source); | |
145 | UBool setEquals(UVector *a, UVector *b); | |
146 | ||
374ca955 A |
147 | void sortedAdd(UVector **dest, int32_t val); |
148 | ||
149 | public: | |
150 | #ifdef RBBI_DEBUG | |
b75a7d8f | 151 | void printSet(UVector *s); |
374ca955 | 152 | void printPosSets(RBBINode *n /* = NULL*/); |
b75a7d8f | 153 | void printStates(); |
374ca955 | 154 | void printRuleStatusTable(); |
0f5d89e8 | 155 | void printReverseTable(); |
374ca955 A |
156 | #else |
157 | #define printSet(s) | |
158 | #define printPosSets(n) | |
159 | #define printStates() | |
160 | #define printRuleStatusTable() | |
0f5d89e8 | 161 | #define printReverseTable() |
374ca955 | 162 | #endif |
b75a7d8f A |
163 | |
164 | private: | |
165 | RBBIRuleBuilder *fRB; | |
166 | RBBINode *&fTree; // The root node of the parse tree to build a | |
167 | // table for. | |
168 | UErrorCode *fStatus; | |
169 | ||
0f5d89e8 | 170 | /** State Descriptors, UVector<RBBIStateDescriptor> */ |
b75a7d8f A |
171 | UVector *fDStates; // D states (Aho's terminology) |
172 | // Index is state number | |
173 | // Contents are RBBIStateDescriptor pointers. | |
174 | ||
0f5d89e8 A |
175 | /** Synthesized safe table, UVector of UnicodeString, one string per table row. */ |
176 | UVector *fSafeTable; | |
177 | ||
374ca955 | 178 | |
b75a7d8f A |
179 | RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class |
180 | RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class | |
181 | }; | |
182 | ||
183 | // | |
184 | // RBBIStateDescriptor - The DFA is constructed as a set of these descriptors, | |
185 | // one for each state. | |
186 | class RBBIStateDescriptor : public UMemory { | |
187 | public: | |
188 | UBool fMarked; | |
189 | int32_t fAccepting; | |
190 | int32_t fLookAhead; | |
374ca955 A |
191 | UVector *fTagVals; |
192 | int32_t fTagsIdx; | |
b75a7d8f A |
193 | UVector *fPositions; // Set of parse tree positions associated |
194 | // with this state. Unordered (it's a set). | |
195 | // UVector contents are RBBINode * | |
196 | ||
0f5d89e8 | 197 | UVector32 *fDtran; // Transitions out of this state. |
b75a7d8f A |
198 | // indexed by input character |
199 | // contents is int index of dest state | |
200 | // in RBBITableBuilder.fDStates | |
201 | ||
202 | RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus); | |
203 | ~RBBIStateDescriptor(); | |
204 | ||
205 | private: | |
206 | RBBIStateDescriptor(const RBBIStateDescriptor &other); // forbid copying of this class | |
207 | RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other); // forbid copying of this class | |
208 | }; | |
209 | ||
210 | ||
211 | ||
212 | U_NAMESPACE_END | |
3d1f044b A |
213 | |
214 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | |
215 | ||
b75a7d8f | 216 | #endif |