]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbitblb.h
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / common / rbbitblb.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 // rbbitblb.h
5 //
6
7 /*
8 **********************************************************************
9 * Copyright (c) 2002-2016, International Business Machines
10 * Corporation and others. All Rights Reserved.
11 **********************************************************************
12 */
13
14 #ifndef RBBITBLB_H
15 #define RBBITBLB_H
16
17 #include "unicode/utypes.h"
18 #include "unicode/uobject.h"
19 #include "unicode/rbbi.h"
20 #include "rbbirb.h"
21 #include "rbbinode.h"
22
23
24 U_NAMESPACE_BEGIN
25
26 class RBBIRuleScanner;
27 class RBBIRuleBuilder;
28 class UVector32;
29
30 //
31 // class RBBITableBuilder is part of the RBBI rule compiler.
32 // It builds the state transition table used by the RBBI runtime
33 // from the expression syntax tree generated by the rule scanner.
34 //
35 // This class is part of the RBBI implementation only.
36 // There is no user-visible public API here.
37 //
38
39 class RBBITableBuilder : public UMemory {
40 public:
41 RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status);
42 ~RBBITableBuilder();
43
44 void buildForwardTable();
45
46 /** Return the runtime size in bytes of the built state table. */
47 int32_t getTableSize() const;
48
49 /** Fill in the runtime state table. Sufficient memory must exist at the specified location.
50 */
51 void exportTable(void *where);
52
53 /**
54 * Find duplicate (redundant) character classes. Begin looking with categories.first.
55 * Duplicate, if found are returned in the categories parameter.
56 * This is an iterator-like function, used to identify character classes
57 * (state table columns) that can be eliminated.
58 * @param categories in/out parameter, specifies where to start looking for duplicates,
59 * and returns the first pair of duplicates found, if any.
60 * @return true if duplicate char classes were found, false otherwise.
61 */
62 bool findDuplCharClassFrom(IntPair *categories);
63
64 /** Remove a column from the state table. Used when two character categories
65 * have been found equivalent, and merged together, to eliminate the uneeded table column.
66 */
67 void removeColumn(int32_t column);
68
69 /** Check for, and remove dupicate states (table rows). */
70 void removeDuplicateStates();
71
72 /** Build the safe reverse table from the already-constructed forward table. */
73 void buildSafeReverseTable(UErrorCode &status);
74
75 /** Return the runtime size in bytes of the built safe reverse state table. */
76 int32_t getSafeTableSize() const;
77
78 /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location.
79 */
80 void exportSafeTable(void *where);
81
82
83 private:
84 void calcNullable(RBBINode *n);
85 void calcFirstPos(RBBINode *n);
86 void calcLastPos(RBBINode *n);
87 void calcFollowPos(RBBINode *n);
88 void calcChainedFollowPos(RBBINode *n);
89 void bofFixup();
90 void buildStateTable();
91 void flagAcceptingStates();
92 void flagLookAheadStates();
93 void flagTaggedStates();
94 void mergeRuleStatusVals();
95
96 /**
97 * Merge redundant state table columns, eliminating character classes with identical behavior.
98 * Done after the state tables are generated, just before converting to their run-time format.
99 */
100 int32_t mergeColumns();
101
102 void addRuleRootNodes(UVector *dest, RBBINode *node);
103
104 /**
105 * Find duplicate (redundant) states, beginning at the specified pair,
106 * within this state table. This is an iterator-like function, used to
107 * identify states (state table rows) that can be eliminated.
108 * @param states in/out parameter, specifies where to start looking for duplicates,
109 * and returns the first pair of duplicates found, if any.
110 * @return true if duplicate states were found, false otherwise.
111 */
112 bool findDuplicateState(IntPair *states);
113
114 /** Remove a duplicate state.
115 * @param duplStates The duplicate states. The first is kept, the second is removed.
116 * All references to the second in the state table are retargeted
117 * to the first.
118 */
119 void removeState(IntPair duplStates);
120
121 /** Find the next duplicate state in the safe reverse table. An iterator function.
122 * @param states in/out parameter, specifies where to start looking for duplicates,
123 * and returns the first pair of duplicates found, if any.
124 * @return true if a duplicate pair of states was found.
125 */
126 bool findDuplicateSafeState(IntPair *states);
127
128 /** Remove a duplicate state from the safe table.
129 * @param duplStates The duplicate states. The first is kept, the second is removed.
130 * All references to the second in the state table are retargeted
131 * to the first.
132 */
133 void removeSafeState(IntPair duplStates);
134
135 // Set functions for UVector.
136 // TODO: make a USet subclass of UVector
137
138 void setAdd(UVector *dest, UVector *source);
139 UBool setEquals(UVector *a, UVector *b);
140
141 void sortedAdd(UVector **dest, int32_t val);
142
143 public:
144 #ifdef RBBI_DEBUG
145 void printSet(UVector *s);
146 void printPosSets(RBBINode *n /* = NULL*/);
147 void printStates();
148 void printRuleStatusTable();
149 void printReverseTable();
150 #else
151 #define printSet(s)
152 #define printPosSets(n)
153 #define printStates()
154 #define printRuleStatusTable()
155 #define printReverseTable()
156 #endif
157
158 private:
159 RBBIRuleBuilder *fRB;
160 RBBINode *&fTree; // The root node of the parse tree to build a
161 // table for.
162 UErrorCode *fStatus;
163
164 /** State Descriptors, UVector<RBBIStateDescriptor> */
165 UVector *fDStates; // D states (Aho's terminology)
166 // Index is state number
167 // Contents are RBBIStateDescriptor pointers.
168
169 /** Synthesized safe table, UVector of UnicodeString, one string per table row. */
170 UVector *fSafeTable;
171
172
173 RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class
174 RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class
175 };
176
177 //
178 // RBBIStateDescriptor - The DFA is constructed as a set of these descriptors,
179 // one for each state.
180 class RBBIStateDescriptor : public UMemory {
181 public:
182 UBool fMarked;
183 int32_t fAccepting;
184 int32_t fLookAhead;
185 UVector *fTagVals;
186 int32_t fTagsIdx;
187 UVector *fPositions; // Set of parse tree positions associated
188 // with this state. Unordered (it's a set).
189 // UVector contents are RBBINode *
190
191 UVector32 *fDtran; // Transitions out of this state.
192 // indexed by input character
193 // contents is int index of dest state
194 // in RBBITableBuilder.fDStates
195
196 RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus);
197 ~RBBIStateDescriptor();
198
199 private:
200 RBBIStateDescriptor(const RBBIStateDescriptor &other); // forbid copying of this class
201 RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other); // forbid copying of this class
202 };
203
204
205
206 U_NAMESPACE_END
207 #endif