]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbitblb.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
8 **********************************************************************
9 * Copyright (c) 2002-2016, International Business Machines
10 * Corporation and others. All Rights Reserved.
11 **********************************************************************
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_BREAK_ITERATION
21 #include "unicode/uobject.h"
22 #include "unicode/rbbi.h"
29 class RBBIRuleScanner
;
30 class RBBIRuleBuilder
;
34 // class RBBITableBuilder is part of the RBBI rule compiler.
35 // It builds the state transition table used by the RBBI runtime
36 // from the expression syntax tree generated by the rule scanner.
38 // This class is part of the RBBI implementation only.
39 // There is no user-visible public API here.
42 class RBBITableBuilder
: public UMemory
{
// Constructor.
//   rb        the rule builder (RBBIRuleBuilder) this table builder belongs to;
//             NOTE(review): presumably stored in the fRB member -- confirm in the implementation.
//   rootNode  address of the pointer to the root node of the parse tree;
//             NOTE(review): presumably what the fTree reference member is bound to -- confirm.
//   status    ICU error code, passed by reference.
44 RBBITableBuilder(RBBIRuleBuilder
*rb
, RBBINode
**rootNode
, UErrorCode
&status
);
47 void buildForwardTable();
49 /** Return the runtime size in bytes of the built state table. */
50 int32_t getTableSize() const;
52 /** Fill in the runtime state table. Sufficient memory must exist at the specified location.
54 void exportTable(void *where
);
57 * Find duplicate (redundant) character classes. Begin looking with categories.first.
58 * Duplicates, if found, are returned in the categories parameter.
59 * This is an iterator-like function, used to identify character classes
60 * (state table columns) that can be eliminated.
61 * @param categories in/out parameter, specifies where to start looking for duplicates,
62 * and returns the first pair of duplicates found, if any.
63 * @return true if duplicate char classes were found, false otherwise.
65 bool findDuplCharClassFrom(IntPair
*categories
);
67 /** Remove a column from the state table. Used when two character categories
68 * have been found equivalent, and merged together, to eliminate the unneeded table column.
70 void removeColumn(int32_t column
);
73 * Check for, and remove, duplicate states (table rows).
74 * @return the number of states removed.
76 int32_t removeDuplicateStates();
78 /** Build the safe reverse table from the already-constructed forward table. */
79 void buildSafeReverseTable(UErrorCode
&status
);
81 /** Return the runtime size in bytes of the built safe reverse state table. */
82 int32_t getSafeTableSize() const;
84 /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location.
86 void exportSafeTable(void *where
);
// Per-node analysis passes over the rule parse tree; each takes the node to process.
// NOTE(review): the names correspond to the nullable / firstpos / lastpos / followpos
// functions of the textbook regular-expression-to-DFA construction -- confirm against
// the definitions in the implementation file.
90 void calcNullable(RBBINode
*n
);
91 void calcFirstPos(RBBINode
*n
);
92 void calcLastPos(RBBINode
*n
);
93 void calcFollowPos(RBBINode
*n
);
94 void calcChainedFollowPos(RBBINode
*n
);
96 void buildStateTable();
97 void flagAcceptingStates();
98 void flagLookAheadStates();
99 void flagTaggedStates();
100 void mergeRuleStatusVals();
103 * Merge redundant state table columns, eliminating character classes with identical behavior.
104 * Done after the state tables are generated, just before converting to their run-time format.
106 int32_t mergeColumns();
108 void addRuleRootNodes(UVector
*dest
, RBBINode
*node
);
111 * Find duplicate (redundant) states, beginning at the specified pair,
112 * within this state table. This is an iterator-like function, used to
113 * identify states (state table rows) that can be eliminated.
114 * @param states in/out parameter, specifies where to start looking for duplicates,
115 * and returns the first pair of duplicates found, if any.
116 * @return true if duplicate states were found, false otherwise.
118 bool findDuplicateState(IntPair
*states
);
120 /** Remove a duplicate state.
121 * @param duplStates The duplicate states. The first is kept, the second is removed.
122 * All references to the second in the state table are retargeted to the first.
125 void removeState(IntPair duplStates
);
127 /** Find the next duplicate state in the safe reverse table. An iterator function.
128 * @param states in/out parameter, specifies where to start looking for duplicates,
129 * and returns the first pair of duplicates found, if any.
130 * @return true if a duplicate pair of states was found.
132 bool findDuplicateSafeState(IntPair
*states
);
134 /** Remove a duplicate state from the safe table.
135 * @param duplStates The duplicate states. The first is kept, the second is removed.
136 * All references to the second in the state table are retargeted to the first.
139 void removeSafeState(IntPair duplStates
);
141 // Set functions for UVector.
142 // TODO: make a USet subclass of UVector
144 void setAdd(UVector
*dest
, UVector
*source
);
145 UBool
setEquals(UVector
*a
, UVector
*b
);
147 void sortedAdd(UVector
**dest
, int32_t val
);
151 void printSet(UVector
*s
);
152 void printPosSets(RBBINode
*n
/* = NULL*/);
154 void printRuleStatusTable();
155 void printReverseTable();
158 #define printPosSets(n)
159 #define printStates()
160 #define printRuleStatusTable()
161 #define printReverseTable()
165 RBBIRuleBuilder
*fRB
;
166 RBBINode
*&fTree
; // The root node of the parse tree to build a
170 /** State Descriptors, UVector<RBBIStateDescriptor> */
171 UVector
*fDStates
; // D states (Aho's terminology)
172 // Index is state number
173 // Contents are RBBIStateDescriptor pointers.
175 /** Synthesized safe table, UVector of UnicodeString, one string per table row. */
179 RBBITableBuilder(const RBBITableBuilder
&other
); // forbid copying of this class
180 RBBITableBuilder
&operator=(const RBBITableBuilder
&other
); // forbid copying of this class
184 // RBBIStateDescriptor - The DFA is constructed as a set of these descriptors,
185 // one for each state.
186 class RBBIStateDescriptor
: public UMemory
{
193 UVector
*fPositions
; // Set of parse tree positions associated
194 // with this state. Unordered (it's a set).
195 // UVector contents are RBBINode *
197 UVector32
*fDtran
; // Transitions out of this state.
198 // indexed by input character
199 // contents is int index of dest state
200 // in RBBITableBuilder.fDStates
202 RBBIStateDescriptor(int maxInputSymbol
, UErrorCode
*fStatus
);
203 ~RBBIStateDescriptor();
206 RBBIStateDescriptor(const RBBIStateDescriptor
&other
); // forbid copying of this class
207 RBBIStateDescriptor
&operator=(const RBBIStateDescriptor
&other
); // forbid copying of this class
214 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */