]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbitblb.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
8 **********************************************************************
9 * Copyright (c) 2002-2016, International Business Machines
10 * Corporation and others. All Rights Reserved.
11 **********************************************************************
17 #include "unicode/utypes.h"
18 #include "unicode/uobject.h"
19 #include "unicode/rbbi.h"
26 class RBBIRuleScanner
;
27 class RBBIRuleBuilder
;
31 // class RBBITableBuilder is part of the RBBI rule compiler.
32 // It builds the state transition table used by the RBBI runtime
33 // from the expression syntax tree generated by the rule scanner.
35 // This class is part of the RBBI implementation only.
36 // There is no user-visible public API here.
39 class RBBITableBuilder
: public UMemory
{
41 RBBITableBuilder(RBBIRuleBuilder
*rb
, RBBINode
**rootNode
, UErrorCode
&status
);
44 void buildForwardTable();
46 /** Return the runtime size in bytes of the built state table. */
47 int32_t getTableSize() const;
49 /** Fill in the runtime state table. Sufficient memory must exist at the specified location.
51 void exportTable(void *where
);
54 * Find duplicate (redundant) character classes. Begin looking with categories.first.
55 * Duplicates, if found, are returned in the categories parameter.
56 * This is an iterator-like function, used to identify character classes
57 * (state table columns) that can be eliminated.
58 * @param categories in/out parameter, specifies where to start looking for duplicates,
59 * and returns the first pair of duplicates found, if any.
60 * @return true if duplicate char classes were found, false otherwise.
62 bool findDuplCharClassFrom(IntPair
*categories
);
64 /** Remove a column from the state table. Used when two character categories
65 * have been found equivalent, and merged together, to eliminate the unneeded table column.
67 void removeColumn(int32_t column
);
69 /** Check for, and remove, duplicate states (table rows). */
70 void removeDuplicateStates();
72 /** Build the safe reverse table from the already-constructed forward table. */
73 void buildSafeReverseTable(UErrorCode
&status
);
75 /** Return the runtime size in bytes of the built safe reverse state table. */
76 int32_t getSafeTableSize() const;
78 /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location.
80 void exportSafeTable(void *where
);
84 void calcNullable(RBBINode
*n
);
85 void calcFirstPos(RBBINode
*n
);
86 void calcLastPos(RBBINode
*n
);
87 void calcFollowPos(RBBINode
*n
);
88 void calcChainedFollowPos(RBBINode
*n
);
90 void buildStateTable();
91 void flagAcceptingStates();
92 void flagLookAheadStates();
93 void flagTaggedStates();
94 void mergeRuleStatusVals();
97 * Merge redundant state table columns, eliminating character classes with identical behavior.
98 * Done after the state tables are generated, just before converting to their run-time format.
100 int32_t mergeColumns();
102 void addRuleRootNodes(UVector
*dest
, RBBINode
*node
);
105 * Find duplicate (redundant) states, beginning at the specified pair,
106 * within this state table. This is an iterator-like function, used to
107 * identify states (state table rows) that can be eliminated.
108 * @param states in/out parameter, specifies where to start looking for duplicates,
109 * and returns the first pair of duplicates found, if any.
110 * @return true if duplicate states were found, false otherwise.
112 bool findDuplicateState(IntPair
*states
);
114 /** Remove a duplicate state.
115 * @param duplStates The duplicate states. The first is kept, the second is removed.
116 * All references to the second in the state table are retargeted to the first.
119 void removeState(IntPair duplStates
);
121 /** Find the next duplicate state in the safe reverse table. An iterator function.
122 * @param states in/out parameter, specifies where to start looking for duplicates,
123 * and returns the first pair of duplicates found, if any.
124 * @return true if a duplicate pair of states was found.
126 bool findDuplicateSafeState(IntPair
*states
);
128 /** Remove a duplicate state from the safe table.
129 * @param duplStates The duplicate states. The first is kept, the second is removed.
130 * All references to the second in the state table are retargeted to the first.
133 void removeSafeState(IntPair duplStates
);
135 // Set functions for UVector.
136 // TODO: make a USet subclass of UVector
138 void setAdd(UVector
*dest
, UVector
*source
);
139 UBool
setEquals(UVector
*a
, UVector
*b
);
141 void sortedAdd(UVector
**dest
, int32_t val
);
145 void printSet(UVector
*s
);
146 void printPosSets(RBBINode
*n
/* = NULL*/);
148 void printRuleStatusTable();
149 void printReverseTable();
152 #define printPosSets(n)
153 #define printStates()
154 #define printRuleStatusTable()
155 #define printReverseTable()
159 RBBIRuleBuilder
*fRB
;
160 RBBINode
*&fTree
; // The root node of the parse tree to build a
164 /** State Descriptors, UVector<RBBIStateDescriptor> */
165 UVector
*fDStates
; // D states (Aho's terminology)
166 // Index is state number
167 // Contents are RBBIStateDescriptor pointers.
169 /** Synthesized safe table, UVector of UnicodeString, one string per table row. */
173 RBBITableBuilder(const RBBITableBuilder
&other
); // forbid copying of this class
174 RBBITableBuilder
&operator=(const RBBITableBuilder
&other
); // forbid copying of this class
178 // RBBIStateDescriptor - The DFA is constructed as a set of these descriptors,
179 // one for each state.
180 class RBBIStateDescriptor
: public UMemory
{
187 UVector
*fPositions
; // Set of parse tree positions associated
188 // with this state. Unordered (it's a set).
189 // UVector contents are RBBINode *
191 UVector32
*fDtran
; // Transitions out of this state.
192 // indexed by input character
193 // contents is int index of dest state
194 // in RBBITableBuilder.fDStates
196 RBBIStateDescriptor(int maxInputSymbol
, UErrorCode
*fStatus
);
197 ~RBBIStateDescriptor();
200 RBBIStateDescriptor(const RBBIStateDescriptor
&other
); // forbid copying of this class
201 RBBIStateDescriptor
&operator=(const RBBIStateDescriptor
&other
); // forbid copying of this class