]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | // | |
4 | /* | |
5 | ********************************************************************** | |
6 | * Legacy version of RBBISymbolTable and RBBIRuleBuilder from ICU 57, | |
7 | * only for use by Apple RuleBasedTokenizer | |
8 | ********************************************************************** | |
9 | */ | |
10 | ||
11 | #ifndef RBBIRB57_H | |
12 | #define RBBIRB57_H | |
13 | ||
14 | #include "unicode/utypes.h" | |
15 | #include "unicode/uobject.h" | |
16 | #include "unicode/uniset.h" | |
17 | #include "unicode/parseerr.h" | |
18 | #include "uhash.h" | |
19 | #include "uvector.h" | |
20 | #include "unicode/symtable.h"// SymbolTable: the interface that UnicodeSet parsing uses | |
21 | // to look up references to $variables within a set. | |
22 | #include "rbbidata57.h" | |
23 | #include "rbbisetb57.h" | |
24 | #include "rbbirb.h" | |
25 | ||
26 | ||
27 | ||
28 | U_NAMESPACE_BEGIN | |
29 | ||
30 | class RBBIRuleScanner57; | |
31 | struct RBBIRuleTableEl; | |
32 | class RBBISetBuilder57; | |
33 | class RBBINode; | |
34 | class RBBITableBuilder57; | |
35 | ||
36 | ||
37 | ||
38 | //-------------------------------------------------------------------------------- | |
39 | // | |
40 | // RBBISymbolTable57. Implements SymbolTable interface that is used by the | |
41 | // UnicodeSet parser to resolve references to $variables. | |
42 | // | |
43 | //-------------------------------------------------------------------------------- | |
44 | // class RBBISymbolTableEntry - from standard rbbirb.h | |
45 | ||
46 | ||
47 | class RBBISymbolTable57 : public UMemory, public SymbolTable { | |
48 | private: | |
49 | const UnicodeString &fRules; | |
50 | UHashtable *fHashTable; | |
51 | RBBIRuleScanner57 *fRuleScanner; | |
52 | ||
53 | // The next two fields are part of the mechanism for passing references to | |
54 | // already-constructed UnicodeSets back to the UnicodeSet constructor | |
55 | // when the pattern being parsed includes $variable references. | |
56 | const UnicodeString ffffString; // presumably = "\uffff" (note previously read "/uffff"); value is set outside this header | |
57 | UnicodeSet *fCachedSetLookup; | |
58 | ||
59 | public: | |
60 | // SymbolTable interface functions (API inherited from class SymbolTable) | |
61 | virtual const UnicodeString* lookup(const UnicodeString& s) const; | |
62 | virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const; | |
63 | virtual UnicodeString parseReference(const UnicodeString& text, | |
64 | ParsePosition& pos, int32_t limit) const; | |
65 | ||
66 | // Additional functions specific to this class (not part of the SymbolTable API) | |
67 | RBBISymbolTable57(RBBIRuleScanner57 *, const UnicodeString &fRules, UErrorCode &status); | |
68 | virtual ~RBBISymbolTable57(); | |
69 | ||
70 | virtual RBBINode *lookupNode(const UnicodeString &key) const; | |
71 | virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err); | |
72 | ||
73 | #ifdef RBBI_DEBUG | |
74 | virtual void rbbiSymtablePrint() const; | |
75 | #else | |
76 | // Non-debug builds: rbbiSymtablePrint() is a preprocessor macro rather than a member function; | |
77 | // it expands to "fFakeField=0;" (note the built-in semicolon) so that call sites still compile. | |
78 | int32_t fFakeField; | |
79 | #define rbbiSymtablePrint() fFakeField=0; | |
80 | #endif | |
81 | ||
82 | private: | |
83 | RBBISymbolTable57(const RBBISymbolTable57 &other); // private declaration: forbids copy construction of this class | |
84 | RBBISymbolTable57 &operator=(const RBBISymbolTable57 &other); // private declaration: forbids copy assignment of this class | |
85 | }; | |
86 | ||
87 | ||
88 | //-------------------------------------------------------------------------------- | |
89 | // | |
90 | // class RBBIRuleBuilder57 The top-level class handling RBBI rule compiling. | |
91 | // | |
92 | //-------------------------------------------------------------------------------- | |
93 | class RBBIRuleBuilder57 : public UMemory { | |
94 | public: | |
95 | ||
96 | // Create a rule-based break iterator from a set of rules. | |
97 | // This function is the main entry point into the rule builder. The | |
98 | // public ICU API for creating RBBIs uses this function to do the actual work. | |
99 | // NOTE(review): per this file's header, this legacy copy is only for use by Apple RuleBasedTokenizer - confirm which callers still apply. | |
100 | static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules, | |
101 | UParseError *parseError, | |
102 | UErrorCode &status); | |
103 | ||
104 | public: | |
105 | // The "public" functions and data members that appear below are accessed | |
106 | // (and shared) by the various parts that make up the rule builder. They | |
107 | // are NOT intended to be accessed by anything outside of the | |
108 | // rule builder implementation. | |
109 | RBBIRuleBuilder57(const UnicodeString &rules, | |
110 | UParseError *parseErr, | |
111 | UErrorCode &status | |
112 | ); | |
113 | ||
114 | virtual ~RBBIRuleBuilder57(); | |
115 | char *fDebugEnv; // controls debug trace output | |
116 | UErrorCode *fStatus; // Error reporting. Keeping status | |
117 | UParseError *fParseError; // here avoids passing it everywhere. | |
118 | const UnicodeString &fRules; // The rule string that we are compiling (held by reference, not copied) | |
119 | ||
120 | RBBIRuleScanner57 *fScanner; // The scanner. | |
121 | RBBINode *fForwardTree; // The parse trees, generated by the scanner, | |
122 | RBBINode *fReverseTree; // then manipulated by subsequent steps. | |
123 | RBBINode *fSafeFwdTree; | |
124 | RBBINode *fSafeRevTree; | |
125 | ||
126 | RBBINode **fDefaultTree; // For rules not qualified with a !, | |
127 | // the tree to which they belong. | |
128 | ||
129 | UBool fChainRules; // True for chained Unicode TR style rules. | |
130 | // False for traditional regexp rules. | |
131 | ||
132 | UBool fLBCMNoChain; // True: suppress chaining of rules on | |
133 | // chars with LineBreak property == CM. | |
134 | ||
135 | UBool fLookAheadHardBreak; // True: Look ahead matches cause an | |
136 | // immediate break, no continuing for the | |
137 | // longest match. | |
138 | ||
139 | UBool fRINoChain; // True: suppress chaining of rules on chars | |
140 | // with (grapheme/word/line)break property == RI. | |
141 | ||
142 | RBBISetBuilder57 *fSetBuilder; // Set and Character Category builder. | |
143 | UVector *fUSetNodes; // Vector of all uset nodes. | |
144 | ||
145 | RBBITableBuilder57 *fForwardTables; // State transition tables | |
146 | RBBITableBuilder57 *fReverseTables; | |
147 | RBBITableBuilder57 *fSafeFwdTables; | |
148 | RBBITableBuilder57 *fSafeRevTables; | |
149 | ||
150 | UVector *fRuleStatusVals; // The values that can be returned | |
151 | // from getRuleStatus(). | |
152 | ||
153 | RBBIDataHeader57 *flattenData(); // Create the flattened (runtime format) | |
154 | // data tables. | |
155 | private: | |
156 | RBBIRuleBuilder57(const RBBIRuleBuilder57 &other); // private declaration: forbids copy construction of this class | |
157 | RBBIRuleBuilder57 &operator=(const RBBIRuleBuilder57 &other); // private declaration: forbids copy assignment of this class | |
158 | }; | |
159 | ||
160 | ||
161 | ||
162 | ||
163 | // struct RBBISetTableEl - from standard rbbirb.h | |
164 | ||
165 | // RBBIDebugPrintf - from standard rbbirb.h | |
166 | ||
167 | U_NAMESPACE_END | |
168 | #endif | |
169 | ||
170 | ||
171 |