]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | // |
4 | // rbbirb.h | |
5 | // | |
46f4442e | 6 | // Copyright (C) 2002-2008, International Business Machines Corporation and others. |
b75a7d8f A |
7 | // All Rights Reserved. |
8 | // | |
374ca955 A |
9 | // This file contains declarations for several classes from the |
10 | // Rule Based Break Iterator rule builder. | |
b75a7d8f A |
11 | // |
12 | ||
13 | ||
14 | #ifndef RBBIRB_H | |
15 | #define RBBIRB_H | |
16 | ||
17 | #include "unicode/utypes.h" | |
18 | #include "unicode/uobject.h" | |
19 | #include "unicode/rbbi.h" | |
20 | #include "unicode/uniset.h" | |
21 | #include "unicode/parseerr.h" | |
22 | #include "uhash.h" | |
23 | #include "uvector.h" | |
374ca955 | 24 | #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that |
b75a7d8f A |
25 | // looks up references to $variables within a set. |
26 | ||
27 | ||
28 | ||
29 | U_NAMESPACE_BEGIN | |
30 | ||
31 | class RBBIRuleScanner; | |
32 | struct RBBIRuleTableEl; | |
33 | class RBBISetBuilder; | |
34 | class RBBINode; | |
35 | class RBBITableBuilder; | |
36 | ||
37 | ||
38 | ||
39 | //-------------------------------------------------------------------------------- | |
40 | // | |
41 | // RBBISymbolTable. Implements SymbolTable interface that is used by the | |
42 | // UnicodeSet parser to resolve references to $variables. | |
43 | // | |
44 | //-------------------------------------------------------------------------------- | |
45 | class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one | |
46 | public: // of these structs for each entry. | |
47 | RBBISymbolTableEntry(); | |
48 | UnicodeString key; | |
49 | RBBINode *val; | |
50 | ~RBBISymbolTableEntry(); | |
51 | ||
52 | private: | |
53 | RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class | |
54 | RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class | |
55 | }; | |
56 | ||
57 | ||
58 | class RBBISymbolTable : public UMemory, public SymbolTable { | |
59 | private: | |
60 | const UnicodeString &fRules; | |
61 | UHashtable *fHashTable; | |
62 | RBBIRuleScanner *fRuleScanner; | |
63 | ||
64 | // These next two fields are part of the mechanism for passing references to | |
65 | // already-constructed UnicodeSets back to the UnicodeSet constructor | |
66 | // when the pattern includes $variable references. | |
67 | const UnicodeString ffffString; // = "/uffff" | |
68 | UnicodeSet *fCachedSetLookup; | |
69 | ||
70 | public: | |
71 | // API inherited from class SymbolTable | |
72 | virtual const UnicodeString* lookup(const UnicodeString& s) const; | |
73 | virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const; | |
74 | virtual UnicodeString parseReference(const UnicodeString& text, | |
75 | ParsePosition& pos, int32_t limit) const; | |
76 | ||
77 | // Additional Functions | |
78 | RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status); | |
79 | virtual ~RBBISymbolTable(); | |
80 | ||
81 | virtual RBBINode *lookupNode(const UnicodeString &key) const; | |
82 | virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err); | |
83 | ||
374ca955 A |
84 | #ifdef RBBI_DEBUG |
85 | virtual void rbbiSymtablePrint() const; | |
86 | #else | |
87 | // A do-nothing inline function for non-debug builds. Member funcs can't be empty | |
88 | // or the call sites won't compile. | |
73c04bcf | 89 | int32_t fFakeField; |
374ca955 A |
90 | #define rbbiSymtablePrint() fFakeField=0; |
91 | #endif | |
b75a7d8f A |
92 | |
93 | private: | |
94 | RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class | |
95 | RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class | |
96 | }; | |
97 | ||
98 | ||
99 | //-------------------------------------------------------------------------------- | |
100 | // | |
101 | // class RBBIRuleBuilder The top-level class handling RBBI rule compiling. | |
102 | // | |
103 | //-------------------------------------------------------------------------------- | |
104 | class RBBIRuleBuilder : public UMemory { | |
105 | public: | |
106 | ||
107 | // Create a rule based break iterator from a set of rules. | |
108 | // This function is the main entry point into the rule builder. The | |
109 | // public ICU API for creating RBBIs uses this function to do the actual work. | |
110 | // | |
111 | static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules, | |
46f4442e | 112 | UParseError *parseError, |
b75a7d8f A |
113 | UErrorCode &status); |
114 | ||
115 | public: | |
116 | // The "public" functions and data members that appear below are accessed | |
117 | // (and shared) by the various parts that make up the rule builder. They | |
118 | // are NOT intended to be accessed by anything outside of the | |
119 | // rule builder implementation. | |
120 | RBBIRuleBuilder(const UnicodeString &rules, | |
46f4442e | 121 | UParseError *parseErr, |
b75a7d8f A |
122 | UErrorCode &status |
123 | ); | |
124 | ||
125 | virtual ~RBBIRuleBuilder(); | |
126 | char *fDebugEnv; // controls debug trace output | |
127 | UErrorCode *fStatus; // Error reporting. Keeping status | |
128 | UParseError *fParseError; // here avoids passing it everywhere. | |
129 | const UnicodeString &fRules; // The rule string that we are compiling | |
130 | ||
131 | RBBIRuleScanner *fScanner; // The scanner. | |
132 | RBBINode *fForwardTree; // The parse trees, generated by the scanner, | |
133 | RBBINode *fReverseTree; // then manipulated by subsequent steps. | |
374ca955 A |
134 | RBBINode *fSafeFwdTree; |
135 | RBBINode *fSafeRevTree; | |
136 | ||
137 | RBBINode **fDefaultTree; // For rules not qualified with a ! | |
138 | // the tree to which they belong to. | |
139 | ||
140 | UBool fChainRules; // True for chained Unicode TR style rules. | |
141 | // False for traditional regexp rules. | |
142 | ||
143 | UBool fLBCMNoChain; // True: suppress chaining of rules on | |
144 | // chars with LineBreak property == CM. | |
145 | ||
146 | UBool fLookAheadHardBreak; // True: Look ahead matches cause an | |
147 | // immediate break, no continuing for the | |
148 | // longest match. | |
b75a7d8f A |
149 | |
150 | RBBISetBuilder *fSetBuilder; // Set and Character Category builder. | |
151 | UVector *fUSetNodes; // Vector of all uset nodes. | |
152 | ||
153 | RBBITableBuilder *fForwardTables; // State transition tables | |
154 | RBBITableBuilder *fReverseTables; | |
374ca955 A |
155 | RBBITableBuilder *fSafeFwdTables; |
156 | RBBITableBuilder *fSafeRevTables; | |
157 | ||
158 | UVector *fRuleStatusVals; // The values that can be returned | |
159 | // from getRuleStatus(). | |
b75a7d8f A |
160 | |
161 | RBBIDataHeader *flattenData(); // Create the flattened (runtime format) | |
162 | // data tables.. | |
163 | private: | |
164 | RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class | |
165 | RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class | |
166 | }; | |
167 | ||
168 | ||
169 | ||
170 | ||
171 | //---------------------------------------------------------------------------- | |
172 | // | |
173 | // RBBISetTableEl is an entry in the hash table of UnicodeSets that have | |
174 | // been encountered. The val Node will be of nodetype uset | |
175 | // and contain pointers to the actual UnicodeSets. | |
176 | // The Key is the source string for initializing the set. | |
177 | // | |
178 | // The hash table is used to avoid creating duplicate | |
179 | // unnamed (not $var references) UnicodeSets. | |
180 | // | |
181 | // Memory Management: | |
182 | // The Hash Table owns these RBBISetTableEl structs and | |
183 | // the key strings. It does NOT own the val nodes. | |
184 | // | |
185 | //---------------------------------------------------------------------------- | |
186 | struct RBBISetTableEl { | |
187 | UnicodeString *key; | |
188 | RBBINode *val; | |
189 | }; | |
190 | ||
191 | ||
192 | //---------------------------------------------------------------------------- | |
193 | // | |
194 | // RBBIDebugPrintf Printf equivalent, for debugging output. | |
195 | // Conditional compilation of the implementation lets us | |
196 | // get rid of the stdio dependency in environments where it | |
197 | // is unavailable. | |
198 | // | |
199 | //---------------------------------------------------------------------------- | |
#ifdef RBBI_DEBUG
// Debug builds: route the debug output macros to the stdio functions.
// NOTE(review): this #include sits between U_NAMESPACE_BEGIN and
//   U_NAMESPACE_END; confirm <stdio.h> has always been included earlier
//   whenever RBBI_DEBUG is defined, or consider hoisting it out of the namespace.
#include <stdio.h>
#define RBBIDebugPrintf printf
#define RBBIDebugPuts puts
#else
// Non-debug builds: RBBIDebugPrintf is left undefined, so any use of it must
//   itself be inside RBBI_DEBUG-conditional code.  RBBIDebugPuts(arg) expands
//   to nothing and discards its argument.
#undef RBBIDebugPrintf
#define RBBIDebugPuts(arg)
#endif
208 | ||
209 | U_NAMESPACE_END | |
210 | #endif | |
211 | ||
212 | ||
213 |