]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbirb57.h
ICU-66108.tar.gz
[apple/icu.git] / icuSources / common / rbbirb57.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 /*
5 **********************************************************************
6 * Legacy version of RBBISymbolTable and RBBIRuleBuilder from ICU 57,
7 * only for use by Apple RuleBasedTokenizer
8 **********************************************************************
9 */
10
11 #ifndef RBBIRB57_H
12 #define RBBIRB57_H
13
14 #include "unicode/utypes.h"
15 #include "unicode/uobject.h"
16 #include "unicode/uniset.h"
17 #include "unicode/parseerr.h"
18 #include "uhash.h"
19 #include "uvector.h"
20 #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
21 // looks up references to $variables within a set.
22 #include "rbbidata57.h"
23 #include "rbbisetb57.h"
24 #include "rbbirb.h"
25
26
27
28 U_NAMESPACE_BEGIN
29
// Forward declarations. The classes declared in this header refer to these types
// only through pointers (or not at all), so a declaration of the name is sufficient here.
30 class RBBIRuleScanner57;
// NOTE(review): RBBIRuleTableEl is not referenced anywhere else in this header;
// presumably kept for the benefit of files that include it - confirm before removing.
31 struct RBBIRuleTableEl;
32 class RBBISetBuilder57;
33 class RBBINode;
34 class RBBITableBuilder57;
35
36
37
38 //--------------------------------------------------------------------------------
39 //
40 // RBBISymbolTable57. Implements SymbolTable interface that is used by the
41 // UnicodeSet parser to resolve references to $variables.
42 //
43 //--------------------------------------------------------------------------------
44 // class RBBISymbolTableEntry - from standard rbbirb.h
45
46
// RBBISymbolTable57
//
// Legacy (ICU 57) symbol table for the rule builder. It derives from SymbolTable, so an
// instance can be handed to the UnicodeSet parser to resolve $variable references, and it
// adds lookupNode()/addEntry() for storing and retrieving RBBINode values by name.
//
// Copying is forbidden: the copy constructor and copy assignment operator are declared
// private at the end of the class.
47 class RBBISymbolTable57 : public UMemory, public SymbolTable {
48 private:
// Reference to the string passed to the constructor; it is held by reference, not copied.
// NOTE(review): presumably the full rule source text, which must then outlive this
// object - confirm against the constructor implementation.
49 const UnicodeString &fRules;
// NOTE(review): presumably maps variable names to RBBISymbolTableEntry objects (declared
// in the standard rbbirb.h) - confirm in the .cpp.
50 UHashtable *fHashTable;
// The scanner pointer supplied to the constructor (presumably; the assignment is not
// visible in this header).
51 RBBIRuleScanner57 *fRuleScanner;
52
53 // The next two fields are part of the mechanism for passing references to
54 // already-constructed UnicodeSets back to the UnicodeSet constructor
55 // when the pattern includes $variable references.
56 const UnicodeString ffffString; // presumably the single code unit U+FFFF (this comment originally read "/uffff"); the initializer is not in this header - confirm
57 UnicodeSet *fCachedSetLookup;
58
59 public:
60 // API inherited from class SymbolTable. See SymbolTable for the contract of each function.
61 virtual const UnicodeString* lookup(const UnicodeString& s) const;
62 virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
63 virtual UnicodeString parseReference(const UnicodeString& text,
64 ParsePosition& pos, int32_t limit) const;
65
66 // Additional Functions
// Constructor: takes the rule scanner, the string bound to fRules, and an ICU error code.
67 RBBISymbolTable57(RBBIRuleScanner57 *, const UnicodeString &fRules, UErrorCode &status);
68 virtual ~RBBISymbolTable57();
69
// lookupNode: returns the RBBINode associated with key.
// NOTE(review): the result for an unknown key is not visible here (presumably NULL) - confirm.
70 virtual RBBINode *lookupNode(const UnicodeString &key) const;
// addEntry: associates val with key; failures are reported through err.
// NOTE(review): ownership of val and the handling of duplicate keys are not visible here.
71 virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err);
72
// Debug support. When RBBI_DEBUG is defined, rbbiSymtablePrint() is a real member function.
73 #ifdef RBBI_DEBUG
74 virtual void rbbiSymtablePrint() const;
75 #else
76 // Non-debug builds: rbbiSymtablePrint() is not a function at all but a preprocessor
77 // macro, so that call sites still compile. It expands to an assignment to the dummy member below.
// Being a macro, it is not scoped to this class: every later occurrence of
// rbbiSymtablePrint() in a file that includes this header is rewritten. The expansion
// already ends in ';'.
78 int32_t fFakeField;
79 #define rbbiSymtablePrint() fFakeField=0;
80 #endif
81
82 private:
83 RBBISymbolTable57(const RBBISymbolTable57 &other); // forbid copying of this class
84 RBBISymbolTable57 &operator=(const RBBISymbolTable57 &other); // forbid copying of this class
85 };
86
87
88 //--------------------------------------------------------------------------------
89 //
90 // class RBBIRuleBuilder57 The top-level class handling RBBI rule compiling.
91 //
92 //--------------------------------------------------------------------------------
// RBBIRuleBuilder57
//
// Legacy (ICU 57) top-level driver for compiling break-iterator rules. Besides the static
// factory function, it exposes as public data the state shared between the pieces of the
// rule builder: scanner, parse trees, set builder, table builders and rule status values.
//
// Copying is forbidden: the copy constructor and copy assignment operator are declared
// private at the end of the class.
93 class RBBIRuleBuilder57 : public UMemory {
94 public:
95
96 // Create a rule based break iterator from a set of rules.
97 // This function is the main entry point into the rule builder. The
98 // public ICU API for creating RBBIs uses this function to do the actual work.
99 //
// Takes the rule source text, a UParseError pointer for parse-error details, and an
// ICU error code.
// NOTE(review): ownership of the returned BreakIterator and the result on failure are
// not visible in this header (presumably caller-owned, NULL on error) - confirm.
100 static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
101 UParseError *parseError,
102 UErrorCode &status);
103
104 public:
105 // The "public" functions and data members that appear below are accessed
106 // (and shared) by the various parts that make up the rule builder. They
107 // are NOT intended to be accessed by anything outside of the
108 // rule builder implementation.
109 RBBIRuleBuilder57(const UnicodeString &rules,
110 UParseError *parseErr,
111 UErrorCode &status
112 );
113
114 virtual ~RBBIRuleBuilder57();
115 char *fDebugEnv; // controls debug trace output
116 UErrorCode *fStatus; // Error reporting. Keeping status
117 UParseError *fParseError; // here avoids passing it everywhere.
// Held by reference, not copied. NOTE(review): presumably bound to the constructor's
// rules argument, which must then outlive this builder - confirm.
118 const UnicodeString &fRules; // The rule string that we are compiling
119
120 RBBIRuleScanner57 *fScanner; // The scanner.
121 RBBINode *fForwardTree; // The parse trees, generated by the scanner,
122 RBBINode *fReverseTree; // then manipulated by subsequent steps.
123 RBBINode *fSafeFwdTree;
124 RBBINode *fSafeRevTree;
125
126 RBBINode **fDefaultTree; // For rules not qualified with a !,
127 // the tree to which they belong.
// NOTE(review): presumably points at one of the four tree members above - confirm.
128
129 UBool fChainRules; // True for chained Unicode TR style rules.
130 // False for traditional regexp rules.
131
132 UBool fLBCMNoChain; // True: suppress chaining of rules on
133 // chars with LineBreak property == CM.
134
135 UBool fLookAheadHardBreak; // True: Look ahead matches cause an
136 // immediate break, no continuing for the
137 // longest match.
138
139 UBool fRINoChain; // True: suppress chaining of rules on chars
140 // with (grapheme/word/line)break property == RI.
141
142 RBBISetBuilder57 *fSetBuilder; // Set and Character Category builder.
143 UVector *fUSetNodes; // Vector of all uset nodes.
144
145 RBBITableBuilder57 *fForwardTables; // State transition tables
146 RBBITableBuilder57 *fReverseTables;
147 RBBITableBuilder57 *fSafeFwdTables;
148 RBBITableBuilder57 *fSafeRevTables;
149
150 UVector *fRuleStatusVals; // The values that can be returned
151 // from getRuleStatus().
152
// NOTE(review): how the returned header is allocated and who must release it are not
// visible in this header - confirm in the .cpp.
153 RBBIDataHeader57 *flattenData(); // Create the flattened (runtime format)
154 // data tables.
155 private:
156 RBBIRuleBuilder57(const RBBIRuleBuilder57 &other); // forbid copying of this class
157 RBBIRuleBuilder57 &operator=(const RBBIRuleBuilder57 &other); // forbid copying of this class
158 };
159
160
161
162
163 // struct RBBISetTableEl - from standard rbbirb.h
164
165 // RBBIDebugPrintf - from standard rbbirb.h
166
167 U_NAMESPACE_END
168 #endif
169
170
171