1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
5 **********************************************************************
6 * Legacy version of RBBISymbolTable and RBBIRuleBuilder from ICU 57,
7 * only for use by Apple RuleBasedTokenizer
8 **********************************************************************
14 #include "unicode/utypes.h"
15 #include "unicode/uobject.h"
16 #include "unicode/uniset.h"
17 #include "unicode/parseerr.h"
20 #include "unicode/symtable.h"// For UnicodeSet parsing: SymbolTable is the interface that
21 // looks up references to $variables within a set.
22 #include "rbbidata57.h"
23 #include "rbbisetb57.h"
// Forward declarations. RBBIRuleScanner57, RBBISetBuilder57 and RBBITableBuilder57
// are used below only through pointers, so the incomplete types are enough for
// this header. RBBIRuleTableEl is not referenced in the visible part of the file.
30 class RBBIRuleScanner57
;
31 struct RBBIRuleTableEl
;
32 class RBBISetBuilder57
;
34 class RBBITableBuilder57
;
38 //--------------------------------------------------------------------------------
40 // RBBISymbolTable57. Implements SymbolTable interface that is used by the
41 // UnicodeSet parser to resolve references to $variables.
43 //--------------------------------------------------------------------------------
44 // class RBBISymbolTableEntry - from standard rbbirb.h
// RBBISymbolTable57 derives from UMemory and SymbolTable. It declares the three
// member functions inherited from SymbolTable (lookup, lookupMatcher,
// parseReference) plus helpers that add and look up RBBINode entries by key.
// NOTE(review): access specifiers, the closing brace of the class and any
// preprocessor conditionals are not visible in this view; confirm member
// visibility against the complete header.
47 class RBBISymbolTable57
: public UMemory
, public SymbolTable
{
// Reference member (not a copy) to a UnicodeString. Presumably bound to the
// constructor argument of the same name, i.e. the rule text - TODO confirm.
49 const UnicodeString
&fRules
;
// Pointer to a UHashtable. Presumably the storage behind addEntry()/lookupNode();
// ownership is not shown here - verify in the implementation.
50 UHashtable
*fHashTable
;
// Pointer to the rule scanner; the constructor takes a RBBIRuleScanner57* as its
// first argument.
51 RBBIRuleScanner57
*fRuleScanner
;
53 // These next two fields are part of the mechanism for passing references to
54 // already-constructed UnicodeSets back to the UnicodeSet constructor
55 // when the pattern includes $variable references.
56 const UnicodeString ffffString
; // = "/uffff"
57 UnicodeSet
*fCachedSetLookup
;
60 // API inherited from class SymbolTable
// lookup: takes a string s and returns a pointer to a const UnicodeString.
// Presumably s is a variable name and the result is what it expands to - confirm.
61 virtual const UnicodeString
* lookup(const UnicodeString
& s
) const;
// lookupMatcher: takes a code point ch and returns a pointer to a const
// UnicodeFunctor.
62 virtual const UnicodeFunctor
* lookupMatcher(UChar32 ch
) const;
// parseReference: takes the text, a ParsePosition passed by non-const reference
// (so it can be updated by the callee) and a limit; returns a UnicodeString by value.
63 virtual UnicodeString
parseReference(const UnicodeString
& text
,
64 ParsePosition
& pos
, int32_t limit
) const;
66 // Additional Functions
// Constructor: takes the rule scanner, a UnicodeString reference and a UErrorCode
// passed by reference for error reporting.
67 RBBISymbolTable57(RBBIRuleScanner57
*, const UnicodeString
&fRules
, UErrorCode
&status
);
68 virtual ~RBBISymbolTable57();
// lookupNode: returns the RBBINode* associated with key.
// NOTE(review): the result for an unknown key is not visible here - confirm.
70 virtual RBBINode
*lookupNode(const UnicodeString
&key
) const;
// addEntry: associates val with key; failures are reported through err.
71 virtual void addEntry (const UnicodeString
&key
, RBBINode
*val
, UErrorCode
&err
);
// NOTE(review): the declaration below and the macro that follows appear back to
// back in this view; whatever conditional chooses between them is not visible,
// and neither is a declaration of fFakeField, which the macro assigns.
74 virtual void rbbiSymtablePrint() const;
76 // A do-nothing inline function for non-debug builds. Member funcs can't be empty
77 // or the call sites won't compile.
79 #define rbbiSymtablePrint() fFakeField=0;
// Copy constructor and copy assignment are declared only to forbid copying.
83 RBBISymbolTable57(const RBBISymbolTable57
&other
); // forbid copying of this class
84 RBBISymbolTable57
&operator=(const RBBISymbolTable57
&other
); // forbid copying of this class
88 //--------------------------------------------------------------------------------
90 // class RBBIRuleBuilder57 The top-level class handling RBBI rule compiling.
92 //--------------------------------------------------------------------------------
// RBBIRuleBuilder57 derives from UMemory and holds the state shared by the stages
// of rule compilation: the rule text, error-reporting pointers, the scanner, the
// parse trees, option flags, the set builder and the table builders.
// NOTE(review): access specifiers, the closing brace of the class and the tails
// of two parameter lists are not visible in this view; confirm against the
// complete header.
93 class RBBIRuleBuilder57
: public UMemory
{
96 // Create a rule based break iterator from a set of rules.
97 // This function is the main entry point into the rule builder. The
98 // public ICU API for creating RBBIs uses this function to do the actual work.
// Returns a BreakIterator*.
// NOTE(review): the parameter list is cut off after parseError in this view.
100 static BreakIterator
* createRuleBasedBreakIterator( const UnicodeString
&rules
,
101 UParseError
*parseError
,
105 // The "public" functions and data members that appear below are accessed
106 // (and shared) by the various parts that make up the rule builder. They
107 // are NOT intended to be accessed by anything outside of the
108 // rule builder implementation.
// Constructor.
// NOTE(review): the parameter list is cut off after parseErr in this view.
109 RBBIRuleBuilder57(const UnicodeString
&rules
,
110 UParseError
*parseErr
,
114 virtual ~RBBIRuleBuilder57();
115 char *fDebugEnv
; // controls debug trace output
116 UErrorCode
*fStatus
; // Error reporting. Keeping status
117 UParseError
*fParseError
; // here avoids passing it everywhere.
118 const UnicodeString
&fRules
; // The rule string that we are compiling
120 RBBIRuleScanner57
*fScanner
; // The scanner.
121 RBBINode
*fForwardTree
; // The parse trees, generated by the scanner,
122 RBBINode
*fReverseTree
; // then manipulated by subsequent steps.
123 RBBINode
*fSafeFwdTree
;
124 RBBINode
*fSafeRevTree
;
// Pointer to a RBBINode* (note the double indirection).
// NOTE(review): presumably it points at one of the four tree members above - confirm.
126 RBBINode
**fDefaultTree
; // For rules not qualified with a !
127 // the tree to which they belong.
129 UBool fChainRules
; // True for chained Unicode TR style rules.
130 // False for traditional regexp rules.
132 UBool fLBCMNoChain
; // True: suppress chaining of rules on
133 // chars with LineBreak property == CM.
135 UBool fLookAheadHardBreak
; // True: Look ahead matches cause an
136 // immediate break, no continuing for the
// NOTE(review): the sentence above is cut off in this view; restore its ending
// from the complete header.
139 UBool fRINoChain
; // True: suppress chaining of rules on chars
140 // with (grapheme/word/line)break property == RI.
142 RBBISetBuilder57
*fSetBuilder
; // Set and Character Category builder.
143 UVector
*fUSetNodes
; // Vector of all uset nodes.
// One table builder per tree: forward, reverse, safe-forward and safe-reverse.
145 RBBITableBuilder57
*fForwardTables
; // State transition tables
146 RBBITableBuilder57
*fReverseTables
;
147 RBBITableBuilder57
*fSafeFwdTables
;
148 RBBITableBuilder57
*fSafeRevTables
;
150 UVector
*fRuleStatusVals
; // The values that can be returned
151 // from getRuleStatus().
// flattenData: takes no arguments and returns a RBBIDataHeader57*.
// NOTE(review): the trailing comment is cut off in this view, and ownership of
// the returned pointer is not shown - confirm.
153 RBBIDataHeader57
*flattenData(); // Create the flattened (runtime format)
// Copy constructor and copy assignment are declared only to forbid copying.
156 RBBIRuleBuilder57(const RBBIRuleBuilder57
&other
); // forbid copying of this class
157 RBBIRuleBuilder57
&operator=(const RBBIRuleBuilder57
&other
); // forbid copying of this class
163 // struct RBBISetTableEl - from standard rbbirb.h
165 // RBBIDebugPrintf - from standard rbbirb.h