]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/common/rbbirb57.h
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / common / rbbirb57.h
... / ...
CommitLineData
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3//
4/*
5**********************************************************************
6* Legacy version of RBBISymbolTable and RBBIRuleBuilder from ICU 57,
7* only for use by Apple RuleBasedTokenizer
8**********************************************************************
9*/
10
11#ifndef RBBIRB57_H
12#define RBBIRB57_H
13
14#include "unicode/utypes.h"
15#include "unicode/uobject.h"
16#include "unicode/uniset.h"
17#include "unicode/parseerr.h"
18#include "uhash.h"
19#include "uvector.h"
20#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
21 // looks up references to $variables within a set.
22#include "rbbidata57.h"
23#include "rbbisetb57.h"
24#include "rbbirb.h"
25
26
27
28U_NAMESPACE_BEGIN
29
// Forward declarations. Within this header RBBIRuleScanner57, RBBISetBuilder57,
// RBBINode and RBBITableBuilder57 appear only as pointer types, so their full
// definitions are not required here. RBBIRuleTableEl is declared but not
// otherwise referenced in this header.
30class RBBIRuleScanner57;
31struct RBBIRuleTableEl;
32class RBBISetBuilder57;
33class RBBINode;
34class RBBITableBuilder57;
35
36
37
38//--------------------------------------------------------------------------------
39//
40// RBBISymbolTable57. Implements SymbolTable interface that is used by the
41// UnicodeSet parser to resolve references to $variables.
42//
43//--------------------------------------------------------------------------------
44// class RBBISymbolTableEntry - from standard rbbirb.h
45
46
// Symbol table given to the UnicodeSet parser so that $variable references
// inside set patterns can be resolved. Derives from UMemory and SymbolTable.
// Only declarations appear here; the member function definitions are not part
// of this header.
47class RBBISymbolTable57 : public UMemory, public SymbolTable {
48private:
49 const UnicodeString &fRules; // Reference member: the string itself is not copied into this object.
 // NOTE(review): presumably bound to the constructor's fRules argument -- confirm.
50 UHashtable *fHashTable;
51 RBBIRuleScanner57 *fRuleScanner;
52
53 // These next two fields are part of the mechanism for passing references to
54 // already-constructed UnicodeSets back to the UnicodeSet constructor
55 // when the pattern includes $variable references.
56 const UnicodeString ffffString; // = "/uffff" (sic) -- NOTE(review): presumably the one-character string U+FFFF; confirm in the constructor.
57 UnicodeSet *fCachedSetLookup;
58
59public:
60 // API inherited from class SymbolTable
61 virtual const UnicodeString* lookup(const UnicodeString& s) const;
62 virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
63 virtual UnicodeString parseReference(const UnicodeString& text,
64 ParsePosition& pos, int32_t limit) const;
65
66 // Additional Functions
 // Constructor arguments: a rule scanner pointer (unnamed), the rule text, and an
 // error code taken by non-const reference. Note that the second parameter has the
 // same name as the fRules data member.
67 RBBISymbolTable57(RBBIRuleScanner57 *, const UnicodeString &fRules, UErrorCode &status);
68 virtual ~RBBISymbolTable57();
69
 // Node access by key. NOTE(review): presumably the key is a $variable name and the
 // node is its parsed definition -- the definitions are not in this header; confirm.
70 virtual RBBINode *lookupNode(const UnicodeString &key) const;
71 virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err);
72
73#ifdef RBBI_DEBUG
74 virtual void rbbiSymtablePrint() const;
75#else
76 // Non-debug builds: rbbiSymtablePrint() is not a member function here but a macro that
77 // expands to a dummy assignment to fFakeField, so that call sites still compile.
 // Being a preprocessor definition, the macro is not scoped to this class, and its
 // expansion already ends with a ';'.
78 int32_t fFakeField;
79 #define rbbiSymtablePrint() fFakeField=0;
80#endif
81
82private:
 // Copy operations are declared private and are not defined in this header.
83 RBBISymbolTable57(const RBBISymbolTable57 &other); // forbid copying of this class
84 RBBISymbolTable57 &operator=(const RBBISymbolTable57 &other); // forbid copying of this class
85};
86
87
88//--------------------------------------------------------------------------------
89//
90// class RBBIRuleBuilder57 The top-level class handling RBBI rule compiling.
91//
92//--------------------------------------------------------------------------------
// Top-level class for compiling RBBI rules. Apart from the static factory, its
// members are public only so that the cooperating parts of the rule builder can
// share them. Only declarations appear here; definitions are not part of this header.
93class RBBIRuleBuilder57 : public UMemory {
94public:
95
96 // Create a rule based break iterator from a set of rules.
97 // This function is the main entry point into the rule builder. The
98 // public ICU API for creating RBBIs uses this function to do the actual work.
99 //
 // Takes the rule text, a pointer to a UParseError, and an error code by
 // non-const reference; returns a BreakIterator pointer.
 // NOTE(review): presumably the caller owns the returned iterator -- confirm.
100 static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
101 UParseError *parseError,
102 UErrorCode &status);
103
104public:
105 // The "public" functions and data members that appear below are accessed
106 // (and shared) by the various parts that make up the rule builder. They
107 // are NOT intended to be accessed by anything outside of the
108 // rule builder implementation.
109 RBBIRuleBuilder57(const UnicodeString &rules,
110 UParseError *parseErr,
111 UErrorCode &status
112 );
113
114 virtual ~RBBIRuleBuilder57();
 // NOTE(review): which of the raw pointers below are owned (and freed by the
 // destructor) cannot be determined from this header -- check the implementation.
115 char *fDebugEnv; // controls debug trace output
116 UErrorCode *fStatus; // Error reporting. Keeping status
117 UParseError *fParseError; // here avoids passing it everywhere.
118 const UnicodeString &fRules; // The rule string that we are compiling
 // Reference member, so the string is not copied.
 // NOTE(review): presumably it must outlive the builder -- confirm.
119
120 RBBIRuleScanner57 *fScanner; // The scanner.
121 RBBINode *fForwardTree; // The parse trees, generated by the scanner,
122 RBBINode *fReverseTree; // then manipulated by subsequent steps.
123 RBBINode *fSafeFwdTree;
124 RBBINode *fSafeRevTree;
125
126 RBBINode **fDefaultTree; // For rules not qualified with a !,
127 // the tree to which they belong.
128
129 UBool fChainRules; // True for chained Unicode TR style rules.
130 // False for traditional regexp rules.
131
132 UBool fLBCMNoChain; // True: suppress chaining of rules on
133 // chars with LineBreak property == CM.
134
135 UBool fLookAheadHardBreak; // True: Look ahead matches cause an
136 // immediate break, no continuing for the
137 // longest match.
138
139 UBool fRINoChain; // True: suppress chaining of rules on chars
140 // with (grapheme/word/line)break property == RI.
141
142 RBBISetBuilder57 *fSetBuilder; // Set and Character Category builder.
143 UVector *fUSetNodes; // Vector of all uset nodes.
144
145 RBBITableBuilder57 *fForwardTables; // State transition tables
146 RBBITableBuilder57 *fReverseTables;
147 RBBITableBuilder57 *fSafeFwdTables;
148 RBBITableBuilder57 *fSafeRevTables;
149
150 UVector *fRuleStatusVals; // The values that can be returned
151 // from getRuleStatus().
152
153 RBBIDataHeader57 *flattenData(); // Create the flattened (runtime format)
154 // data tables.
155private:
 // Copy operations are declared private and are not defined in this header.
156 RBBIRuleBuilder57(const RBBIRuleBuilder57 &other); // forbid copying of this class
157 RBBIRuleBuilder57 &operator=(const RBBIRuleBuilder57 &other); // forbid copying of this class
158};
159
160
161
162
163// struct RBBISetTableEl - from standard rbbirb.h
164
165// RBBIDebugPrintf - from standard rbbirb.h
166
167U_NAMESPACE_END
168#endif
169
170
171