]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/rbbirb.h
ICU-59131.0.1.tar.gz
[apple/icu.git] / icuSources / common / rbbirb.h
CommitLineData
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
//  rbbirb.h
//
//  Copyright (C) 2002-2008, International Business Machines Corporation and others.
//  All Rights Reserved.
//
//  This file contains declarations for several classes from the
//    Rule Based Break Iterator rule builder.
//
12
13
14#ifndef RBBIRB_H
15#define RBBIRB_H
16
17#include "unicode/utypes.h"
18#include "unicode/uobject.h"
19#include "unicode/rbbi.h"
20#include "unicode/uniset.h"
21#include "unicode/parseerr.h"
22#include "uhash.h"
23#include "uvector.h"
374ca955 24#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
b75a7d8f
A
25 // looks up references to $variables within a set.
26
27
28
29U_NAMESPACE_BEGIN
30
// Forward declarations of other rule-builder types.  Those referenced below
// are used only through pointers, so their full definitions are not needed here.
class RBBIRuleScanner;
struct RBBIRuleTableEl;
class RBBISetBuilder;
class RBBINode;
class RBBITableBuilder;
36
37
38
//--------------------------------------------------------------------------------
//
//   RBBISymbolTable.    Implements SymbolTable interface that is used by the
//                       UnicodeSet parser to resolve references to $variables.
//
//--------------------------------------------------------------------------------
45class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
46public: // of these structs for each entry.
47 RBBISymbolTableEntry();
48 UnicodeString key;
49 RBBINode *val;
50 ~RBBISymbolTableEntry();
51
52private:
53 RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
54 RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
55};
56
57
58class RBBISymbolTable : public UMemory, public SymbolTable {
59private:
60 const UnicodeString &fRules;
61 UHashtable *fHashTable;
62 RBBIRuleScanner *fRuleScanner;
63
64 // These next two fields are part of the mechanism for passing references to
65 // already-constructed UnicodeSets back to the UnicodeSet constructor
66 // when the pattern includes $variable references.
67 const UnicodeString ffffString; // = "/uffff"
68 UnicodeSet *fCachedSetLookup;
69
70public:
71 // API inherited from class SymbolTable
72 virtual const UnicodeString* lookup(const UnicodeString& s) const;
73 virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
74 virtual UnicodeString parseReference(const UnicodeString& text,
75 ParsePosition& pos, int32_t limit) const;
76
77 // Additional Functions
78 RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
79 virtual ~RBBISymbolTable();
80
81 virtual RBBINode *lookupNode(const UnicodeString &key) const;
82 virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err);
83
374ca955
A
84#ifdef RBBI_DEBUG
85 virtual void rbbiSymtablePrint() const;
86#else
87 // A do-nothing inline function for non-debug builds. Member funcs can't be empty
88 // or the call sites won't compile.
73c04bcf 89 int32_t fFakeField;
374ca955
A
90 #define rbbiSymtablePrint() fFakeField=0;
91#endif
b75a7d8f
A
92
93private:
94 RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
95 RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
96};
97
98
//--------------------------------------------------------------------------------
//
//  class RBBIRuleBuilder       The top-level class handling RBBI rule compiling.
//
//--------------------------------------------------------------------------------
104class RBBIRuleBuilder : public UMemory {
105public:
106
107 // Create a rule based break iterator from a set of rules.
108 // This function is the main entry point into the rule builder. The
109 // public ICU API for creating RBBIs uses this function to do the actual work.
110 //
111 static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
46f4442e 112 UParseError *parseError,
b75a7d8f
A
113 UErrorCode &status);
114
115public:
116 // The "public" functions and data members that appear below are accessed
117 // (and shared) by the various parts that make up the rule builder. They
118 // are NOT intended to be accessed by anything outside of the
119 // rule builder implementation.
120 RBBIRuleBuilder(const UnicodeString &rules,
46f4442e 121 UParseError *parseErr,
b75a7d8f
A
122 UErrorCode &status
123 );
124
125 virtual ~RBBIRuleBuilder();
126 char *fDebugEnv; // controls debug trace output
127 UErrorCode *fStatus; // Error reporting. Keeping status
128 UParseError *fParseError; // here avoids passing it everywhere.
129 const UnicodeString &fRules; // The rule string that we are compiling
130
131 RBBIRuleScanner *fScanner; // The scanner.
132 RBBINode *fForwardTree; // The parse trees, generated by the scanner,
133 RBBINode *fReverseTree; // then manipulated by subsequent steps.
374ca955
A
134 RBBINode *fSafeFwdTree;
135 RBBINode *fSafeRevTree;
136
137 RBBINode **fDefaultTree; // For rules not qualified with a !
138 // the tree to which they belong to.
139
140 UBool fChainRules; // True for chained Unicode TR style rules.
141 // False for traditional regexp rules.
142
143 UBool fLBCMNoChain; // True: suppress chaining of rules on
144 // chars with LineBreak property == CM.
145
146 UBool fLookAheadHardBreak; // True: Look ahead matches cause an
147 // immediate break, no continuing for the
148 // longest match.
b75a7d8f
A
149
150 RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
151 UVector *fUSetNodes; // Vector of all uset nodes.
152
153 RBBITableBuilder *fForwardTables; // State transition tables
154 RBBITableBuilder *fReverseTables;
374ca955
A
155 RBBITableBuilder *fSafeFwdTables;
156 RBBITableBuilder *fSafeRevTables;
157
158 UVector *fRuleStatusVals; // The values that can be returned
159 // from getRuleStatus().
b75a7d8f
A
160
161 RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
162 // data tables..
163private:
164 RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
165 RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
166};
167
168
169
170
//----------------------------------------------------------------------------
//
//   RBBISetTableEl   is an entry in the hash table of UnicodeSets that have
//                    been encountered.  The val Node will be of nodetype uset
//                    and contain pointers to the actual UnicodeSets.
//                    The Key is the source string for initializing the set.
//
//                    The hash table is used to avoid creating duplicate
//                    unnamed (not $var references) UnicodeSets.
//
//                    Memory Management:
//                       The Hash Table owns these RBBISetTableEl structs and
//                            the key strings.  It does NOT own the val nodes.
//
//----------------------------------------------------------------------------
186struct RBBISetTableEl {
187 UnicodeString *key;
188 RBBINode *val;
189};
190
191
//----------------------------------------------------------------------------
//
//   RBBIDebugPrintf    Printf equivalent, for debugging output.
//                      Conditional compilation of the implementation lets us
//                      get rid of the stdio dependency in environments where it
//                      is unavailable.
//
//----------------------------------------------------------------------------
// Debug-output hooks.  With RBBI_DEBUG defined they map straight onto stdio's
// printf/puts; otherwise RBBIDebugPrintf is left undefined and RBBIDebugPuts(arg)
// expands to nothing, so its argument is never evaluated.
#ifdef RBBI_DEBUG
#include <stdio.h>
#define RBBIDebugPrintf printf
#define RBBIDebugPuts puts
#else
#undef RBBIDebugPrintf
#define RBBIDebugPuts(arg)
#endif
208
209U_NAMESPACE_END
210#endif
211
212
213