]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/rbbirb.h
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / rbbirb.h
CommitLineData
b75a7d8f
A
1//
2// rbbirb.h
3//
374ca955 4// Copyright (C) 2002-2004, International Business Machines Corporation and others.
b75a7d8f
A
5// All Rights Reserved.
6//
374ca955
A
7// This file contains declarations for several classes from the
8// Rule Based Break Iterator rule builder.
b75a7d8f
A
9//
10
11
12#ifndef RBBIRB_H
13#define RBBIRB_H
14
15#include "unicode/utypes.h"
16#include "unicode/uobject.h"
17#include "unicode/rbbi.h"
18#include "unicode/uniset.h"
19#include "unicode/parseerr.h"
20#include "uhash.h"
21#include "uvector.h"
374ca955 22#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
b75a7d8f
A
23 // looks up references to $variables within a set.
24
25
26
27U_NAMESPACE_BEGIN
28
29class RBBIRuleScanner;
30struct RBBIRuleTableEl;
31class RBBISetBuilder;
32class RBBINode;
33class RBBITableBuilder;
34
35
36
37//--------------------------------------------------------------------------------
38//
39// RBBISymbolTable. Implements SymbolTable interface that is used by the
40// UnicodeSet parser to resolve references to $variables.
41//
42//--------------------------------------------------------------------------------
// One symbol-table record: a UnicodeString key paired with an RBBINode pointer.
// Copy construction and copy assignment are declared private so that
// instances cannot be copied from outside the class.
43class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
44public: // of these structs for each entry.
45 RBBISymbolTableEntry();
46 UnicodeString key; // lookup key (presumably the $variable name -- TODO confirm at the call sites)
47 RBBINode *val; // node associated with the key; ownership is not visible in this header
48 ~RBBISymbolTableEntry();
49
50private:
51 RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
52 RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
53};
54
55
// Symbol table used while compiling break rules. It derives from SymbolTable
// and declares lookup(), lookupMatcher() and parseReference() from that
// interface; lookupNode() and addEntry() additionally map a UnicodeString key
// to an RBBINode pointer. Copying is forbidden (private copy ctor/assignment).
56class RBBISymbolTable : public UMemory, public SymbolTable {
57private:
58 const UnicodeString &fRules; // held by reference: the referenced string must outlive this object
59 UHashtable *fHashTable; // hash table holding the symbol table entries
60 RBBIRuleScanner *fRuleScanner;
61
62 // These next two fields are part of the mechanism for passing references to
63 // already-constructed UnicodeSets back to the UnicodeSet constructor
64 // when the pattern includes $variable references.
65 const UnicodeString ffffString; // = "/uffff" NOTE(review): presumably the one-character string U+FFFF -- confirm in the constructor
66 UnicodeSet *fCachedSetLookup;
67
68public:
69 // API inherited from class SymbolTable
70 virtual const UnicodeString* lookup(const UnicodeString& s) const;
71 virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
72 virtual UnicodeString parseReference(const UnicodeString& text,
73 ParsePosition& pos, int32_t limit) const;
74
75 // Additional Functions
// Note: the constructor's second parameter is named fRules, the same as the
// data member declared above.
76 RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
77 virtual ~RBBISymbolTable();
78
79 virtual RBBINode *lookupNode(const UnicodeString &key) const;
80 virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err);
81
374ca955
A
82#ifdef RBBI_DEBUG
83 virtual void rbbiSymtablePrint() const;
84#else
85 // A do-nothing macro stand-in for non-debug builds. The expansion can't be empty
86 // or the call sites won't compile, so it assigns to the dummy member fFakeField.
// The macro is a preprocessor definition, so it is not scoped to this class,
// and its expansion already ends with a semicolon.
87 int fFakeField;
88 #define rbbiSymtablePrint() fFakeField=0;
89#endif
b75a7d8f
A
90
91private:
92 RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
93 RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
94};
95
96
97//--------------------------------------------------------------------------------
98//
99// class RBBIRuleBuilder The top-level class handling RBBI rule compiling.
100//
101//--------------------------------------------------------------------------------
// Holds the shared state of one rule compilation: the rule string, error
// reporting handles, the scanner, the parse trees, the set builder, the table
// builders and the rule status values. The static
// createRuleBasedBreakIterator() is the entry point; the remaining public
// members exist only so the builder's component classes can share them.
// Copying is forbidden (private copy ctor/assignment).
102class RBBIRuleBuilder : public UMemory {
103public:
104
105 // Create a rule based break iterator from a set of rules.
106 // This function is the main entry point into the rule builder. The
107 // public ICU API for creating RBBIs uses this function to do the actual work.
108 //
109 static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
110 UParseError &parseError,
111 UErrorCode &status);
112
113public:
114 // The "public" functions and data members that appear below are accessed
115 // (and shared) by the various parts that make up the rule builder. They
116 // are NOT intended to be accessed by anything outside of the
117 // rule builder implementation.
118 RBBIRuleBuilder(const UnicodeString &rules,
119 UParseError &parseErr,
120 UErrorCode &status
121 );
122
123 virtual ~RBBIRuleBuilder();
124 char *fDebugEnv; // controls debug trace output
125 UErrorCode *fStatus; // Error reporting. Keeping status
126 UParseError *fParseError; // here avoids passing it everywhere.
127 const UnicodeString &fRules; // The rule string that we are compiling
                             // (held by reference: it must outlive this builder).
128
129 RBBIRuleScanner *fScanner; // The scanner.
130 RBBINode *fForwardTree; // The parse trees, generated by the scanner,
131 RBBINode *fReverseTree; // then manipulated by subsequent steps.
374ca955
A
132 RBBINode *fSafeFwdTree;
133 RBBINode *fSafeRevTree;
134
135 RBBINode **fDefaultTree; // For rules not qualified with a !
136 // the tree to which they belong.
137
138 UBool fChainRules; // True for chained Unicode TR style rules.
139 // False for traditional regexp rules.
140
141 UBool fLBCMNoChain; // True: suppress chaining of rules on
142 // chars with LineBreak property == CM.
143
144 UBool fLookAheadHardBreak; // True: Look ahead matches cause an
145 // immediate break, no continuing for the
146 // longest match.
b75a7d8f
A
147
148 RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
149 UVector *fUSetNodes; // Vector of all uset nodes.
150
151 RBBITableBuilder *fForwardTables; // State transition tables
152 RBBITableBuilder *fReverseTables;
374ca955
A
153 RBBITableBuilder *fSafeFwdTables;
154 RBBITableBuilder *fSafeRevTables;
155
156 UVector *fRuleStatusVals; // The values that can be returned
157 // from getRuleStatus().
b75a7d8f
A
158
159 RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
160 // data tables.
                               // NOTE(review): ownership of the returned header
                               // is not visible in this header -- confirm in the
                               // implementation.
161private:
162 RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
163 RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
164};
165
166
167
168
169//----------------------------------------------------------------------------
170//
171// RBBISetTableEl is an entry in the hash table of UnicodeSets that have
172// been encountered. The val Node will be of nodetype uset
173// and contain pointers to the actual UnicodeSets.
174// The Key is the source string for initializing the set.
175//
176// The hash table is used to avoid creating duplicate
177// unnamed (not $var references) UnicodeSets.
178//
179// Memory Management:
180// The Hash Table owns these RBBISetTableEl structs and
181// the key strings. It does NOT own the val nodes.
182//
183//----------------------------------------------------------------------------
// Plain aggregate: one entry in the hash table of already-encountered UnicodeSets.
184struct RBBISetTableEl {
185 UnicodeString *key; // source string that initialized the set; owned by the hash table
186 RBBINode *val; // uset node for that set; NOT owned by the hash table
187};
188
189
190//----------------------------------------------------------------------------
191//
192// RBBIDebugPrintf Printf equivalent, for debugging output.
193// Conditional compilation of the implementation lets us
194// get rid of the stdio dependency in environments where it
195// is unavailable.
196//
197//----------------------------------------------------------------------------
// Debug build: RBBIDebugPrintf and RBBIDebugPuts are aliases for stdio's
// printf and puts.
198#ifdef RBBI_DEBUG
199#include <stdio.h>
200#define RBBIDebugPrintf printf
374ca955 201#define RBBIDebugPuts puts
b75a7d8f
A
202#else
// Non-debug build: RBBIDebugPrintf is an empty inline variadic function, so
// calls still compile but do nothing (arguments are still evaluated), while
// RBBIDebugPuts(arg) expands to nothing at all. No stdio dependency remains.
203inline void RBBIDebugPrintf(...) {}
374ca955 204#define RBBIDebugPuts(arg)
b75a7d8f
A
205#endif
206
207U_NAMESPACE_END
208#endif
209
210
211