]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbirb.h
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / common / rbbirb.h
1 //
2 // rbbirb.h
3 //
4 // Copyright (C) 2002, International Business Machines Corporation and others.
5 // All Rights Reserved.
6 //
7 // This file contains declarations for several classes from the Rule Based Break Iterator rule builder.
8 //
9
10
11 #ifndef RBBIRB_H
12 #define RBBIRB_H
13
14 #include "unicode/utypes.h"
15 #include "unicode/uobject.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/uniset.h"
18 #include "unicode/parseerr.h"
19 #include "uhash.h"
20 #include "uvector.h"
21 #include "symtable.h" // For UnicodeSet parsing, is the interface that
22 // looks up references to $variables within a set.
23
24
25
26 U_NAMESPACE_BEGIN
27
28 class RBBIRuleScanner;
29 struct RBBIRuleTableEl;
30 class RBBISetBuilder;
31 class RBBINode;
32 class RBBITableBuilder;
33
34
35
36 //--------------------------------------------------------------------------------
37 //
38 // RBBISymbolTable. Implements SymbolTable interface that is used by the
39 // UnicodeSet parser to resolve references to $variables.
40 //
41 //--------------------------------------------------------------------------------
42 class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
43 public: // of these structs for each entry.
44 RBBISymbolTableEntry();
45 UnicodeString key;
46 RBBINode *val;
47 ~RBBISymbolTableEntry();
48
49 private:
50 RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
51 RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
52 };
53
54
55 class RBBISymbolTable : public UMemory, public SymbolTable {
56 private:
57 const UnicodeString &fRules;
58 UHashtable *fHashTable;
59 RBBIRuleScanner *fRuleScanner;
60
61 // These next two fields are part of the mechanism for passing references to
62 // already-constructed UnicodeSets back to the UnicodeSet constructor
63 // when the pattern includes $variable references.
64 const UnicodeString ffffString; // = "/uffff"
65 UnicodeSet *fCachedSetLookup;
66
67 public:
68 // API inherited from class SymbolTable
69 virtual const UnicodeString* lookup(const UnicodeString& s) const;
70 virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
71 virtual UnicodeString parseReference(const UnicodeString& text,
72 ParsePosition& pos, int32_t limit) const;
73
74 // Additional Functions
75 RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
76 virtual ~RBBISymbolTable();
77
78 virtual RBBINode *lookupNode(const UnicodeString &key) const;
79 virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err);
80
81 virtual void print() const;
82
83 private:
84 RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
85 RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
86 };
87
88
89 //--------------------------------------------------------------------------------
90 //
91 // class RBBIRuleBuilder The top-level class handling RBBI rule compiling.
92 //
93 //--------------------------------------------------------------------------------
94 class RBBIRuleBuilder : public UMemory {
95 public:
96
97 // Create a rule based break iterator from a set of rules.
98 // This function is the main entry point into the rule builder. The
99 // public ICU API for creating RBBIs uses this function to do the actual work.
100 //
101 static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
102 UParseError &parseError,
103 UErrorCode &status);
104
105 public:
106 // The "public" functions and data members that appear below are accessed
107 // (and shared) by the various parts that make up the rule builder. They
108 // are NOT intended to be accessed by anything outside of the
109 // rule builder implementation.
110 RBBIRuleBuilder(const UnicodeString &rules,
111 UParseError &parseErr,
112 UErrorCode &status
113 );
114
115 virtual ~RBBIRuleBuilder();
116 char *fDebugEnv; // controls debug trace output
117 UErrorCode *fStatus; // Error reporting. Keeping status
118 UParseError *fParseError; // here avoids passing it everywhere.
119 const UnicodeString &fRules; // The rule string that we are compiling
120
121 RBBIRuleScanner *fScanner; // The scanner.
122 RBBINode *fForwardTree; // The parse trees, generated by the scanner,
123 RBBINode *fReverseTree; // then manipulated by subsequent steps.
124
125 RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
126 UVector *fUSetNodes; // Vector of all uset nodes.
127
128 RBBITableBuilder *fForwardTables; // State transition tables
129 RBBITableBuilder *fReverseTables;
130
131 RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
132 // data tables..
133 private:
134 RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
135 RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
136 };
137
138
139
140
141 //----------------------------------------------------------------------------
142 //
143 // RBBISetTableEl is an entry in the hash table of UnicodeSets that have
144 // been encountered. The val Node will be of nodetype uset
145 // and contain pointers to the actual UnicodeSets.
146 // The Key is the source string for initializing the set.
147 //
148 // The hash table is used to avoid creating duplicate
149 // unnamed (not $var references) UnicodeSets.
150 //
151 // Memory Management:
152 // The Hash Table owns these RBBISetTableEl structs and
153 // the key strings. It does NOT own the val nodes.
154 //
155 //----------------------------------------------------------------------------
156 struct RBBISetTableEl {
157 UnicodeString *key;
158 RBBINode *val;
159 };
160
161
162 //----------------------------------------------------------------------------
163 //
164 // RBBIDebugPrintf Printf equivalent, for debugging output.
165 // Conditional compilation of the implementation lets us
166 // get rid of the stdio dependency in environments where it
167 // is unavailable.
168 //
169 //----------------------------------------------------------------------------
#ifdef RBBI_DEBUG
// Debug build: RBBIDebugPrintf is plain printf.
// This point of the header lies between U_NAMESPACE_BEGIN and
// U_NAMESPACE_END.  A standard header must not be included from inside a
// namespace (its declarations would be placed in that namespace the first
// time it is seen), so step out of the ICU namespace for the #include and
// re-enter it afterwards.
U_NAMESPACE_END
#include <stdio.h>
U_NAMESPACE_BEGIN
#define RBBIDebugPrintf printf
#else
// Non-debug build: a no-op that accepts any argument list, so call sites
// compile unchanged and nothing from stdio is required.
inline void RBBIDebugPrintf(...) {}
#endif
176
177 U_NAMESPACE_END
178 #endif
179
180
181