]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbirb.h
4 // Copyright (C) 2002, International Business Machines Corporation and others.
5 // All Rights Reserved.
7 // This file contains declarations for several classes from the Rule Based Break Iterator rule builder.
14 #include "unicode/utypes.h"
15 #include "unicode/uobject.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/uniset.h"
18 #include "unicode/parseerr.h"
21 #include "symtable.h" // For UnicodeSet parsing, is the interface that
22 // looks up references to $variables within a set.
28 class RBBIRuleScanner
;
29 struct RBBIRuleTableEl
;
32 class RBBITableBuilder
;
36 //--------------------------------------------------------------------------------
38 // RBBISymbolTable. Implements SymbolTable interface that is used by the
39 // UnicodeSet parser to resolve references to $variables.
41 //--------------------------------------------------------------------------------
42 class RBBISymbolTableEntry
: public UMemory
{ // The symbol table hash table contains one
43 public: // of these structs for each entry.
44 RBBISymbolTableEntry();
47 ~RBBISymbolTableEntry();
50 RBBISymbolTableEntry(const RBBISymbolTableEntry
&other
); // forbid copying of this class
51 RBBISymbolTableEntry
&operator=(const RBBISymbolTableEntry
&other
); // forbid copying of this class
55 class RBBISymbolTable
: public UMemory
, public SymbolTable
{
57 const UnicodeString
&fRules
;
58 UHashtable
*fHashTable
;
59 RBBIRuleScanner
*fRuleScanner
;
61 // These next two fields are part of the mechanism for passing references to
62 // already-constructed UnicodeSets back to the UnicodeSet constructor
63 // when the pattern includes $variable references.
64 const UnicodeString ffffString
; // = "/uffff"
65 UnicodeSet
*fCachedSetLookup
;
68 // API inherited from class SymbolTable
69 virtual const UnicodeString
* lookup(const UnicodeString
& s
) const;
70 virtual const UnicodeFunctor
* lookupMatcher(UChar32 ch
) const;
71 virtual UnicodeString
parseReference(const UnicodeString
& text
,
72 ParsePosition
& pos
, int32_t limit
) const;
74 // Additional Functions
75 RBBISymbolTable(RBBIRuleScanner
*, const UnicodeString
&fRules
, UErrorCode
&status
);
76 virtual ~RBBISymbolTable();
78 virtual RBBINode
*lookupNode(const UnicodeString
&key
) const;
79 virtual void addEntry (const UnicodeString
&key
, RBBINode
*val
, UErrorCode
&err
);
81 virtual void print() const;
84 RBBISymbolTable(const RBBISymbolTable
&other
); // forbid copying of this class
85 RBBISymbolTable
&operator=(const RBBISymbolTable
&other
); // forbid copying of this class
89 //--------------------------------------------------------------------------------
91 // class RBBIRuleBuilder The top-level class handling RBBI rule compiling.
93 //--------------------------------------------------------------------------------
94 class RBBIRuleBuilder
: public UMemory
{
97 // Create a rule based break iterator from a set of rules.
98 // This function is the main entry point into the rule builder. The
99 // public ICU API for creating RBBIs uses this function to do the actual work.
101 static BreakIterator
* createRuleBasedBreakIterator( const UnicodeString
&rules
,
102 UParseError
&parseError
,
106 // The "public" functions and data members that appear below are accessed
107 // (and shared) by the various parts that make up the rule builder. They
108 // are NOT intended to be accessed by anything outside of the
109 // rule builder implementation.
110 RBBIRuleBuilder(const UnicodeString
&rules
,
111 UParseError
&parseErr
,
115 virtual ~RBBIRuleBuilder();
116 char *fDebugEnv
; // controls debug trace output
117 UErrorCode
*fStatus
; // Error reporting. Keeping status
118 UParseError
*fParseError
; // here avoids passing it everywhere.
119 const UnicodeString
&fRules
; // The rule string that we are compiling
121 RBBIRuleScanner
*fScanner
; // The scanner.
122 RBBINode
*fForwardTree
; // The parse trees, generated by the scanner,
123 RBBINode
*fReverseTree
; // then manipulated by subsequent steps.
125 RBBISetBuilder
*fSetBuilder
; // Set and Character Category builder.
126 UVector
*fUSetNodes
; // Vector of all uset nodes.
128 RBBITableBuilder
*fForwardTables
; // State transition tables
129 RBBITableBuilder
*fReverseTables
;
131 RBBIDataHeader
*flattenData(); // Create the flattened (runtime format)
134 RBBIRuleBuilder(const RBBIRuleBuilder
&other
); // forbid copying of this class
135 RBBIRuleBuilder
&operator=(const RBBIRuleBuilder
&other
); // forbid copying of this class
141 //----------------------------------------------------------------------------
143 // RBBISetTableEl is an entry in the hash table of UnicodeSets that have
144 // been encountered. The val Node will be of nodetype uset
145 // and contain pointers to the actual UnicodeSets.
146 // The Key is the source string for initializing the set.
148 // The hash table is used to avoid creating duplicate
149 // unnamed (not $var references) UnicodeSets.
151 // Memory Management:
152 // The Hash Table owns these RBBISetTableEl structs and
153 // the key strings. It does NOT own the val nodes.
155 //----------------------------------------------------------------------------
156 struct RBBISetTableEl
{
162 //----------------------------------------------------------------------------
164 // RBBIDebugPrintf Printf equivalent, for debugging output.
165 // Conditional compilation of the implementation lets us
166 // get rid of the stdio dependency in environments where it is not available.
169 //----------------------------------------------------------------------------
// NOTE(review): the comment block above describes conditional compilation of
// RBBIDebugPrintf, but no #if/#else/#endif guards are visible around the two
// definitions below in this view. They look like the two alternative branches
// (a printf-backed macro versus a do-nothing inline stub); if both were active at
// once, the macro would rewrite the name of the inline function. Confirm the
// guarding preprocessor conditions against the original file.
172 #define RBBIDebugPrintf printf
174 inline void RBBIDebugPrintf(...) {}