2 // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class
5 ***************************************************************************
6 * Copyright (C) 2002-2005 International Business Machines Corporation *
7 * and others. All rights reserved. *
8 ***************************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_BREAK_ITERATION
15 #include "unicode/unistr.h"
16 #include "unicode/uniset.h"
17 #include "unicode/uchar.h"
18 #include "unicode/parsepos.h"
27 // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
28 // when the hash table is deleted.
// Registered on fHashTable by the RBBISymbolTable constructor through
// uhash_setValueDeleter(), so it receives the table's stored values.
// p is the untyped value pointer; it is treated as a RBBISymbolTableEntry.
31 static void U_CALLCONV
RBBISymbolTableEntry_deleter(void *p
) {
// Recover the concrete entry type from the untyped pointer.
32 RBBISymbolTableEntry
*px
= (RBBISymbolTableEntry
*)p
;
// NOTE(review): the statement that releases px and the closing brace are not
// visible in this extract -- confirm against the complete file.
// RBBISymbolTable constructor.
//   rs      initializes fRuleScanner.
//   rules   initializes fRules.
//   status  in/out ICU error code; it is handed to uhash_open() and tested
//           with U_FAILURE() afterwards.
// ffffString is initialized to the single code unit 0xffff; lookup() returns
// its address as the stand-in text for variables that refer to a set.
41 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner
*rs
, const UnicodeString
&rules
, UErrorCode
&status
)
42 :fRules(rules
), fRuleScanner(rs
), ffffString(UChar(0xffff))
// No set is pending for lookupMatcher() on a freshly built table.
45 fCachedSetLookup
= NULL
;
// Hash table keyed by UnicodeString (hash and compare callbacks for
// UnicodeString keys); NULL is passed for the third argument.
47 fHashTable
= uhash_open(uhash_hashUnicodeString
, uhash_compareUnicodeString
, NULL
, &status
);
48 // uhash_open checks status
// NOTE(review): the body of this failure branch is not visible in this extract.
49 if (U_FAILURE(status
)) {
// Values stored in the table are RBBISymbolTableEntry objects; register the
// deleter defined above for them.
52 uhash_setValueDeleter(fHashTable
, RBBISymbolTableEntry_deleter
);
// RBBISymbolTable destructor. Closes the hash table opened by the constructor.
// The constructor registered RBBISymbolTableEntry_deleter as the value deleter,
// which (per that function's header comment) is used to delete the contents
// when the hash table is deleted.
57 RBBISymbolTable::~RBBISymbolTable()
59 uhash_close(fHashTable
);
64 // RBBISymbolTable::lookup This function from the abstract symbol table interface
65 // looks up a variable name and returns a UnicodeString
66 // containing the substitution text.
68 // The variable name does NOT include the leading $.
// Side effect: although declared const, this function writes fCachedSetLookup
// (through the non-const alias "This") so that a following lookupMatcher()
// call can hand back the set belonging to the variable just looked up.
// NOTE(review): several declarations (varRefNode, exprNode, usetNode), the
// handling of a missing entry, and the return statement are not visible in
// this extract -- confirm against the complete file.
70 const UnicodeString
*RBBISymbolTable::lookup(const UnicodeString
& s
) const
72 RBBISymbolTableEntry
*el
;
76 const UnicodeString
*retString
;
77 RBBISymbolTable
*This
= (RBBISymbolTable
*)this; // cast off const
// Fetch the table entry stored under the variable name s.
79 el
= (RBBISymbolTableEntry
*)uhash_get(fHashTable
, &s
);
85 exprNode
= varRefNode
->fLeftChild
; // Root node of expression for variable
86 if (exprNode
->fType
== RBBINode::setRef
) {
87 // The $variable refers to a single UnicodeSet
88 // return the ffffString, which will subsequently be interpreted as a
89 // stand-in character for the set by RBBISymbolTable::lookupMatcher()
90 usetNode
= exprNode
->fLeftChild
;
// Remember the set so that lookupMatcher() can return it.
91 This
->fCachedSetLookup
= usetNode
->fInputSet
;
92 retString
= &ffffString
;
96 // The variable refers to something other than just a set.
97 // return the original source string for the expression
98 retString
= &exprNode
->fText
;
// No set is associated with this variable; clear the cached value.
99 This
->fCachedSetLookup
= NULL
;
107 // RBBISymbolTable::lookupMatcher This function from the abstract symbol table
108 // interface maps a single stand-in character to a
109 // pointer to a Unicode Set. The Unicode Set code uses this
110 // mechanism to get all references to the same $variable
111 // name to refer to a single common Unicode Set instance.
113 // This implementation cheats a little, and does not maintain a map of stand-in chars
114 // to sets. Instead, it takes advantage of the fact that the UnicodeSet
115 // constructor will always call this function right after calling lookup(),
116 // and we just need to remember what set to return between these two calls.
// The parameter ch is not referenced by any statement visible in this extract.
// NOTE(review): the return statement is not visible here; presumably retVal
// is returned -- confirm against the complete file.
117 const UnicodeFunctor
*RBBISymbolTable::lookupMatcher(UChar32 ch
) const
119 UnicodeSet
*retVal
= NULL
;
120 RBBISymbolTable
*This
= (RBBISymbolTable
*)this; // cast off const
// Pick up the set cached by the preceding lookup() call ...
122 retVal
= fCachedSetLookup
;
// ... and clear the cache so the same set is not handed out a second time.
123 This
->fCachedSetLookup
= 0;
129 // RBBISymbolTable::parseReference This function from the abstract symbol table interface
130 // looks for a $variable name in the source text.
131 // It does not look it up, only scans for it.
132 // It is used by the UnicodeSet parser.
134 // This implementation is lifted pretty much verbatim
135 // from the rules based transliterator implementation.
136 // I didn't see an obvious way of sharing it.
//   text   the source text being scanned.
//   pos    supplies the index at which scanning starts (pos.getIndex()).
//   limit  not referenced by any statement visible in this extract.
// Returns the scanned name, or an empty string if no valid name characters
// were found at the starting index.
// NOTE(review): the declaration of i, the scanning loop header, any update of
// pos, and the final return are not visible in this extract -- confirm against
// the complete file.
138 UnicodeString
RBBISymbolTable::parseReference(const UnicodeString
& text
,
139 ParsePosition
& pos
, int32_t limit
) const
141 int32_t start
= pos
.getIndex();
143 UnicodeString result
;
145 UChar c
= text
.charAt(i
);
// A name must begin with an identifier-start character, and every character
// of it must be an identifier-part character.
146 if ((i
==start
&& !u_isIDStart(c
)) || !u_isIDPart(c
)) {
151 if (i
== start
) { // No valid name chars
152 return result
; // Indicate failure with empty string
// Copy the characters from start up to i into result.
155 text
.extractBetween(start
, i
, result
);
162 // RBBISymbolTable::lookupNode Given a key (a variable name), return the
163 // corresponding RBBI Node. If there is no entry
164 // in the table for this name, return NULL.
// NOTE(review): the statements that read the node out of the entry and return
// it are not visible in this extract -- confirm against the complete file.
166 RBBINode
*RBBISymbolTable::lookupNode(const UnicodeString
&key
) const{
// Result defaults to NULL.
168 RBBINode
*retNode
= NULL
;
169 RBBISymbolTableEntry
*el
;
// Fetch the table entry stored under key.
171 el
= (RBBISymbolTableEntry
*)uhash_get(fHashTable
, &key
);
180 // RBBISymbolTable::addEntry Add a new entry to the symbol table.
181 // Indicate an error if the name already exists -
182 // this will only occur in the case of duplicate
183 // variable assignments.
//   key  the variable name to add.
//   val  the node to associate with the name.
//   err  in/out ICU error code. The visible code assigns
//        U_BRK_VARIABLE_REDFINITION and U_MEMORY_ALLOCATION_ERROR to it and
//        also passes it to uhash_put().
// NOTE(review): the conditions guarding the two error assignments, the
// statements that fill in the new entry, and the closing braces are not
// visible in this extract -- confirm against the complete file.
185 void RBBISymbolTable::addEntry (const UnicodeString
&key
, RBBINode
*val
, UErrorCode
&err
) {
186 RBBISymbolTableEntry
*e
;
187 /* check whether err already holds a failure on entry */
188 if (U_FAILURE(err
)) {
// Look for an existing entry under the same name.
191 e
= (RBBISymbolTableEntry
*)uhash_get(fHashTable
, &key
);
193 err
= U_BRK_VARIABLE_REDFINITION
;
197 e
= new RBBISymbolTableEntry
;
199 err
= U_MEMORY_ALLOCATION_ERROR
;
// The table key points at the UnicodeString held inside the entry itself.
204 uhash_put( fHashTable
, &e
->key
, e
, &err
);
// RBBISymbolTableEntry default constructor: key is default-constructed and
// val starts out as NULL.
208 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL
) {}
// RBBISymbolTableEntry destructor.
// NOTE(review): val is dereferenced unconditionally below, while the default
// constructor initializes val to NULL; an entry destroyed before val has been
// assigned would dereference a null pointer -- confirm that every entry has
// val set before it can be destroyed.
// NOTE(review): any further statements between the ones shown and the closing
// brace are not visible in this extract.
210 RBBISymbolTableEntry::~RBBISymbolTableEntry() {
211 // The "val" of a symbol table entry is a variable reference node.
212 // The l. child of the val is the rhs expression from the assignment.
213 // Unlike other node types, children of variable reference nodes are not
214 // automatically recursively deleted. We do it manually here.
215 delete val
->fLeftChild
;
// Clear the pointer to the subtree that was just deleted.
216 val
->fLeftChild
= NULL
;
220 // Note: the key UnicodeString is destructed by virtue of being in the object by value.
225 // RBBISymbolTable::rbbiSymtablePrint Debugging function, dump out the symbol table contents.
// Output goes through RBBIDebugPrintf in two sections: first a table with each
// variable's name, node pointer and source text, then each variable's name
// followed by its parsed expression tree.
// NOTE(review): the declaration of pos, the loop constructs around each
// uhash_nextElement() call, their termination checks, and the closing braces
// are not visible in this extract -- confirm against the complete file.
228 void RBBISymbolTable::rbbiSymtablePrint() const {
229 RBBIDebugPrintf("Variable Definitions\n"
230 "Name Node Val String Val\n"
231 "----------------------------------------------------------------------\n");
234 const UHashElement
*e
= NULL
;
// First section: fetch the next element of the hash table.
236 e
= uhash_nextElement(fHashTable
, &pos
);
// The element's value pointer is the symbol table entry.
240 RBBISymbolTableEntry
*s
= (RBBISymbolTableEntry
*)e
->value
.pointer
;
// Variable name (second argument 15), then the address of its node,
// then the source text of the expression under that node.
242 RBBI_DEBUG_printUnicodeString(s
->key
, 15);
243 RBBIDebugPrintf(" %8p ", (void *)s
->val
);
244 RBBI_DEBUG_printUnicodeString(s
->val
->fLeftChild
->fText
);
245 RBBIDebugPrintf("\n");
248 RBBIDebugPrintf("\nParsed Variable Definitions\n");
// Second section: fetch the next element of the hash table.
251 e
= uhash_nextElement(fHashTable
, &pos
);
255 RBBISymbolTableEntry
*s
= (RBBISymbolTableEntry
*)e
->value
.pointer
;
// Variable name followed by a dump of its expression tree.
256 RBBI_DEBUG_printUnicodeString(s
->key
);
257 s
->val
->fLeftChild
->printTree(TRUE
);
258 RBBIDebugPrintf("\n");
269 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */