1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class
7 ***************************************************************************
8 * Copyright (C) 2002-2014 International Business Machines Corporation
9 * and others. All rights reserved.
10 ***************************************************************************
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_BREAK_ITERATION
17 #include "unicode/unistr.h"
18 #include "unicode/uniset.h"
19 #include "unicode/uchar.h"
20 #include "unicode/parsepos.h"
29 // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
30 // when the hash table is deleted.
// Value-deleter callback for the symbol table's hash table; it is registered
// with uhash_setValueDeleter() in the RBBISymbolTable constructor.
//   p   untyped pointer handed over by the hash table. It is cast back to
//       RBBISymbolTableEntry* so the entry can be handled with its real type.
33 static void U_CALLCONV
RBBISymbolTableEntry_deleter(void *p
) {
34 icu::RBBISymbolTableEntry
*px
= (icu::RBBISymbolTableEntry
*)p
;
// Constructor.
//   rs      rule scanner pointer, stored in fRuleScanner.
//   rules   rule source text, used to initialize fRules.
//   status  ICU error code; passed to uhash_open() and tested with
//           U_FAILURE() immediately afterwards.
// ffffString is initialized to the single code unit 0xffff; lookup() returns
// its address as the stand-in for a variable that refers to a set.
43 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner
*rs
, const UnicodeString
&rules
, UErrorCode
&status
)
44 :fRules(rules
), fRuleScanner(rs
), ffffString(UChar(0xffff))
47 fCachedSetLookup
= NULL
;
// Open the table with the UnicodeString hash and compare functions, i.e. the
// keys are UnicodeString pointers.
49 fHashTable
= uhash_open(uhash_hashUnicodeString
, uhash_compareUnicodeString
, NULL
, &status
);
50 // uhash_open checks status
51 if (U_FAILURE(status
)) {
// Register the callback the hash table uses to dispose of stored values.
54 uhash_setValueDeleter(fHashTable
, RBBISymbolTableEntry_deleter
);
// Destructor: closes the hash table. The constructor registered
// RBBISymbolTableEntry_deleter as the table's value deleter, which is used to
// delete the table contents when the table is deleted.
59 RBBISymbolTable::~RBBISymbolTable()
61 uhash_close(fHashTable
);
66 // RBBISymbolTable::lookup This function from the abstract symbol table interface
67 // looks up a variable name and returns a UnicodeString
68 // containing the substitution text.
70 // The variable name does NOT include the leading $.
// Looks up the variable name s in the hash table and selects the substitution
// text for it.
//   s   variable name, used as the hash key.
// Two outcomes are visible below:
//   - the variable's expression node is a setRef: retString is set to
//     &ffffString and the set is remembered in fCachedSetLookup;
//   - otherwise: retString is set to the expression node's fText and
//     fCachedSetLookup is cleared.
// NOTE(review): presumably retString is the value returned, and a failed
// uhash_get() is handled before the entry is used - confirm against the
// complete function.
72 const UnicodeString
*RBBISymbolTable::lookup(const UnicodeString
& s
) const
74 RBBISymbolTableEntry
*el
;
78 const UnicodeString
*retString
;
// The method is const but has to update fCachedSetLookup, hence the
// non-const alias of this.
79 RBBISymbolTable
*This
= (RBBISymbolTable
*)this; // cast off const
81 el
= (RBBISymbolTableEntry
*)uhash_get(fHashTable
, &s
);
87 exprNode
= varRefNode
->fLeftChild
; // Root node of expression for variable
88 if (exprNode
->fType
== RBBINode::setRef
) {
89 // The $variable refers to a single UnicodeSet
90 // return the ffffString, which will subsequently be interpreted as a
91 // stand-in character for the set by RBBISymbolTable::lookupMatcher()
92 usetNode
= exprNode
->fLeftChild
;
93 This
->fCachedSetLookup
= usetNode
->fInputSet
;
94 retString
= &ffffString
;
98 // The variable refers to something other than just a set.
99 // return the original source string for the expression
100 retString
= &exprNode
->fText
;
// Clear the cache so that a later lookupMatcher() call cannot pick up a set
// left over from an earlier lookup.
101 This
->fCachedSetLookup
= NULL
;
109 // RBBISymbolTable::lookupMatcher This function from the abstract symbol table
110 // interface maps a single stand-in character to a
111 // pointer to a Unicode Set. The Unicode Set code uses this
112 // mechanism to get all references to the same $variable
113 // name to refer to a single common Unicode Set instance.
115 // This implementation cheats a little, and does not maintain a map of stand-in chars
116 // to sets. Instead, it takes advantage of the fact that the UnicodeSet
117 // constructor will always call this function right after calling lookup(),
118 // and we just need to remember what set to return between these two calls.
// Hands out the set that the preceding lookup() call stored in
// fCachedSetLookup, then clears the cache.
//   ch   the stand-in character being resolved.
// NOTE(review): the test applied to ch and the return statement are not
// visible here; presumably retVal is returned - confirm.
119 const UnicodeFunctor
*RBBISymbolTable::lookupMatcher(UChar32 ch
) const
121 UnicodeSet
*retVal
= NULL
;
// Non-const alias of this, needed to reset the cache from a const method.
122 RBBISymbolTable
*This
= (RBBISymbolTable
*)this; // cast off const
124 retVal
= fCachedSetLookup
;
// The cached set is consumed by this call; reset it afterwards.
125 This
->fCachedSetLookup
= 0;
131 // RBBISymbolTable::parseReference This function from the abstract symbol table interface
132 // looks for a $variable name in the source text.
133 // It does not look it up, only scans for it.
134 // It is used by the UnicodeSet parser.
136 // This implementation is lifted pretty much verbatim
137 // from the rules based transliterator implementation.
138 // I didn't see an obvious way of sharing it.
// Scans text for a variable name beginning at the index held in pos.
//   text    source text to scan.
//   pos     parse position; its index is the start of the scan.
//   limit   NOTE(review): presumably the upper bound of the scan - its use is
//           not visible here, confirm.
// The character at the start index must satisfy u_isIDStart(); every
// character must satisfy u_isIDPart(). If the start character is not a valid
// name character, the empty string is returned to signal failure. Otherwise
// the characters from start up to (not including) i are extracted into
// result.
// NOTE(review): whether pos is advanced on success is not visible here.
140 UnicodeString
RBBISymbolTable::parseReference(const UnicodeString
& text
,
141 ParsePosition
& pos
, int32_t limit
) const
143 int32_t start
= pos
.getIndex();
145 UnicodeString result
;
147 UChar c
= text
.charAt(i
);
// Stop condition: c is not a valid identifier character at position i.
148 if ((i
==start
&& !u_isIDStart(c
)) || !u_isIDPart(c
)) {
153 if (i
== start
) { // No valid name chars
154 return result
; // Indicate failure with empty string
157 text
.extractBetween(start
, i
, result
);
164 // RBBISymbolTable::lookupNode Given a key (a variable name), return the
165 // corresponding RBBI Node. If there is no entry
166 // in the table for this name, return NULL.
// Fetches the symbol table entry stored under key.
//   key   variable name, used as the hash key.
// retNode starts out as NULL, matching the documented result for a name that
// has no entry.
// NOTE(review): the code that derives retNode from the entry, and the return
// statement, are not visible here.
168 RBBINode
*RBBISymbolTable::lookupNode(const UnicodeString
&key
) const{
170 RBBINode
*retNode
= NULL
;
171 RBBISymbolTableEntry
*el
;
173 el
= (RBBISymbolTableEntry
*)uhash_get(fHashTable
, &key
);
182 // RBBISymbolTable::addEntry Add a new entry to the symbol table.
183 // Indicate an error if the name already exists -
184 // this will only occur in the case of duplicate
185 // variable assignments.
// Adds a new entry for key to the hash table.
//   key   variable name; the table is probed for it before anything is added.
//   val   node to associate with the name.
//         NOTE(review): the statements that copy key and val into the new
//         entry are not visible here.
//   err   in/out ICU error code. It is tested with U_FAILURE() on entry; the
//         visible paths set it to U_BRK_VARIABLE_REDFINITION or
//         U_MEMORY_ALLOCATION_ERROR, and it is also passed to uhash_put().
187 void RBBISymbolTable::addEntry (const UnicodeString
&key
, RBBINode
*val
, UErrorCode
&err
) {
188 RBBISymbolTableEntry
*e
;
189 /* check the incoming error code before touching the table */
190 if (U_FAILURE(err
)) {
// Probe for an existing entry with the same name.
193 e
= (RBBISymbolTableEntry
*)uhash_get(fHashTable
, &key
);
195 err
= U_BRK_VARIABLE_REDFINITION
;
199 e
= new RBBISymbolTableEntry
;
201 err
= U_MEMORY_ALLOCATION_ERROR
;
// The hash key is the address of the entry's own key member, so the key
// storage lives inside the entry that is stored as the value.
206 uhash_put( fHashTable
, &e
->key
, e
, &err
);
// Default constructor: key is default-constructed and val starts out as NULL.
210 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL
) {}
// Destructor: deletes the expression subtree attached to val->fLeftChild and
// clears that pointer.
// NOTE(review): val is dereferenced without a null check, while the default
// constructor initializes val to NULL - confirm that every entry has val
// assigned before it can be destroyed.
212 RBBISymbolTableEntry::~RBBISymbolTableEntry() {
213 // The "val" of a symbol table entry is a variable reference node.
214 // The left child of the val is the rhs expression from the assignment.
215 // Unlike other node types, children of variable reference nodes are not
216 // automatically recursively deleted. We do it manually here.
217 delete val
->fLeftChild
;
218 val
->fLeftChild
= NULL
;
222 // Note: the key UnicodeString is destructed by virtue of being in the object by value.
227 // RBBISymbolTable::rbbiSymtablePrint Debugging function, dump out the symbol table contents.
// Debug dump of the symbol table through RBBIDebugPrintf(). Two listings are
// produced, each walking the hash table with uhash_nextElement():
//   1. a table with one row per variable: name, node pointer, node serial
//      number and the source text of the variable's expression;
//   2. under "Parsed Variable Definitions", each variable's name followed by
//      printTree() output for its node and for that node's left child.
// NOTE(review): the loop constructs and any reset of pos between the two
// listings are not visible here.
230 void RBBISymbolTable::rbbiSymtablePrint() const {
231 RBBIDebugPrintf("Variable Definitions Symbol Table\n"
232 "Name Node serial String Val\n"
233 "-------------------------------------------------------------------\n");
// pos is the iteration cursor that uhash_nextElement() advances.
235 int32_t pos
= UHASH_FIRST
;
236 const UHashElement
*e
= NULL
;
238 e
= uhash_nextElement(fHashTable
, &pos
);
// The stored value is the RBBISymbolTableEntry itself.
242 RBBISymbolTableEntry
*s
= (RBBISymbolTableEntry
*)e
->value
.pointer
;
244 RBBIDebugPrintf("%-19s %8p %7d ", CStr(s
->key
)(), (void *)s
->val
, s
->val
->fSerialNum
);
245 RBBIDebugPrintf(" %s\n", CStr(s
->val
->fLeftChild
->fText
)());
248 RBBIDebugPrintf("\nParsed Variable Definitions\n");
251 e
= uhash_nextElement(fHashTable
, &pos
);
255 RBBISymbolTableEntry
*s
= (RBBISymbolTableEntry
*)e
->value
.pointer
;
256 RBBIDebugPrintf("%s\n", CStr(s
->key
)());
// Print the variable reference node first, then the expression tree that
// hangs off its left child.
257 RBBINode::printTree(s
->val
, TRUE
);
258 RBBINode::printTree(s
->val
->fLeftChild
, FALSE
);
259 RBBIDebugPrintf("\n");
270 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */