]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | // |
4 | // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class | |
5 | // | |
6 | /* | |
7 | *************************************************************************** | |
b331163b | 8 | * Copyright (C) 2002-2014 International Business Machines Corporation |
4388f060 | 9 | * and others. All rights reserved. |
b75a7d8f A |
10 | *************************************************************************** |
11 | */ | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_BREAK_ITERATION | |
16 | ||
17 | #include "unicode/unistr.h" | |
18 | #include "unicode/uniset.h" | |
19 | #include "unicode/uchar.h" | |
20 | #include "unicode/parsepos.h" | |
21 | ||
f3c0d7a5 | 22 | #include "cstr.h" |
b75a7d8f | 23 | #include "rbbinode.h" |
f3c0d7a5 A |
24 | #include "rbbirb.h" |
25 | #include "umutex.h" | |
b75a7d8f A |
26 | |
27 | ||
28 | // | |
29 | // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents | |
30 | // when the hash table is deleted. | |
31 | // | |
32 | U_CDECL_BEGIN | |
73c04bcf | 33 | static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { |
4388f060 | 34 | icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p; |
b75a7d8f A |
35 | delete px; |
36 | } | |
37 | U_CDECL_END | |
38 | ||
39 | ||
40 | ||
41 | U_NAMESPACE_BEGIN | |
42 | ||
43 | RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status) | |
44 | :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff)) | |
45 | { | |
46 | fHashTable = NULL; | |
47 | fCachedSetLookup = NULL; | |
374ca955 | 48 | |
73c04bcf | 49 | fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); |
374ca955 | 50 | // uhash_open checks status |
b75a7d8f A |
51 | if (U_FAILURE(status)) { |
52 | return; | |
53 | } | |
b75a7d8f A |
54 | uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter); |
55 | } | |
56 | ||
57 | ||
58 | ||
59 | RBBISymbolTable::~RBBISymbolTable() | |
60 | { | |
61 | uhash_close(fHashTable); | |
62 | } | |
63 | ||
64 | ||
65 | // | |
66 | // RBBISymbolTable::lookup This function from the abstract symbol table interface | |
67 | // looks up a variable name and returns a UnicodeString | |
68 | // containing the substitution text. | |
69 | // | |
70 | // The variable name does NOT include the leading $. | |
71 | // | |
72 | const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const | |
73 | { | |
74 | RBBISymbolTableEntry *el; | |
75 | RBBINode *varRefNode; | |
76 | RBBINode *exprNode; | |
77 | RBBINode *usetNode; | |
78 | const UnicodeString *retString; | |
79 | RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const | |
80 | ||
81 | el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s); | |
82 | if (el == NULL) { | |
83 | return NULL; | |
84 | } | |
85 | ||
86 | varRefNode = el->val; | |
87 | exprNode = varRefNode->fLeftChild; // Root node of expression for variable | |
88 | if (exprNode->fType == RBBINode::setRef) { | |
89 | // The $variable refers to a single UnicodeSet | |
90 | // return the ffffString, which will subsequently be interpreted as a | |
91 | // stand-in character for the set by RBBISymbolTable::lookupMatcher() | |
92 | usetNode = exprNode->fLeftChild; | |
93 | This->fCachedSetLookup = usetNode->fInputSet; | |
94 | retString = &ffffString; | |
95 | } | |
96 | else | |
97 | { | |
98 | // The variable refers to something other than just a set. | |
99 | // return the original source string for the expression | |
100 | retString = &exprNode->fText; | |
101 | This->fCachedSetLookup = NULL; | |
102 | } | |
103 | return retString; | |
104 | } | |
105 | ||
106 | ||
107 | ||
108 | // | |
109 | // RBBISymbolTable::lookupMatcher This function from the abstract symbol table | |
110 | // interface maps a single stand-in character to a | |
111 | // pointer to a Unicode Set. The Unicode Set code uses this | |
112 | // mechanism to get all references to the same $variable | |
113 | // name to refer to a single common Unicode Set instance. | |
114 | // | |
115 | // This implementation cheats a little, and does not maintain a map of stand-in chars | |
116 | // to sets. Instead, it takes advantage of the fact that the UnicodeSet | |
117 | // constructor will always call this function right after calling lookup(), | |
118 | // and we just need to remember what set to return between these two calls. | |
119 | const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const | |
120 | { | |
121 | UnicodeSet *retVal = NULL; | |
122 | RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const | |
123 | if (ch == 0xffff) { | |
124 | retVal = fCachedSetLookup; | |
125 | This->fCachedSetLookup = 0; | |
126 | } | |
127 | return retVal; | |
128 | } | |
129 | ||
130 | // | |
131 | // RBBISymbolTable::parseReference This function from the abstract symbol table interface | |
132 | // looks for a $variable name in the source text. | |
133 | // It does not look it up, only scans for it. | |
134 | // It is used by the UnicodeSet parser. | |
135 | // | |
136 | // This implementation is lifted pretty much verbatim | |
137 | // from the rules based transliterator implementation. | |
138 | // I didn't see an obvious way of sharing it. | |
139 | // | |
140 | UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, | |
141 | ParsePosition& pos, int32_t limit) const | |
142 | { | |
143 | int32_t start = pos.getIndex(); | |
144 | int32_t i = start; | |
145 | UnicodeString result; | |
146 | while (i < limit) { | |
147 | UChar c = text.charAt(i); | |
148 | if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { | |
149 | break; | |
150 | } | |
151 | ++i; | |
152 | } | |
153 | if (i == start) { // No valid name chars | |
154 | return result; // Indicate failure with empty string | |
155 | } | |
156 | pos.setIndex(i); | |
157 | text.extractBetween(start, i, result); | |
158 | return result; | |
159 | } | |
160 | ||
161 | ||
162 | ||
163 | // | |
164 | // RBBISymbolTable::lookupNode Given a key (a variable name), return the | |
165 | // corresponding RBBI Node. If there is no entry | |
166 | // in the table for this name, return NULL. | |
167 | // | |
168 | RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{ | |
169 | ||
170 | RBBINode *retNode = NULL; | |
171 | RBBISymbolTableEntry *el; | |
172 | ||
173 | el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); | |
174 | if (el != NULL) { | |
175 | retNode = el->val; | |
176 | } | |
177 | return retNode; | |
178 | } | |
179 | ||
180 | ||
181 | // | |
182 | // RBBISymbolTable::addEntry Add a new entry to the symbol table. | |
183 | // Indicate an error if the name already exists - | |
184 | // this will only occur in the case of duplicate | |
185 | // variable assignments. | |
186 | // | |
187 | void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) { | |
188 | RBBISymbolTableEntry *e; | |
189 | /* test for buffer overflows */ | |
190 | if (U_FAILURE(err)) { | |
191 | return; | |
192 | } | |
193 | e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); | |
194 | if (e != NULL) { | |
195 | err = U_BRK_VARIABLE_REDFINITION; | |
196 | return; | |
197 | } | |
198 | ||
199 | e = new RBBISymbolTableEntry; | |
200 | if (e == NULL) { | |
201 | err = U_MEMORY_ALLOCATION_ERROR; | |
202 | return; | |
203 | } | |
204 | e->key = key; | |
205 | e->val = val; | |
206 | uhash_put( fHashTable, &e->key, e, &err); | |
207 | } | |
208 | ||
209 | ||
210 | RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {} | |
211 | ||
212 | RBBISymbolTableEntry::~RBBISymbolTableEntry() { | |
213 | // The "val" of a symbol table entry is a variable reference node. | |
214 | // The l. child of the val is the rhs expression from the assignment. | |
215 | // Unlike other node types, children of variable reference nodes are not | |
216 | // automatically recursively deleted. We do it manually here. | |
217 | delete val->fLeftChild; | |
218 | val->fLeftChild = NULL; | |
219 | ||
220 | delete val; | |
221 | ||
222 | // Note: the key UnicodeString is destructed by virtue of being in the object by value. | |
223 | } | |
224 | ||
225 | ||
226 | // | |
227 | // RBBISymbolTable::rbbiSymtablePrint Debugging function, dump out the symbol table contents. | |
228 | // | |
374ca955 A |
229 | #ifdef RBBI_DEBUG |
230 | void RBBISymbolTable::rbbiSymtablePrint() const { | |
f3c0d7a5 A |
231 | RBBIDebugPrintf("Variable Definitions Symbol Table\n" |
232 | "Name Node serial String Val\n" | |
233 | "-------------------------------------------------------------------\n"); | |
b75a7d8f | 234 | |
b331163b | 235 | int32_t pos = UHASH_FIRST; |
b75a7d8f A |
236 | const UHashElement *e = NULL; |
237 | for (;;) { | |
238 | e = uhash_nextElement(fHashTable, &pos); | |
239 | if (e == NULL ) { | |
240 | break; | |
241 | } | |
242 | RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; | |
243 | ||
f3c0d7a5 A |
244 | RBBIDebugPrintf("%-19s %8p %7d ", CStr(s->key)(), (void *)s->val, s->val->fSerialNum); |
245 | RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)()); | |
b75a7d8f A |
246 | } |
247 | ||
248 | RBBIDebugPrintf("\nParsed Variable Definitions\n"); | |
249 | pos = -1; | |
250 | for (;;) { | |
251 | e = uhash_nextElement(fHashTable, &pos); | |
252 | if (e == NULL ) { | |
253 | break; | |
254 | } | |
255 | RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; | |
f3c0d7a5 A |
256 | RBBIDebugPrintf("%s\n", CStr(s->key)()); |
257 | RBBINode::printTree(s->val, TRUE); | |
258 | RBBINode::printTree(s->val->fLeftChild, FALSE); | |
b75a7d8f A |
259 | RBBIDebugPrintf("\n"); |
260 | } | |
261 | } | |
374ca955 | 262 | #endif |
b75a7d8f A |
263 | |
264 | ||
265 | ||
266 | ||
267 | ||
268 | U_NAMESPACE_END | |
269 | ||
270 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |