]>
Commit | Line | Data |
---|---|---|
0f5d89e8 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | *************************************************************************** | |
5 | * Copyright (C) 2002-2014 International Business Machines Corporation | |
6 | * and others. All rights reserved. | |
7 | *************************************************************************** | |
8 | */ | |
9 | ||
10 | #include "unicode/utypes.h" | |
11 | ||
12 | #if !UCONFIG_NO_BREAK_ITERATION | |
13 | ||
14 | #include "unicode/unistr.h" | |
15 | #include "unicode/uniset.h" | |
16 | #include "unicode/uchar.h" | |
17 | #include "unicode/parsepos.h" | |
18 | ||
19 | #include "umutex.h" | |
20 | ||
21 | #include "rbbirb57.h" | |
22 | #include "rbbinode.h" | |
23 | ||
24 | ||
25 | // | |
26 | // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents | |
27 | // when the hash table is deleted. | |
28 | // | |
29 | U_CDECL_BEGIN | |
30 | static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { | |
31 | icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p; | |
32 | delete px; | |
33 | } | |
34 | U_CDECL_END | |
35 | ||
36 | ||
37 | ||
38 | U_NAMESPACE_BEGIN | |
39 | ||
40 | RBBISymbolTable57::RBBISymbolTable57(RBBIRuleScanner57 *rs, const UnicodeString &rules, UErrorCode &status) | |
41 | :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff)) | |
42 | { | |
43 | fHashTable = NULL; | |
44 | fCachedSetLookup = NULL; | |
45 | ||
46 | fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); | |
47 | // uhash_open checks status | |
48 | if (U_FAILURE(status)) { | |
49 | return; | |
50 | } | |
51 | uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter); | |
52 | } | |
53 | ||
54 | ||
55 | ||
56 | RBBISymbolTable57::~RBBISymbolTable57() | |
57 | { | |
58 | uhash_close(fHashTable); | |
59 | } | |
60 | ||
61 | ||
62 | // | |
63 | // RBBISymbolTable57::lookup This function from the abstract symbol table inteface | |
64 | // looks up a variable name and returns a UnicodeString | |
65 | // containing the substitution text. | |
66 | // | |
67 | // The variable name does NOT include the leading $. | |
68 | // | |
69 | const UnicodeString *RBBISymbolTable57::lookup(const UnicodeString& s) const | |
70 | { | |
71 | RBBISymbolTableEntry *el; | |
72 | RBBINode *varRefNode; | |
73 | RBBINode *exprNode; | |
74 | RBBINode *usetNode; | |
75 | const UnicodeString *retString; | |
76 | RBBISymbolTable57 *This = (RBBISymbolTable57 *)this; // cast off const | |
77 | ||
78 | el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s); | |
79 | if (el == NULL) { | |
80 | return NULL; | |
81 | } | |
82 | ||
83 | varRefNode = el->val; | |
84 | exprNode = varRefNode->fLeftChild; // Root node of expression for variable | |
85 | if (exprNode->fType == RBBINode::setRef) { | |
86 | // The $variable refers to a single UnicodeSet | |
87 | // return the ffffString, which will subsequently be interpreted as a | |
88 | // stand-in character for the set by RBBISymbolTable57::lookupMatcher() | |
89 | usetNode = exprNode->fLeftChild; | |
90 | This->fCachedSetLookup = usetNode->fInputSet; | |
91 | retString = &ffffString; | |
92 | } | |
93 | else | |
94 | { | |
95 | // The variable refers to something other than just a set. | |
96 | // return the original source string for the expression | |
97 | retString = &exprNode->fText; | |
98 | This->fCachedSetLookup = NULL; | |
99 | } | |
100 | return retString; | |
101 | } | |
102 | ||
103 | ||
104 | ||
105 | // | |
106 | // RBBISymbolTable57::lookupMatcher This function from the abstract symbol table | |
107 | // interface maps a single stand-in character to a | |
108 | // pointer to a Unicode Set. The Unicode Set code uses this | |
109 | // mechanism to get all references to the same $variable | |
110 | // name to refer to a single common Unicode Set instance. | |
111 | // | |
112 | // This implementation cheats a little, and does not maintain a map of stand-in chars | |
113 | // to sets. Instead, it takes advantage of the fact that the UnicodeSet | |
114 | // constructor will always call this function right after calling lookup(), | |
115 | // and we just need to remember what set to return between these two calls. | |
116 | const UnicodeFunctor *RBBISymbolTable57::lookupMatcher(UChar32 ch) const | |
117 | { | |
118 | UnicodeSet *retVal = NULL; | |
119 | RBBISymbolTable57 *This = (RBBISymbolTable57 *)this; // cast off const | |
120 | if (ch == 0xffff) { | |
121 | retVal = fCachedSetLookup; | |
122 | This->fCachedSetLookup = 0; | |
123 | } | |
124 | return retVal; | |
125 | } | |
126 | ||
127 | // | |
128 | // RBBISymbolTable57::parseReference This function from the abstract symbol table interface | |
129 | // looks for a $variable name in the source text. | |
130 | // It does not look it up, only scans for it. | |
131 | // It is used by the UnicodeSet parser. | |
132 | // | |
133 | // This implementation is lifted pretty much verbatim | |
134 | // from the rules based transliterator implementation. | |
135 | // I didn't see an obvious way of sharing it. | |
136 | // | |
137 | UnicodeString RBBISymbolTable57::parseReference(const UnicodeString& text, | |
138 | ParsePosition& pos, int32_t limit) const | |
139 | { | |
140 | int32_t start = pos.getIndex(); | |
141 | int32_t i = start; | |
142 | UnicodeString result; | |
143 | while (i < limit) { | |
144 | UChar c = text.charAt(i); | |
145 | if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { | |
146 | break; | |
147 | } | |
148 | ++i; | |
149 | } | |
150 | if (i == start) { // No valid name chars | |
151 | return result; // Indicate failure with empty string | |
152 | } | |
153 | pos.setIndex(i); | |
154 | text.extractBetween(start, i, result); | |
155 | return result; | |
156 | } | |
157 | ||
158 | ||
159 | ||
160 | // | |
161 | // RBBISymbolTable57::lookupNode Given a key (a variable name), return the | |
162 | // corresponding RBBI Node. If there is no entry | |
163 | // in the table for this name, return NULL. | |
164 | // | |
165 | RBBINode *RBBISymbolTable57::lookupNode(const UnicodeString &key) const{ | |
166 | ||
167 | RBBINode *retNode = NULL; | |
168 | RBBISymbolTableEntry *el; | |
169 | ||
170 | el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); | |
171 | if (el != NULL) { | |
172 | retNode = el->val; | |
173 | } | |
174 | return retNode; | |
175 | } | |
176 | ||
177 | ||
178 | // | |
179 | // RBBISymbolTable57::addEntry Add a new entry to the symbol table. | |
180 | // Indicate an error if the name already exists - | |
181 | // this will only occur in the case of duplicate | |
182 | // variable assignments. | |
183 | // | |
184 | void RBBISymbolTable57::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) { | |
185 | RBBISymbolTableEntry *e; | |
186 | /* test for buffer overflows */ | |
187 | if (U_FAILURE(err)) { | |
188 | return; | |
189 | } | |
190 | e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); | |
191 | if (e != NULL) { | |
192 | err = U_BRK_VARIABLE_REDFINITION; | |
193 | return; | |
194 | } | |
195 | ||
196 | e = new RBBISymbolTableEntry; | |
197 | if (e == NULL) { | |
198 | err = U_MEMORY_ALLOCATION_ERROR; | |
199 | return; | |
200 | } | |
201 | e->key = key; | |
202 | e->val = val; | |
203 | uhash_put( fHashTable, &e->key, e, &err); | |
204 | } | |
205 | ||
206 | // RBBISymbolTableEntry::RBBISymbolTableEntry() - from standard rbbistbl.cpp | |
207 | // RBBISymbolTableEntry::~RBBISymbolTableEntry() - from standard rbbistbl.cpp | |
208 | ||
209 | ||
//
//  RBBISymbolTable57::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
//                                  Only compiled when RBBI_DEBUG is defined.
//
//                                  Two passes are made over the hash table: the first prints
//                                  each variable's name, node pointer and defining source text;
//                                  the second prints each name followed by the parse tree of
//                                  its definition.
//
#ifdef RBBI_DEBUG
void RBBISymbolTable57::rbbiSymtablePrint() const {
    RBBIDebugPrintf("Variable Definitions\n"
                    "Name Node Val String Val\n"
                    "----------------------------------------------------------------------\n");

    // Pass 1: one summary line per variable.
    int32_t cursor = UHASH_FIRST;
    for (const UHashElement *elem = uhash_nextElement(fHashTable, &cursor);
         elem != NULL;
         elem = uhash_nextElement(fHashTable, &cursor)) {
        RBBISymbolTableEntry *entry = static_cast<RBBISymbolTableEntry *>(elem->value.pointer);

        RBBI_DEBUG_printUnicodeString(entry->key, 15);
        RBBIDebugPrintf(" %8p ", static_cast<void *>(entry->val));
        RBBI_DEBUG_printUnicodeString(entry->val->fLeftChild->fText);
        RBBIDebugPrintf("\n");
    }

    // Pass 2: restart the iteration and dump the parse tree of every definition.
    RBBIDebugPrintf("\nParsed Variable Definitions\n");
    cursor = -1;
    for (const UHashElement *elem = uhash_nextElement(fHashTable, &cursor);
         elem != NULL;
         elem = uhash_nextElement(fHashTable, &cursor)) {
        RBBISymbolTableEntry *entry = static_cast<RBBISymbolTableEntry *>(elem->value.pointer);

        RBBI_DEBUG_printUnicodeString(entry->key);
        entry->val->fLeftChild->printTree(TRUE);
        RBBIDebugPrintf("\n");
    }
}
#endif
248 | ||
249 | ||
250 | ||
251 | ||
252 | ||
253 | U_NAMESPACE_END | |
254 | ||
255 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |