]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbistbl57.cpp
ICU-64232.0.1.tar.gz
[apple/icu.git] / icuSources / common / rbbistbl57.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 * Copyright (C) 2002-2014 International Business Machines Corporation
6 * and others. All rights reserved.
7 ***************************************************************************
8 */
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_BREAK_ITERATION
13
14 #include "unicode/unistr.h"
15 #include "unicode/uniset.h"
16 #include "unicode/uchar.h"
17 #include "unicode/parsepos.h"
18
19 #include "umutex.h"
20
21 #include "rbbirb57.h"
22 #include "rbbinode.h"
23
24
25 //
26 // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
27 // when the hash table is deleted.
28 //
29 U_CDECL_BEGIN
30 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
31 icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;
32 delete px;
33 }
34 U_CDECL_END
35
36
37
38 U_NAMESPACE_BEGIN
39
40 RBBISymbolTable57::RBBISymbolTable57(RBBIRuleScanner57 *rs, const UnicodeString &rules, UErrorCode &status)
41 :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
42 {
43 fHashTable = NULL;
44 fCachedSetLookup = NULL;
45
46 fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
47 // uhash_open checks status
48 if (U_FAILURE(status)) {
49 return;
50 }
51 uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
52 }
53
54
55
56 RBBISymbolTable57::~RBBISymbolTable57()
57 {
58 uhash_close(fHashTable);
59 }
60
61
62 //
63 // RBBISymbolTable57::lookup This function from the abstract symbol table inteface
64 // looks up a variable name and returns a UnicodeString
65 // containing the substitution text.
66 //
67 // The variable name does NOT include the leading $.
68 //
69 const UnicodeString *RBBISymbolTable57::lookup(const UnicodeString& s) const
70 {
71 RBBISymbolTableEntry *el;
72 RBBINode *varRefNode;
73 RBBINode *exprNode;
74 RBBINode *usetNode;
75 const UnicodeString *retString;
76 RBBISymbolTable57 *This = (RBBISymbolTable57 *)this; // cast off const
77
78 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
79 if (el == NULL) {
80 return NULL;
81 }
82
83 varRefNode = el->val;
84 exprNode = varRefNode->fLeftChild; // Root node of expression for variable
85 if (exprNode->fType == RBBINode::setRef) {
86 // The $variable refers to a single UnicodeSet
87 // return the ffffString, which will subsequently be interpreted as a
88 // stand-in character for the set by RBBISymbolTable57::lookupMatcher()
89 usetNode = exprNode->fLeftChild;
90 This->fCachedSetLookup = usetNode->fInputSet;
91 retString = &ffffString;
92 }
93 else
94 {
95 // The variable refers to something other than just a set.
96 // return the original source string for the expression
97 retString = &exprNode->fText;
98 This->fCachedSetLookup = NULL;
99 }
100 return retString;
101 }
102
103
104
105 //
106 // RBBISymbolTable57::lookupMatcher This function from the abstract symbol table
107 // interface maps a single stand-in character to a
108 // pointer to a Unicode Set. The Unicode Set code uses this
109 // mechanism to get all references to the same $variable
110 // name to refer to a single common Unicode Set instance.
111 //
112 // This implementation cheats a little, and does not maintain a map of stand-in chars
113 // to sets. Instead, it takes advantage of the fact that the UnicodeSet
114 // constructor will always call this function right after calling lookup(),
115 // and we just need to remember what set to return between these two calls.
116 const UnicodeFunctor *RBBISymbolTable57::lookupMatcher(UChar32 ch) const
117 {
118 UnicodeSet *retVal = NULL;
119 RBBISymbolTable57 *This = (RBBISymbolTable57 *)this; // cast off const
120 if (ch == 0xffff) {
121 retVal = fCachedSetLookup;
122 This->fCachedSetLookup = 0;
123 }
124 return retVal;
125 }
126
127 //
128 // RBBISymbolTable57::parseReference This function from the abstract symbol table interface
129 // looks for a $variable name in the source text.
130 // It does not look it up, only scans for it.
131 // It is used by the UnicodeSet parser.
132 //
133 // This implementation is lifted pretty much verbatim
134 // from the rules based transliterator implementation.
135 // I didn't see an obvious way of sharing it.
136 //
137 UnicodeString RBBISymbolTable57::parseReference(const UnicodeString& text,
138 ParsePosition& pos, int32_t limit) const
139 {
140 int32_t start = pos.getIndex();
141 int32_t i = start;
142 UnicodeString result;
143 while (i < limit) {
144 UChar c = text.charAt(i);
145 if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
146 break;
147 }
148 ++i;
149 }
150 if (i == start) { // No valid name chars
151 return result; // Indicate failure with empty string
152 }
153 pos.setIndex(i);
154 text.extractBetween(start, i, result);
155 return result;
156 }
157
158
159
160 //
161 // RBBISymbolTable57::lookupNode Given a key (a variable name), return the
162 // corresponding RBBI Node. If there is no entry
163 // in the table for this name, return NULL.
164 //
165 RBBINode *RBBISymbolTable57::lookupNode(const UnicodeString &key) const{
166
167 RBBINode *retNode = NULL;
168 RBBISymbolTableEntry *el;
169
170 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
171 if (el != NULL) {
172 retNode = el->val;
173 }
174 return retNode;
175 }
176
177
178 //
179 // RBBISymbolTable57::addEntry Add a new entry to the symbol table.
180 // Indicate an error if the name already exists -
181 // this will only occur in the case of duplicate
182 // variable assignments.
183 //
184 void RBBISymbolTable57::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
185 RBBISymbolTableEntry *e;
186 /* test for buffer overflows */
187 if (U_FAILURE(err)) {
188 return;
189 }
190 e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
191 if (e != NULL) {
192 err = U_BRK_VARIABLE_REDFINITION;
193 return;
194 }
195
196 e = new RBBISymbolTableEntry;
197 if (e == NULL) {
198 err = U_MEMORY_ALLOCATION_ERROR;
199 return;
200 }
201 e->key = key;
202 e->val = val;
203 uhash_put( fHashTable, &e->key, e, &err);
204 }
205
206 // RBBISymbolTableEntry::RBBISymbolTableEntry() - from standard rbbistbl.cpp
207 // RBBISymbolTableEntry::~RBBISymbolTableEntry() - from standard rbbistbl.cpp
208
209
//
//  RBBISymbolTable57::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
//
//                                          Two passes are made over the table: the first lists each
//                                          variable with its node pointer and the text of its
//                                          definition, the second prints the parse tree of each
//                                          definition.
//
#ifdef RBBI_DEBUG
void RBBISymbolTable57::rbbiSymtablePrint() const {
    RBBIDebugPrintf("Variable Definitions\n"
           "Name Node Val String Val\n"
           "----------------------------------------------------------------------\n");

    const UHashElement *e = NULL;

    int32_t pos = UHASH_FIRST;
    while ((e = uhash_nextElement(fHashTable, &pos)) != NULL) {
        RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;

        RBBI_DEBUG_printUnicodeString(s->key, 15);
        RBBIDebugPrintf(" %8p ", (void *)s->val);
        RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText);
        RBBIDebugPrintf("\n");
    }

    RBBIDebugPrintf("\nParsed Variable Definitions\n");

    // Restart the iteration with the same named constant as the first pass.
    pos = UHASH_FIRST;
    while ((e = uhash_nextElement(fHashTable, &pos)) != NULL) {
        RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;

        RBBI_DEBUG_printUnicodeString(s->key);
        s->val->fLeftChild->printTree(TRUE);
        RBBIDebugPrintf("\n");
    }
}
#endif
248
249
250
251
252
253 U_NAMESPACE_END
254
255 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */