]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/rbbistbl57.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / common / rbbistbl57.cpp
CommitLineData
0f5d89e8
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4***************************************************************************
5* Copyright (C) 2002-2014 International Business Machines Corporation
6* and others. All rights reserved.
7***************************************************************************
8*/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/unistr.h"
15#include "unicode/uniset.h"
16#include "unicode/uchar.h"
17#include "unicode/parsepos.h"
18
19#include "umutex.h"
20
21#include "rbbirb57.h"
22#include "rbbinode.h"
23
24
25//
26// RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
27// when the hash table is deleted.
28//
29U_CDECL_BEGIN
30static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
31 icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;
32 delete px;
33}
34U_CDECL_END
35
36
37
38U_NAMESPACE_BEGIN
39
40RBBISymbolTable57::RBBISymbolTable57(RBBIRuleScanner57 *rs, const UnicodeString &rules, UErrorCode &status)
41 :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
42{
43 fHashTable = NULL;
44 fCachedSetLookup = NULL;
45
46 fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
47 // uhash_open checks status
48 if (U_FAILURE(status)) {
49 return;
50 }
51 uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
52}
53
54
55
56RBBISymbolTable57::~RBBISymbolTable57()
57{
58 uhash_close(fHashTable);
59}
60
61
62//
63// RBBISymbolTable57::lookup This function from the abstract symbol table interface
64// looks up a variable name and returns a UnicodeString
65// containing the substitution text.
66//
67// The variable name does NOT include the leading $.
68//
69const UnicodeString *RBBISymbolTable57::lookup(const UnicodeString& s) const
70{
71 RBBISymbolTableEntry *el;
72 RBBINode *varRefNode;
73 RBBINode *exprNode;
74 RBBINode *usetNode;
75 const UnicodeString *retString;
76 RBBISymbolTable57 *This = (RBBISymbolTable57 *)this; // cast off const
77
78 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
79 if (el == NULL) {
80 return NULL;
81 }
82
83 varRefNode = el->val;
84 exprNode = varRefNode->fLeftChild; // Root node of expression for variable
85 if (exprNode->fType == RBBINode::setRef) {
86 // The $variable refers to a single UnicodeSet
87 // return the ffffString, which will subsequently be interpreted as a
88 // stand-in character for the set by RBBISymbolTable57::lookupMatcher()
89 usetNode = exprNode->fLeftChild;
90 This->fCachedSetLookup = usetNode->fInputSet;
91 retString = &ffffString;
92 }
93 else
94 {
95 // The variable refers to something other than just a set.
96 // return the original source string for the expression
97 retString = &exprNode->fText;
98 This->fCachedSetLookup = NULL;
99 }
100 return retString;
101}
102
103
104
105//
106// RBBISymbolTable57::lookupMatcher This function from the abstract symbol table
107// interface maps a single stand-in character to a
108// pointer to a Unicode Set. The Unicode Set code uses this
109// mechanism to get all references to the same $variable
110// name to refer to a single common Unicode Set instance.
111//
112// This implementation cheats a little, and does not maintain a map of stand-in chars
113// to sets. Instead, it takes advantage of the fact that the UnicodeSet
114// constructor will always call this function right after calling lookup(),
115// and we just need to remember what set to return between these two calls.
116const UnicodeFunctor *RBBISymbolTable57::lookupMatcher(UChar32 ch) const
117{
118 UnicodeSet *retVal = NULL;
119 RBBISymbolTable57 *This = (RBBISymbolTable57 *)this; // cast off const
120 if (ch == 0xffff) {
121 retVal = fCachedSetLookup;
122 This->fCachedSetLookup = 0;
123 }
124 return retVal;
125}
126
127//
128// RBBISymbolTable57::parseReference This function from the abstract symbol table interface
129// looks for a $variable name in the source text.
130// It does not look it up, only scans for it.
131// It is used by the UnicodeSet parser.
132//
133// This implementation is lifted pretty much verbatim
134// from the rules based transliterator implementation.
135// I didn't see an obvious way of sharing it.
136//
137UnicodeString RBBISymbolTable57::parseReference(const UnicodeString& text,
138 ParsePosition& pos, int32_t limit) const
139{
140 int32_t start = pos.getIndex();
141 int32_t i = start;
142 UnicodeString result;
143 while (i < limit) {
144 UChar c = text.charAt(i);
145 if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
146 break;
147 }
148 ++i;
149 }
150 if (i == start) { // No valid name chars
151 return result; // Indicate failure with empty string
152 }
153 pos.setIndex(i);
154 text.extractBetween(start, i, result);
155 return result;
156}
157
158
159
160//
161// RBBISymbolTable57::lookupNode Given a key (a variable name), return the
162// corresponding RBBI Node. If there is no entry
163// in the table for this name, return NULL.
164//
165RBBINode *RBBISymbolTable57::lookupNode(const UnicodeString &key) const{
166
167 RBBINode *retNode = NULL;
168 RBBISymbolTableEntry *el;
169
170 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
171 if (el != NULL) {
172 retNode = el->val;
173 }
174 return retNode;
175}
176
177
178//
179// RBBISymbolTable57::addEntry Add a new entry to the symbol table.
180// Indicate an error if the name already exists -
181// this will only occur in the case of duplicate
182// variable assignments.
183//
184void RBBISymbolTable57::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
185 RBBISymbolTableEntry *e;
186 /* test for buffer overflows */
187 if (U_FAILURE(err)) {
188 return;
189 }
190 e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
191 if (e != NULL) {
192 err = U_BRK_VARIABLE_REDFINITION;
193 return;
194 }
195
196 e = new RBBISymbolTableEntry;
197 if (e == NULL) {
198 err = U_MEMORY_ALLOCATION_ERROR;
199 return;
200 }
201 e->key = key;
202 e->val = val;
203 uhash_put( fHashTable, &e->key, e, &err);
204}
205
206// RBBISymbolTableEntry::RBBISymbolTableEntry() - from standard rbbistbl.cpp
207// RBBISymbolTableEntry::~RBBISymbolTableEntry() - from standard rbbistbl.cpp
208
209
210//
211// RBBISymbolTable57::rbbiSymtablePrint Debugging function, dump out the symbol table contents.
212//
213#ifdef RBBI_DEBUG
214void RBBISymbolTable57::rbbiSymtablePrint() const {
215 RBBIDebugPrintf("Variable Definitions\n"
216 "Name Node Val String Val\n"
217 "----------------------------------------------------------------------\n");
218
219 int32_t pos = UHASH_FIRST;
220 const UHashElement *e = NULL;
221 for (;;) {
222 e = uhash_nextElement(fHashTable, &pos);
223 if (e == NULL ) {
224 break;
225 }
226 RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
227
228 RBBI_DEBUG_printUnicodeString(s->key, 15);
229 RBBIDebugPrintf(" %8p ", (void *)s->val);
230 RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText);
231 RBBIDebugPrintf("\n");
232 }
233
234 RBBIDebugPrintf("\nParsed Variable Definitions\n");
235 pos = -1;
236 for (;;) {
237 e = uhash_nextElement(fHashTable, &pos);
238 if (e == NULL ) {
239 break;
240 }
241 RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
242 RBBI_DEBUG_printUnicodeString(s->key);
243 s->val->fLeftChild->printTree(TRUE);
244 RBBIDebugPrintf("\n");
245 }
246}
247#endif
248
249
250
251
252
253U_NAMESPACE_END
254
255#endif /* #if !UCONFIG_NO_BREAK_ITERATION */