1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ***************************************************************************
5 * Copyright (C) 2002-2014 International Business Machines Corporation
6 * and others. All rights reserved.
7 ***************************************************************************
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_BREAK_ITERATION
14 #include "unicode/unistr.h"
15 #include "unicode/uniset.h"
16 #include "unicode/uchar.h"
17 #include "unicode/parsepos.h"
26 // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
27 // when the hash table is deleted.
// File-local callback with the U_CALLCONV calling convention.
// p: untyped pointer, cast here to icu::RBBISymbolTableEntry*.
// NOTE(review): the statements following the cast are not visible in this
// excerpt; presumably the entry is deleted through px -- confirm.
30 static void U_CALLCONV
RBBISymbolTableEntry_deleter(void *p
) {
31 icu::RBBISymbolTableEntry
*px
= (icu::RBBISymbolTableEntry
*)p
;
// Constructor.
//   rs      rule scanner, used to initialize fRuleScanner.
//   rules   rule text, used to initialize fRules.
//   status  in/out error code; handed to uhash_open() and tested afterwards.
// Initializes ffffString with the single code unit 0xffff, clears
// fCachedSetLookup, opens fHashTable with the UnicodeString hash and compare
// functions, and registers RBBISymbolTableEntry_deleter as the table's
// value deleter.
40 RBBISymbolTable57::RBBISymbolTable57(RBBIRuleScanner57
*rs
, const UnicodeString
&rules
, UErrorCode
&status
)
41 :fRules(rules
), fRuleScanner(rs
), ffffString(UChar(0xffff))
44 fCachedSetLookup
= NULL
;
// NULL is passed as the third (value comparator) argument.
46 fHashTable
= uhash_open(uhash_hashUnicodeString
, uhash_compareUnicodeString
, NULL
, &status
);
47 // uhash_open checks status
// NOTE(review): the body of this failure branch is not visible in this
// excerpt; presumably it returns before the deleter is installed -- confirm.
48 if (U_FAILURE(status
)) {
51 uhash_setValueDeleter(fHashTable
, RBBISymbolTableEntry_deleter
);
// Destructor: closes fHashTable with uhash_close().
// NOTE(review): stored entries are presumably released through the value
// deleter registered in the constructor -- confirm against the uhash docs.
56 RBBISymbolTable57::~RBBISymbolTable57()
58 uhash_close(fHashTable
);
63 // RBBISymbolTable57::lookup This function from the abstract symbol table interface
64 // looks up a variable name and returns a UnicodeString
65 // containing the substitution text.
67 // The variable name does NOT include the leading $.
// Looks up the entry stored under s in fHashTable and selects the
// substitution string for it:
//   - if the root node of the variable's expression has type
//     RBBINode::setRef, the set's fInputSet is remembered in
//     fCachedSetLookup and retString points at ffffString;
//   - otherwise retString points at the expression node's fText and
//     fCachedSetLookup is cleared.
// The method is const, so the cache is written through the non-const
// alias 'This'.
// NOTE(review): the declarations of varRefNode/exprNode/usetNode, any
// not-found handling, the else keyword and the return statement are not
// visible in this excerpt.
69 const UnicodeString
*RBBISymbolTable57::lookup(const UnicodeString
& s
) const
71 RBBISymbolTableEntry
*el
;
75 const UnicodeString
*retString
;
76 RBBISymbolTable57
*This
= (RBBISymbolTable57
*)this; // cast off const
// Fetch the table entry keyed by the variable name.
78 el
= (RBBISymbolTableEntry
*)uhash_get(fHashTable
, &s
);
84 exprNode
= varRefNode
->fLeftChild
; // Root node of expression for variable
85 if (exprNode
->fType
== RBBINode::setRef
) {
86 // The $variable refers to a single UnicodeSet
87 // return the ffffString, which will subsequently be interpreted as a
88 // stand-in character for the set by RBBISymbolTable57::lookupMatcher()
89 usetNode
= exprNode
->fLeftChild
;
90 This
->fCachedSetLookup
= usetNode
->fInputSet
;
91 retString
= &ffffString
;
95 // The variable refers to something other than just a set.
96 // return the original source string for the expression
97 retString
= &exprNode
->fText
;
98 This
->fCachedSetLookup
= NULL
;
106 // RBBISymbolTable57::lookupMatcher This function from the abstract symbol table
107 // interface maps a single stand-in character to a
108 // pointer to a Unicode Set. The Unicode Set code uses this
109 // mechanism to get all references to the same $variable
110 // name to refer to a single common Unicode Set instance.
112 // This implementation cheats a little, and does not maintain a map of stand-in chars
113 // to sets. Instead, it takes advantage of the fact that the UnicodeSet
114 // constructor will always call this function right after calling lookup(),
115 // and we just need to remember what set to return between these two calls.
// Copies fCachedSetLookup into retVal, then resets the cached pointer to 0
// through the non-const alias 'This', so the cached value is handed out
// at most once per lookup() call.
// ch is not referenced in the lines visible here.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably retVal is returned -- confirm.
116 const UnicodeFunctor
*RBBISymbolTable57::lookupMatcher(UChar32 ch
) const
118 UnicodeSet
*retVal
= NULL
;
119 RBBISymbolTable57
*This
= (RBBISymbolTable57
*)this; // cast off const
121 retVal
= fCachedSetLookup
;
122 This
->fCachedSetLookup
= 0;
128 // RBBISymbolTable57::parseReference This function from the abstract symbol table interface
129 // looks for a $variable name in the source text.
130 // It does not look it up, only scans for it.
131 // It is used by the UnicodeSet parser.
133 // This implementation is lifted pretty much verbatim
134 // from the rules based transliterator implementation.
135 // I didn't see an obvious way of sharing it.
// Scans text for an identifier beginning at pos.getIndex().
//   text   string being scanned.
//   pos    parse position; its index is the scan start.
//   limit  NOTE(review): its use is not visible in this excerpt; presumably
//          the upper bound of the scan -- confirm.
// A character ends the name when it is at the start position and is not an
// ID-start character, or when it is not an ID-part character. If no
// character was accepted (i == start) the empty result is returned;
// otherwise text[start, i) is extracted into result.
// NOTE(review): the loop header declaring i, any update of pos, and the
// final return are not visible in this excerpt.
137 UnicodeString
RBBISymbolTable57::parseReference(const UnicodeString
& text
,
138 ParsePosition
& pos
, int32_t limit
) const
140 int32_t start
= pos
.getIndex();
142 UnicodeString result
;
144 UChar c
= text
.charAt(i
);
145 if ((i
==start
&& !u_isIDStart(c
)) || !u_isIDPart(c
)) {
150 if (i
== start
) { // No valid name chars
151 return result
; // Indicate failure with empty string
154 text
.extractBetween(start
, i
, result
);
161 // RBBISymbolTable57::lookupNode Given a key (a variable name), return the
162 // corresponding RBBI Node. If there is no entry
163 // in the table for this name, return NULL.
// Fetches the table entry stored under key from fHashTable.
// retNode starts out as NULL.
// NOTE(review): the code that derives retNode from el and returns it is
// not visible in this excerpt.
165 RBBINode
*RBBISymbolTable57::lookupNode(const UnicodeString
&key
) const{
167 RBBINode
*retNode
= NULL
;
168 RBBISymbolTableEntry
*el
;
170 el
= (RBBISymbolTableEntry
*)uhash_get(fHashTable
, &key
);
179 // RBBISymbolTable57::addEntry Add a new entry to the symbol table.
180 // Indicate an error if the name already exists -
181 // this will only occur in the case of duplicate
182 // variable assignments.
// Adds an entry to fHashTable.
//   key  variable name to register.
//   val  node to associate with the name.
//   err  in/out error code; tested on entry, may be set to
//        U_BRK_VARIABLE_REDFINITION or U_MEMORY_ALLOCATION_ERROR, and is
//        finally passed to uhash_put().
// The table is probed for an existing entry under key, a new
// RBBISymbolTableEntry is allocated, and it is stored under the address of
// its own key member (&e->key).
// NOTE(review): the conditions guarding the two error assignments, the
// statements that fill in e->key / e->val, and the early returns are not
// visible in this excerpt.
184 void RBBISymbolTable57::addEntry (const UnicodeString
&key
, RBBINode
*val
, UErrorCode
&err
) {
185 RBBISymbolTableEntry
*e
;
186 /* check whether err already holds a failure from an earlier operation */
187 if (U_FAILURE(err
)) {
190 e
= (RBBISymbolTableEntry
*)uhash_get(fHashTable
, &key
);
192 err
= U_BRK_VARIABLE_REDFINITION
;
196 e
= new RBBISymbolTableEntry
;
198 err
= U_MEMORY_ALLOCATION_ERROR
;
203 uhash_put( fHashTable
, &e
->key
, e
, &err
);
206 // RBBISymbolTableEntry::RBBISymbolTableEntry() - from standard rbbistbl.cpp
207 // RBBISymbolTableEntry::~RBBISymbolTableEntry() - from standard rbbistbl.cpp
211 // RBBISymbolTable57::rbbiSymtablePrint Debugging function, dump out the symbol table contents.
// Prints the table contents with RBBIDebugPrintf in two sections.
//   Section 1 ("Variable Definitions"): for each element, the key (printed
//   with a width argument of 15), the address of the entry's val node, and
//   the fText of that node's left child.
//   Section 2 ("Parsed Variable Definitions"): for each element, the key
//   followed by printTree(TRUE) on the val node's left child.
// Elements are obtained with uhash_nextElement(), starting from UHASH_FIRST.
// NOTE(review): the loop constructs, their termination tests and the reset
// of pos before the second section are not visible in this excerpt.
214 void RBBISymbolTable57::rbbiSymtablePrint() const {
215 RBBIDebugPrintf("Variable Definitions\n"
216 "Name Node Val String Val\n"
217 "----------------------------------------------------------------------\n");
219 int32_t pos
= UHASH_FIRST
;
220 const UHashElement
*e
= NULL
;
222 e
= uhash_nextElement(fHashTable
, &pos
);
// The hash element's value holds the RBBISymbolTableEntry pointer.
226 RBBISymbolTableEntry
*s
= (RBBISymbolTableEntry
*)e
->value
.pointer
;
228 RBBI_DEBUG_printUnicodeString(s
->key
, 15);
229 RBBIDebugPrintf(" %8p ", (void *)s
->val
);
230 RBBI_DEBUG_printUnicodeString(s
->val
->fLeftChild
->fText
);
231 RBBIDebugPrintf("\n");
234 RBBIDebugPrintf("\nParsed Variable Definitions\n");
237 e
= uhash_nextElement(fHashTable
, &pos
);
241 RBBISymbolTableEntry
*s
= (RBBISymbolTableEntry
*)e
->value
.pointer
;
242 RBBI_DEBUG_printUnicodeString(s
->key
);
243 s
->val
->fLeftChild
->printTree(TRUE
);
244 RBBIDebugPrintf("\n");
255 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */