]>
Commit | Line | Data |
---|---|---|
0f5d89e8 A |
1 | // © 2018 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | #include "unicode/utypes.h" | |
5 | ||
6 | #if !UCONFIG_NO_FORMATTING | |
7 | ||
8 | // Allow implicit conversion from char16_t* to UnicodeString for this file: | |
9 | // Helpful in toString methods and elsewhere. | |
10 | #define UNISTR_FROM_STRING_EXPLICIT | |
11 | ||
12 | #include "static_unicode_sets.h" | |
13 | #include "umutex.h" | |
14 | #include "ucln_cmn.h" | |
15 | #include "unicode/uniset.h" | |
16 | #include "uresimp.h" | |
17 | #include "cstring.h" | |
18 | #include "uassert.h" | |
19 | ||
20 | using namespace icu; | |
21 | using namespace icu::unisets; | |
22 | ||
23 | ||
24 | namespace { | |
25 | ||
26 | UnicodeSet* gUnicodeSets[COUNT] = {}; | |
27 | ||
28 | // Save the empty instance in static memory to have well-defined behavior if a | |
29 | // regular UnicodeSet cannot be allocated. | |
30 | char gEmptyUnicodeSet[sizeof(UnicodeSet)]; | |
31 | ||
32 | // Whether the gEmptyUnicodeSet is initialized and ready to use. | |
33 | UBool gEmptyUnicodeSetInitialized = FALSE; | |
34 | ||
35 | inline UnicodeSet* getImpl(Key key) { | |
36 | UnicodeSet* candidate = gUnicodeSets[key]; | |
37 | if (candidate == nullptr) { | |
38 | return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet); | |
39 | } | |
40 | return candidate; | |
41 | } | |
42 | ||
43 | UnicodeSet* computeUnion(Key k1, Key k2) { | |
44 | UnicodeSet* result = new UnicodeSet(); | |
45 | if (result == nullptr) { | |
46 | return nullptr; | |
47 | } | |
48 | result->addAll(*getImpl(k1)); | |
49 | result->addAll(*getImpl(k2)); | |
50 | result->freeze(); | |
51 | return result; | |
52 | } | |
53 | ||
54 | UnicodeSet* computeUnion(Key k1, Key k2, Key k3) { | |
55 | UnicodeSet* result = new UnicodeSet(); | |
56 | if (result == nullptr) { | |
57 | return nullptr; | |
58 | } | |
59 | result->addAll(*getImpl(k1)); | |
60 | result->addAll(*getImpl(k2)); | |
61 | result->addAll(*getImpl(k3)); | |
62 | result->freeze(); | |
63 | return result; | |
64 | } | |
65 | ||
66 | ||
67 | void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) { | |
68 | // assert unicodeSets.get(key) == null; | |
69 | gUnicodeSets[key] = new UnicodeSet(unicodeSetPattern, status); | |
70 | } | |
71 | ||
72 | class ParseDataSink : public ResourceSink { | |
73 | public: | |
74 | void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE { | |
75 | ResourceTable contextsTable = value.getTable(status); | |
76 | if (U_FAILURE(status)) { return; } | |
77 | for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) { | |
78 | if (uprv_strcmp(key, "date") == 0) { | |
79 | // ignore | |
80 | } else { | |
81 | ResourceTable strictnessTable = value.getTable(status); | |
82 | if (U_FAILURE(status)) { return; } | |
83 | for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) { | |
84 | bool isLenient = (uprv_strcmp(key, "lenient") == 0); | |
85 | ResourceArray array = value.getArray(status); | |
86 | if (U_FAILURE(status)) { return; } | |
87 | for (int k = 0; k < array.getSize(); k++) { | |
88 | array.getValue(k, value); | |
89 | UnicodeString str = value.getUnicodeString(status); | |
90 | if (U_FAILURE(status)) { return; } | |
91 | // There is both lenient and strict data for comma/period, | |
92 | // but not for any of the other symbols. | |
93 | if (str.indexOf(u'.') != -1) { | |
94 | saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status); | |
95 | } else if (str.indexOf(u',') != -1) { | |
96 | saveSet(isLenient ? COMMA : STRICT_COMMA, str, status); | |
97 | } else if (str.indexOf(u'+') != -1) { | |
98 | saveSet(PLUS_SIGN, str, status); | |
99 | } else if (str.indexOf(u'‒') != -1) { | |
100 | saveSet(MINUS_SIGN, str, status); | |
101 | } else if (str.indexOf(u'$') != -1) { | |
102 | saveSet(DOLLAR_SIGN, str, status); | |
103 | } else if (str.indexOf(u'£') != -1) { | |
104 | saveSet(POUND_SIGN, str, status); | |
105 | } else if (str.indexOf(u'₨') != -1) { | |
106 | saveSet(RUPEE_SIGN, str, status); | |
107 | } | |
108 | if (U_FAILURE(status)) { return; } | |
109 | } | |
110 | } | |
111 | } | |
112 | } | |
113 | } | |
114 | }; | |
115 | ||
116 | ||
117 | icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER; | |
118 | ||
119 | UBool U_CALLCONV cleanupNumberParseUniSets() { | |
120 | if (gEmptyUnicodeSetInitialized) { | |
121 | reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->~UnicodeSet(); | |
122 | gEmptyUnicodeSetInitialized = FALSE; | |
123 | } | |
124 | for (int32_t i = 0; i < COUNT; i++) { | |
125 | delete gUnicodeSets[i]; | |
126 | gUnicodeSets[i] = nullptr; | |
127 | } | |
128 | gNumberParseUniSetsInitOnce.reset(); | |
129 | return TRUE; | |
130 | } | |
131 | ||
132 | void U_CALLCONV initNumberParseUniSets(UErrorCode& status) { | |
133 | ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets); | |
134 | ||
135 | // Initialize the empty instance for well-defined fallback behavior | |
136 | new(gEmptyUnicodeSet) UnicodeSet(); | |
137 | reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze(); | |
138 | gEmptyUnicodeSetInitialized = TRUE; | |
139 | ||
140 | // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309. | |
141 | // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property). | |
142 | gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet( | |
143 | u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status); | |
144 | gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status); | |
145 | ||
146 | LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status)); | |
147 | if (U_FAILURE(status)) { return; } | |
148 | ParseDataSink sink; | |
149 | ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status); | |
150 | if (U_FAILURE(status)) { return; } | |
151 | ||
152 | // NOTE: It is OK for these assertions to fail if there was a no-data build. | |
153 | U_ASSERT(gUnicodeSets[COMMA] != nullptr); | |
154 | U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr); | |
155 | U_ASSERT(gUnicodeSets[PERIOD] != nullptr); | |
156 | U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr); | |
157 | ||
158 | gUnicodeSets[OTHER_GROUPING_SEPARATORS] = new UnicodeSet( | |
159 | u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status); | |
160 | gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS); | |
161 | gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion( | |
162 | STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS); | |
163 | ||
164 | U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr); | |
165 | U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr); | |
166 | ||
167 | gUnicodeSets[PERCENT_SIGN] = new UnicodeSet(u"[%٪]", status); | |
168 | gUnicodeSets[PERMILLE_SIGN] = new UnicodeSet(u"[‰؉]", status); | |
169 | gUnicodeSets[INFINITY_KEY] = new UnicodeSet(u"[∞]", status); | |
170 | ||
171 | U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr); | |
172 | U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr); | |
173 | U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr); | |
174 | gUnicodeSets[YEN_SIGN] = new UnicodeSet(u"[¥\\uffe5]", status); | |
175 | ||
176 | gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status); | |
177 | ||
178 | gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS); | |
179 | gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS); | |
180 | ||
181 | for (auto* uniset : gUnicodeSets) { | |
182 | if (uniset != nullptr) { | |
183 | uniset->freeze(); | |
184 | } | |
185 | } | |
186 | } | |
187 | ||
188 | } | |
189 | ||
190 | const UnicodeSet* unisets::get(Key key) { | |
191 | UErrorCode localStatus = U_ZERO_ERROR; | |
192 | umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus); | |
193 | if (U_FAILURE(localStatus)) { | |
194 | return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet); | |
195 | } | |
196 | return getImpl(key); | |
197 | } | |
198 | ||
199 | Key unisets::chooseFrom(UnicodeString str, Key key1) { | |
200 | return get(key1)->contains(str) ? key1 : NONE; | |
201 | } | |
202 | ||
203 | Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) { | |
204 | return get(key1)->contains(str) ? key1 : chooseFrom(str, key2); | |
205 | } | |
206 | ||
207 | //Key unisets::chooseCurrency(UnicodeString str) { | |
208 | // if (get(DOLLAR_SIGN)->contains(str)) { | |
209 | // return DOLLAR_SIGN; | |
210 | // } else if (get(POUND_SIGN)->contains(str)) { | |
211 | // return POUND_SIGN; | |
212 | // } else if (get(RUPEE_SIGN)->contains(str)) { | |
213 | // return RUPEE_SIGN; | |
214 | // } else if (get(YEN_SIGN)->contains(str)) { | |
215 | // return YEN_SIGN; | |
216 | // } else { | |
217 | // return NONE; | |
218 | // } | |
219 | //} | |
220 | ||
221 | ||
222 | #endif /* #if !UCONFIG_NO_FORMATTING */ |