1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
12 #include "static_unicode_sets.h"
15 #include "unicode/uniset.h"
21 using namespace icu::unisets
;
26 UnicodeSet
* gUnicodeSets
[COUNT
] = {};
28 // Save the empty instance in static memory to have well-defined behavior if a
29 // regular UnicodeSet cannot be allocated.
30 char gEmptyUnicodeSet
[sizeof(UnicodeSet
)];
32 // Whether the gEmptyUnicodeSet is initialized and ready to use.
33 UBool gEmptyUnicodeSetInitialized
= FALSE
;
35 inline UnicodeSet
* getImpl(Key key
) {
36 UnicodeSet
* candidate
= gUnicodeSets
[key
];
37 if (candidate
== nullptr) {
38 return reinterpret_cast<UnicodeSet
*>(gEmptyUnicodeSet
);
43 UnicodeSet
* computeUnion(Key k1
, Key k2
) {
44 UnicodeSet
* result
= new UnicodeSet();
45 if (result
== nullptr) {
48 result
->addAll(*getImpl(k1
));
49 result
->addAll(*getImpl(k2
));
54 UnicodeSet
* computeUnion(Key k1
, Key k2
, Key k3
) {
55 UnicodeSet
* result
= new UnicodeSet();
56 if (result
== nullptr) {
59 result
->addAll(*getImpl(k1
));
60 result
->addAll(*getImpl(k2
));
61 result
->addAll(*getImpl(k3
));
67 void saveSet(Key key
, const UnicodeString
& unicodeSetPattern
, UErrorCode
& status
) {
68 // assert unicodeSets.get(key) == null;
69 gUnicodeSets
[key
] = new UnicodeSet(unicodeSetPattern
, status
);
72 class ParseDataSink
: public ResourceSink
{
74 void put(const char* key
, ResourceValue
& value
, UBool
/*noFallback*/, UErrorCode
& status
) U_OVERRIDE
{
75 ResourceTable contextsTable
= value
.getTable(status
);
76 if (U_FAILURE(status
)) { return; }
77 for (int i
= 0; contextsTable
.getKeyAndValue(i
, key
, value
); i
++) {
78 if (uprv_strcmp(key
, "date") == 0) {
81 ResourceTable strictnessTable
= value
.getTable(status
);
82 if (U_FAILURE(status
)) { return; }
83 for (int j
= 0; strictnessTable
.getKeyAndValue(j
, key
, value
); j
++) {
84 bool isLenient
= (uprv_strcmp(key
, "lenient") == 0);
85 ResourceArray array
= value
.getArray(status
);
86 if (U_FAILURE(status
)) { return; }
87 for (int k
= 0; k
< array
.getSize(); k
++) {
88 array
.getValue(k
, value
);
89 UnicodeString str
= value
.getUnicodeString(status
);
90 if (U_FAILURE(status
)) { return; }
91 // There is both lenient and strict data for comma/period,
92 // but not for any of the other symbols.
93 if (str
.indexOf(u
'.') != -1) {
94 saveSet(isLenient
? PERIOD
: STRICT_PERIOD
, str
, status
);
95 } else if (str
.indexOf(u
',') != -1) {
96 saveSet(isLenient
? COMMA
: STRICT_COMMA
, str
, status
);
97 } else if (str
.indexOf(u
'+') != -1) {
98 saveSet(PLUS_SIGN
, str
, status
);
99 } else if (str
.indexOf(u
'‒') != -1) {
100 saveSet(MINUS_SIGN
, str
, status
);
101 } else if (str
.indexOf(u
'$') != -1) {
102 saveSet(DOLLAR_SIGN
, str
, status
);
103 } else if (str
.indexOf(u
'£') != -1) {
104 saveSet(POUND_SIGN
, str
, status
);
105 } else if (str
.indexOf(u
'₨') != -1) {
106 saveSet(RUPEE_SIGN
, str
, status
);
108 if (U_FAILURE(status
)) { return; }
// One-time-initialization guard: unisets::get() passes it to umtx_initOnce() together
// with initNumberParseUniSets(), and cleanupNumberParseUniSets() resets it.
icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
119 UBool U_CALLCONV
cleanupNumberParseUniSets() {
120 if (gEmptyUnicodeSetInitialized
) {
121 reinterpret_cast<UnicodeSet
*>(gEmptyUnicodeSet
)->~UnicodeSet();
122 gEmptyUnicodeSetInitialized
= FALSE
;
124 for (int32_t i
= 0; i
< COUNT
; i
++) {
125 delete gUnicodeSets
[i
];
126 gUnicodeSets
[i
] = nullptr;
128 gNumberParseUniSetsInitOnce
.reset();
132 void U_CALLCONV
initNumberParseUniSets(UErrorCode
& status
) {
133 ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS
, cleanupNumberParseUniSets
);
135 // Initialize the empty instance for well-defined fallback behavior
136 new(gEmptyUnicodeSet
) UnicodeSet();
137 reinterpret_cast<UnicodeSet
*>(gEmptyUnicodeSet
)->freeze();
138 gEmptyUnicodeSetInitialized
= TRUE
;
140 // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
141 // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
142 gUnicodeSets
[DEFAULT_IGNORABLES
] = new UnicodeSet(
143 u
"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status
);
144 gUnicodeSets
[STRICT_IGNORABLES
] = new UnicodeSet(u
"[[:Bidi_Control:]]", status
);
146 LocalUResourceBundlePointer
rb(ures_open(nullptr, "root", &status
));
147 if (U_FAILURE(status
)) { return; }
149 ures_getAllItemsWithFallback(rb
.getAlias(), "parse", sink
, status
);
150 if (U_FAILURE(status
)) { return; }
152 // NOTE: It is OK for these assertions to fail if there was a no-data build.
153 U_ASSERT(gUnicodeSets
[COMMA
] != nullptr);
154 U_ASSERT(gUnicodeSets
[STRICT_COMMA
] != nullptr);
155 U_ASSERT(gUnicodeSets
[PERIOD
] != nullptr);
156 U_ASSERT(gUnicodeSets
[STRICT_PERIOD
] != nullptr);
158 gUnicodeSets
[OTHER_GROUPING_SEPARATORS
] = new UnicodeSet(
159 u
"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status
);
160 gUnicodeSets
[ALL_SEPARATORS
] = computeUnion(COMMA
, PERIOD
, OTHER_GROUPING_SEPARATORS
);
161 gUnicodeSets
[STRICT_ALL_SEPARATORS
] = computeUnion(
162 STRICT_COMMA
, STRICT_PERIOD
, OTHER_GROUPING_SEPARATORS
);
164 U_ASSERT(gUnicodeSets
[MINUS_SIGN
] != nullptr);
165 U_ASSERT(gUnicodeSets
[PLUS_SIGN
] != nullptr);
167 gUnicodeSets
[PERCENT_SIGN
] = new UnicodeSet(u
"[%٪]", status
);
168 gUnicodeSets
[PERMILLE_SIGN
] = new UnicodeSet(u
"[‰؉]", status
);
169 gUnicodeSets
[INFINITY_KEY
] = new UnicodeSet(u
"[∞]", status
);
171 U_ASSERT(gUnicodeSets
[DOLLAR_SIGN
] != nullptr);
172 U_ASSERT(gUnicodeSets
[POUND_SIGN
] != nullptr);
173 U_ASSERT(gUnicodeSets
[RUPEE_SIGN
] != nullptr);
174 gUnicodeSets
[YEN_SIGN
] = new UnicodeSet(u
"[¥\\uffe5]", status
);
176 gUnicodeSets
[DIGITS
] = new UnicodeSet(u
"[:digit:]", status
);
178 gUnicodeSets
[DIGITS_OR_ALL_SEPARATORS
] = computeUnion(DIGITS
, ALL_SEPARATORS
);
179 gUnicodeSets
[DIGITS_OR_STRICT_ALL_SEPARATORS
] = computeUnion(DIGITS
, STRICT_ALL_SEPARATORS
);
181 for (auto* uniset
: gUnicodeSets
) {
182 if (uniset
!= nullptr) {
190 const UnicodeSet
* unisets::get(Key key
) {
191 UErrorCode localStatus
= U_ZERO_ERROR
;
192 umtx_initOnce(gNumberParseUniSetsInitOnce
, &initNumberParseUniSets
, localStatus
);
193 if (U_FAILURE(localStatus
)) {
194 return reinterpret_cast<UnicodeSet
*>(gEmptyUnicodeSet
);
199 Key
unisets::chooseFrom(UnicodeString str
, Key key1
) {
200 return get(key1
)->contains(str
) ? key1
: NONE
;
203 Key
unisets::chooseFrom(UnicodeString str
, Key key1
, Key key2
) {
204 return get(key1
)->contains(str
) ? key1
: chooseFrom(str
, key2
);
207 //Key unisets::chooseCurrency(UnicodeString str) {
208 // if (get(DOLLAR_SIGN)->contains(str)) {
209 // return DOLLAR_SIGN;
210 // } else if (get(POUND_SIGN)->contains(str)) {
211 // return POUND_SIGN;
212 // } else if (get(RUPEE_SIGN)->contains(str)) {
213 // return RUPEE_SIGN;
214 // } else if (get(YEN_SIGN)->contains(str)) {
222 #endif /* #if !UCONFIG_NO_FORMATTING */