]>
Commit | Line | Data |
---|---|---|
3d1f044b A |
1 | // © 2019 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | #include <utility> | |
5 | ||
6 | #include "bytesinkutil.h" // CharStringByteSink | |
7 | #include "charstr.h" | |
8 | #include "cstring.h" | |
9 | #include "ulocimp.h" | |
10 | #include "unicode/localebuilder.h" | |
11 | #include "unicode/locid.h" | |
12 | ||
13 | U_NAMESPACE_BEGIN | |
14 | ||
// ASCII-only character classification helpers.
// NOTE: both macros may evaluate their argument more than once, so do not
// pass an expression with side effects.
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )

// Keyword name under which Unicode locale attributes are stored on a Locale.
// The pointer itself is const and has internal linkage: it is an
// implementation detail of this file and must never be re-pointed.
static const char* const kAttributeKey = "attribute";
19 | ||
20 | static bool _isExtensionSubtags(char key, const char* s, int32_t len) { | |
21 | switch (uprv_tolower(key)) { | |
22 | case 'u': | |
23 | return ultag_isUnicodeExtensionSubtags(s, len); | |
24 | case 't': | |
25 | return ultag_isTransformedExtensionSubtags(s, len); | |
26 | case 'x': | |
27 | return ultag_isPrivateuseValueSubtags(s, len); | |
28 | default: | |
29 | return ultag_isExtensionSubtags(s, len); | |
30 | } | |
31 | } | |
32 | ||
33 | LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(), | |
34 | script_(), region_(), variant_(nullptr), extensions_(nullptr) | |
35 | { | |
36 | language_[0] = 0; | |
37 | script_[0] = 0; | |
38 | region_[0] = 0; | |
39 | } | |
40 | ||
41 | LocaleBuilder::~LocaleBuilder() | |
42 | { | |
43 | delete variant_; | |
44 | delete extensions_; | |
45 | } | |
46 | ||
47 | LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale) | |
48 | { | |
49 | clear(); | |
50 | setLanguage(locale.getLanguage()); | |
51 | setScript(locale.getScript()); | |
52 | setRegion(locale.getCountry()); | |
53 | setVariant(locale.getVariant()); | |
54 | extensions_ = locale.clone(); | |
55 | if (extensions_ == nullptr) { | |
56 | status_ = U_MEMORY_ALLOCATION_ERROR; | |
57 | } | |
58 | return *this; | |
59 | } | |
60 | ||
61 | LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag) | |
62 | { | |
63 | Locale l = Locale::forLanguageTag(tag, status_); | |
64 | if (U_FAILURE(status_)) { return *this; } | |
65 | // Because setLocale will reset status_ we need to return | |
66 | // first if we have error in forLanguageTag. | |
67 | setLocale(l); | |
68 | return *this; | |
69 | } | |
70 | ||
71 | static void setField(StringPiece input, char* dest, UErrorCode& errorCode, | |
72 | UBool (*test)(const char*, int32_t)) { | |
73 | if (U_FAILURE(errorCode)) { return; } | |
74 | if (input.empty()) { | |
75 | dest[0] = '\0'; | |
76 | } else if (test(input.data(), input.length())) { | |
77 | uprv_memcpy(dest, input.data(), input.length()); | |
78 | dest[input.length()] = '\0'; | |
79 | } else { | |
80 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
81 | } | |
82 | } | |
83 | ||
84 | LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language) | |
85 | { | |
86 | setField(language, language_, status_, &ultag_isLanguageSubtag); | |
87 | return *this; | |
88 | } | |
89 | ||
90 | LocaleBuilder& LocaleBuilder::setScript(StringPiece script) | |
91 | { | |
92 | setField(script, script_, status_, &ultag_isScriptSubtag); | |
93 | return *this; | |
94 | } | |
95 | ||
96 | LocaleBuilder& LocaleBuilder::setRegion(StringPiece region) | |
97 | { | |
98 | setField(region, region_, status_, &ultag_isRegionSubtag); | |
99 | return *this; | |
100 | } | |
101 | ||
102 | static void transform(char* data, int32_t len) { | |
103 | for (int32_t i = 0; i < len; i++, data++) { | |
104 | if (*data == '_') { | |
105 | *data = '-'; | |
106 | } else { | |
107 | *data = uprv_tolower(*data); | |
108 | } | |
109 | } | |
110 | } | |
111 | ||
112 | LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant) | |
113 | { | |
114 | if (U_FAILURE(status_)) { return *this; } | |
115 | if (variant.empty()) { | |
116 | delete variant_; | |
117 | variant_ = nullptr; | |
118 | return *this; | |
119 | } | |
120 | CharString* new_variant = new CharString(variant, status_); | |
121 | if (U_FAILURE(status_)) { return *this; } | |
122 | if (new_variant == nullptr) { | |
123 | status_ = U_MEMORY_ALLOCATION_ERROR; | |
124 | return *this; | |
125 | } | |
126 | transform(new_variant->data(), new_variant->length()); | |
127 | if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) { | |
128 | delete new_variant; | |
129 | status_ = U_ILLEGAL_ARGUMENT_ERROR; | |
130 | return *this; | |
131 | } | |
132 | delete variant_; | |
133 | variant_ = new_variant; | |
134 | return *this; | |
135 | } | |
136 | ||
137 | static bool | |
138 | _isKeywordValue(const char* key, const char* value, int32_t value_len) | |
139 | { | |
140 | if (key[1] == '\0') { | |
141 | // one char key | |
142 | return (UPRV_ISALPHANUM(uprv_tolower(key[0])) && | |
143 | _isExtensionSubtags(key[0], value, value_len)); | |
144 | } else if (uprv_strcmp(key, kAttributeKey) == 0) { | |
145 | // unicode attributes | |
146 | return ultag_isUnicodeLocaleAttributes(value, value_len); | |
147 | } | |
148 | // otherwise: unicode extension value | |
149 | // We need to convert from legacy key/value to unicode | |
150 | // key/value | |
151 | const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key); | |
152 | const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value); | |
153 | ||
154 | return unicode_locale_key && unicode_locale_type && | |
155 | ultag_isUnicodeLocaleKey(unicode_locale_key, -1) && | |
156 | ultag_isUnicodeLocaleType(unicode_locale_type, -1); | |
157 | } | |
158 | ||
159 | static void | |
160 | _copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& errorCode) | |
161 | { | |
162 | if (U_FAILURE(errorCode)) { return; } | |
163 | LocalPointer<icu::StringEnumeration> iter(from.createKeywords(errorCode)); | |
164 | if (U_FAILURE(errorCode) || iter.isNull()) { return; } | |
165 | const char* key; | |
166 | while ((key = iter->next(nullptr, errorCode)) != nullptr) { | |
167 | CharString value; | |
168 | CharStringByteSink sink(&value); | |
169 | from.getKeywordValue(key, sink, errorCode); | |
170 | if (U_FAILURE(errorCode)) { return; } | |
171 | if (uprv_strcmp(key, kAttributeKey) == 0) { | |
172 | transform(value.data(), value.length()); | |
173 | } | |
174 | if (validate && | |
175 | !_isKeywordValue(key, value.data(), value.length())) { | |
176 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
177 | return; | |
178 | } | |
179 | to->setKeywordValue(key, value.data(), errorCode); | |
180 | if (U_FAILURE(errorCode)) { return; } | |
181 | } | |
182 | } | |
183 | ||
184 | void static | |
185 | _clearUAttributesAndKeyType(Locale* locale, UErrorCode& errorCode) | |
186 | { | |
187 | // Clear Unicode attributes | |
188 | locale->setKeywordValue(kAttributeKey, "", errorCode); | |
189 | ||
190 | // Clear all Unicode keyword values | |
191 | LocalPointer<icu::StringEnumeration> iter(locale->createUnicodeKeywords(errorCode)); | |
192 | if (U_FAILURE(errorCode) || iter.isNull()) { return; } | |
193 | const char* key; | |
194 | while ((key = iter->next(nullptr, errorCode)) != nullptr) { | |
195 | locale->setUnicodeKeywordValue(key, nullptr, errorCode); | |
196 | } | |
197 | } | |
198 | ||
199 | static void | |
200 | _setUnicodeExtensions(Locale* locale, const CharString& value, UErrorCode& errorCode) | |
201 | { | |
202 | // Add the unicode extensions to extensions_ | |
203 | CharString locale_str("und-u-", errorCode); | |
204 | locale_str.append(value, errorCode); | |
205 | _copyExtensions( | |
206 | Locale::forLanguageTag(locale_str.data(), errorCode), | |
207 | locale, false, errorCode); | |
208 | } | |
209 | ||
210 | LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value) | |
211 | { | |
212 | if (U_FAILURE(status_)) { return *this; } | |
213 | if (!UPRV_ISALPHANUM(key)) { | |
214 | status_ = U_ILLEGAL_ARGUMENT_ERROR; | |
215 | return *this; | |
216 | } | |
217 | CharString value_str(value, status_); | |
218 | if (U_FAILURE(status_)) { return *this; } | |
219 | transform(value_str.data(), value_str.length()); | |
220 | if (!value_str.isEmpty() && | |
221 | !_isExtensionSubtags(key, value_str.data(), value_str.length())) { | |
222 | status_ = U_ILLEGAL_ARGUMENT_ERROR; | |
223 | return *this; | |
224 | } | |
225 | if (extensions_ == nullptr) { | |
226 | extensions_ = new Locale(); | |
227 | if (extensions_ == nullptr) { | |
228 | status_ = U_MEMORY_ALLOCATION_ERROR; | |
229 | return *this; | |
230 | } | |
231 | } | |
232 | if (uprv_tolower(key) != 'u') { | |
233 | // for t, x and others extension. | |
234 | extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(), | |
235 | status_); | |
236 | return *this; | |
237 | } | |
238 | _clearUAttributesAndKeyType(extensions_, status_); | |
239 | if (U_FAILURE(status_)) { return *this; } | |
240 | if (!value.empty()) { | |
241 | _setUnicodeExtensions(extensions_, value_str, status_); | |
242 | } | |
243 | return *this; | |
244 | } | |
245 | ||
246 | LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword( | |
247 | StringPiece key, StringPiece type) | |
248 | { | |
249 | if (U_FAILURE(status_)) { return *this; } | |
250 | if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) || | |
251 | (!type.empty() && | |
252 | !ultag_isUnicodeLocaleType(type.data(), type.length()))) { | |
253 | status_ = U_ILLEGAL_ARGUMENT_ERROR; | |
254 | return *this; | |
255 | } | |
256 | if (extensions_ == nullptr) { | |
257 | extensions_ = new Locale(); | |
258 | } | |
259 | if (extensions_ == nullptr) { | |
260 | status_ = U_MEMORY_ALLOCATION_ERROR; | |
261 | return *this; | |
262 | } | |
263 | extensions_->setUnicodeKeywordValue(key, type, status_); | |
264 | return *this; | |
265 | } | |
266 | ||
267 | LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute( | |
268 | StringPiece value) | |
269 | { | |
270 | CharString value_str(value, status_); | |
271 | if (U_FAILURE(status_)) { return *this; } | |
272 | transform(value_str.data(), value_str.length()); | |
273 | if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { | |
274 | status_ = U_ILLEGAL_ARGUMENT_ERROR; | |
275 | return *this; | |
276 | } | |
277 | if (extensions_ == nullptr) { | |
278 | extensions_ = new Locale(); | |
279 | if (extensions_ == nullptr) { | |
280 | status_ = U_MEMORY_ALLOCATION_ERROR; | |
281 | return *this; | |
282 | } | |
283 | extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_); | |
284 | return *this; | |
285 | } | |
286 | ||
287 | CharString attributes; | |
288 | CharStringByteSink sink(&attributes); | |
289 | UErrorCode localErrorCode = U_ZERO_ERROR; | |
290 | extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode); | |
291 | if (U_FAILURE(localErrorCode)) { | |
292 | CharString new_attributes(value_str.data(), status_); | |
293 | // No attributes, set the attribute. | |
294 | extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); | |
295 | return *this; | |
296 | } | |
297 | ||
298 | transform(attributes.data(),attributes.length()); | |
299 | const char* start = attributes.data(); | |
300 | const char* limit = attributes.data() + attributes.length(); | |
301 | CharString new_attributes; | |
302 | bool inserted = false; | |
303 | while (start < limit) { | |
304 | if (!inserted) { | |
305 | int cmp = uprv_strcmp(start, value_str.data()); | |
306 | if (cmp == 0) { return *this; } // Found it in attributes: Just return | |
307 | if (cmp > 0) { | |
308 | if (!new_attributes.isEmpty()) new_attributes.append('_', status_); | |
309 | new_attributes.append(value_str.data(), status_); | |
310 | inserted = true; | |
311 | } | |
312 | } | |
313 | if (!new_attributes.isEmpty()) { | |
314 | new_attributes.append('_', status_); | |
315 | } | |
316 | new_attributes.append(start, status_); | |
317 | start += uprv_strlen(start) + 1; | |
318 | } | |
319 | if (!inserted) { | |
320 | if (!new_attributes.isEmpty()) { | |
321 | new_attributes.append('_', status_); | |
322 | } | |
323 | new_attributes.append(value_str.data(), status_); | |
324 | } | |
325 | // Not yet in the attributes, set the attribute. | |
326 | extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); | |
327 | return *this; | |
328 | } | |
329 | ||
330 | LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute( | |
331 | StringPiece value) | |
332 | { | |
333 | CharString value_str(value, status_); | |
334 | if (U_FAILURE(status_)) { return *this; } | |
335 | transform(value_str.data(), value_str.length()); | |
336 | if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { | |
337 | status_ = U_ILLEGAL_ARGUMENT_ERROR; | |
338 | return *this; | |
339 | } | |
340 | if (extensions_ == nullptr) { return *this; } | |
341 | UErrorCode localErrorCode = U_ZERO_ERROR; | |
342 | CharString attributes; | |
343 | CharStringByteSink sink(&attributes); | |
344 | extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode); | |
345 | // get failure, just return | |
346 | if (U_FAILURE(localErrorCode)) { return *this; } | |
347 | // Do not have any attributes, just return. | |
348 | if (attributes.isEmpty()) { return *this; } | |
349 | ||
350 | char* p = attributes.data(); | |
351 | // Replace null terminiator in place for _ and - so later | |
352 | // we can use uprv_strcmp to compare. | |
353 | for (int32_t i = 0; i < attributes.length(); i++, p++) { | |
354 | *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p); | |
355 | } | |
356 | ||
357 | const char* start = attributes.data(); | |
358 | const char* limit = attributes.data() + attributes.length(); | |
359 | CharString new_attributes; | |
360 | bool found = false; | |
361 | while (start < limit) { | |
362 | if (uprv_strcmp(start, value_str.data()) == 0) { | |
363 | found = true; | |
364 | } else { | |
365 | if (!new_attributes.isEmpty()) { | |
366 | new_attributes.append('_', status_); | |
367 | } | |
368 | new_attributes.append(start, status_); | |
369 | } | |
370 | start += uprv_strlen(start) + 1; | |
371 | } | |
372 | // Found the value in attributes, set the attribute. | |
373 | if (found) { | |
374 | extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); | |
375 | } | |
376 | return *this; | |
377 | } | |
378 | ||
379 | LocaleBuilder& LocaleBuilder::clear() | |
380 | { | |
381 | status_ = U_ZERO_ERROR; | |
382 | language_[0] = 0; | |
383 | script_[0] = 0; | |
384 | region_[0] = 0; | |
385 | delete variant_; | |
386 | variant_ = nullptr; | |
387 | clearExtensions(); | |
388 | return *this; | |
389 | } | |
390 | ||
391 | LocaleBuilder& LocaleBuilder::clearExtensions() | |
392 | { | |
393 | delete extensions_; | |
394 | extensions_ = nullptr; | |
395 | return *this; | |
396 | } | |
397 | ||
398 | Locale makeBogusLocale() { | |
399 | Locale bogus; | |
400 | bogus.setToBogus(); | |
401 | return bogus; | |
402 | } | |
403 | ||
404 | Locale LocaleBuilder::build(UErrorCode& errorCode) | |
405 | { | |
406 | if (U_FAILURE(errorCode)) { | |
407 | return makeBogusLocale(); | |
408 | } | |
409 | if (U_FAILURE(status_)) { | |
410 | errorCode = status_; | |
411 | return makeBogusLocale(); | |
412 | } | |
413 | CharString locale_str(language_, errorCode); | |
414 | if (uprv_strlen(script_) > 0) { | |
415 | locale_str.append('-', errorCode).append(StringPiece(script_), errorCode); | |
416 | } | |
417 | if (uprv_strlen(region_) > 0) { | |
418 | locale_str.append('-', errorCode).append(StringPiece(region_), errorCode); | |
419 | } | |
420 | if (variant_ != nullptr) { | |
421 | locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode); | |
422 | } | |
423 | if (U_FAILURE(errorCode)) { | |
424 | return makeBogusLocale(); | |
425 | } | |
426 | Locale product(locale_str.data()); | |
427 | if (extensions_ != nullptr) { | |
428 | _copyExtensions(*extensions_, &product, true, errorCode); | |
429 | } | |
430 | if (U_FAILURE(errorCode)) { | |
431 | return makeBogusLocale(); | |
432 | } | |
433 | return product; | |
434 | } | |
435 | ||
436 | U_NAMESPACE_END |