]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/localebuilder.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / common / localebuilder.cpp
CommitLineData
3d1f044b
A
1// © 2019 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include <utility>
5
6#include "bytesinkutil.h" // CharStringByteSink
7#include "charstr.h"
8#include "cstring.h"
9#include "ulocimp.h"
10#include "unicode/localebuilder.h"
11#include "unicode/locid.h"
12
13U_NAMESPACE_BEGIN
14
// Non-zero when c is an ASCII decimal digit. Note: the argument is evaluated
// more than once, so it must be free of side effects.
#define UPRV_ISDIGIT(c) ((c) >= '0' && (c) <= '9')
// Non-zero when c is an ASCII digit or an ASCII letter (per uprv_isASCIILetter).
#define UPRV_ISALPHANUM(c) (UPRV_ISDIGIT(c) || uprv_isASCIILetter(c))

// Keyword name under which the Unicode locale attributes are kept in the
// Locale that carries the builder's extensions.
const char* kAttributeKey = "attribute";
19
20static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
21 switch (uprv_tolower(key)) {
22 case 'u':
23 return ultag_isUnicodeExtensionSubtags(s, len);
24 case 't':
25 return ultag_isTransformedExtensionSubtags(s, len);
26 case 'x':
27 return ultag_isPrivateuseValueSubtags(s, len);
28 default:
29 return ultag_isExtensionSubtags(s, len);
30 }
31}
32
33LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
34 script_(), region_(), variant_(nullptr), extensions_(nullptr)
35{
36 language_[0] = 0;
37 script_[0] = 0;
38 region_[0] = 0;
39}
40
41LocaleBuilder::~LocaleBuilder()
42{
43 delete variant_;
44 delete extensions_;
45}
46
47LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
48{
49 clear();
50 setLanguage(locale.getLanguage());
51 setScript(locale.getScript());
52 setRegion(locale.getCountry());
53 setVariant(locale.getVariant());
54 extensions_ = locale.clone();
55 if (extensions_ == nullptr) {
56 status_ = U_MEMORY_ALLOCATION_ERROR;
57 }
58 return *this;
59}
60
61LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
62{
63 Locale l = Locale::forLanguageTag(tag, status_);
64 if (U_FAILURE(status_)) { return *this; }
65 // Because setLocale will reset status_ we need to return
66 // first if we have error in forLanguageTag.
67 setLocale(l);
68 return *this;
69}
70
71static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
72 UBool (*test)(const char*, int32_t)) {
73 if (U_FAILURE(errorCode)) { return; }
74 if (input.empty()) {
75 dest[0] = '\0';
76 } else if (test(input.data(), input.length())) {
77 uprv_memcpy(dest, input.data(), input.length());
78 dest[input.length()] = '\0';
79 } else {
80 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
81 }
82}
83
84LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
85{
86 setField(language, language_, status_, &ultag_isLanguageSubtag);
87 return *this;
88}
89
90LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
91{
92 setField(script, script_, status_, &ultag_isScriptSubtag);
93 return *this;
94}
95
96LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
97{
98 setField(region, region_, status_, &ultag_isRegionSubtag);
99 return *this;
100}
101
102static void transform(char* data, int32_t len) {
103 for (int32_t i = 0; i < len; i++, data++) {
104 if (*data == '_') {
105 *data = '-';
106 } else {
107 *data = uprv_tolower(*data);
108 }
109 }
110}
111
112LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
113{
114 if (U_FAILURE(status_)) { return *this; }
115 if (variant.empty()) {
116 delete variant_;
117 variant_ = nullptr;
118 return *this;
119 }
120 CharString* new_variant = new CharString(variant, status_);
121 if (U_FAILURE(status_)) { return *this; }
122 if (new_variant == nullptr) {
123 status_ = U_MEMORY_ALLOCATION_ERROR;
124 return *this;
125 }
126 transform(new_variant->data(), new_variant->length());
127 if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
128 delete new_variant;
129 status_ = U_ILLEGAL_ARGUMENT_ERROR;
130 return *this;
131 }
132 delete variant_;
133 variant_ = new_variant;
134 return *this;
135}
136
137static bool
138_isKeywordValue(const char* key, const char* value, int32_t value_len)
139{
140 if (key[1] == '\0') {
141 // one char key
142 return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
143 _isExtensionSubtags(key[0], value, value_len));
144 } else if (uprv_strcmp(key, kAttributeKey) == 0) {
145 // unicode attributes
146 return ultag_isUnicodeLocaleAttributes(value, value_len);
147 }
148 // otherwise: unicode extension value
149 // We need to convert from legacy key/value to unicode
150 // key/value
151 const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key);
152 const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value);
153
154 return unicode_locale_key && unicode_locale_type &&
155 ultag_isUnicodeLocaleKey(unicode_locale_key, -1) &&
156 ultag_isUnicodeLocaleType(unicode_locale_type, -1);
157}
158
159static void
160_copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& errorCode)
161{
162 if (U_FAILURE(errorCode)) { return; }
163 LocalPointer<icu::StringEnumeration> iter(from.createKeywords(errorCode));
164 if (U_FAILURE(errorCode) || iter.isNull()) { return; }
165 const char* key;
166 while ((key = iter->next(nullptr, errorCode)) != nullptr) {
167 CharString value;
168 CharStringByteSink sink(&value);
169 from.getKeywordValue(key, sink, errorCode);
170 if (U_FAILURE(errorCode)) { return; }
171 if (uprv_strcmp(key, kAttributeKey) == 0) {
172 transform(value.data(), value.length());
173 }
174 if (validate &&
175 !_isKeywordValue(key, value.data(), value.length())) {
176 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
177 return;
178 }
179 to->setKeywordValue(key, value.data(), errorCode);
180 if (U_FAILURE(errorCode)) { return; }
181 }
182}
183
184void static
185_clearUAttributesAndKeyType(Locale* locale, UErrorCode& errorCode)
186{
187 // Clear Unicode attributes
188 locale->setKeywordValue(kAttributeKey, "", errorCode);
189
190 // Clear all Unicode keyword values
191 LocalPointer<icu::StringEnumeration> iter(locale->createUnicodeKeywords(errorCode));
192 if (U_FAILURE(errorCode) || iter.isNull()) { return; }
193 const char* key;
194 while ((key = iter->next(nullptr, errorCode)) != nullptr) {
195 locale->setUnicodeKeywordValue(key, nullptr, errorCode);
196 }
197}
198
199static void
200_setUnicodeExtensions(Locale* locale, const CharString& value, UErrorCode& errorCode)
201{
202 // Add the unicode extensions to extensions_
203 CharString locale_str("und-u-", errorCode);
204 locale_str.append(value, errorCode);
205 _copyExtensions(
206 Locale::forLanguageTag(locale_str.data(), errorCode),
207 locale, false, errorCode);
208}
209
210LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
211{
212 if (U_FAILURE(status_)) { return *this; }
213 if (!UPRV_ISALPHANUM(key)) {
214 status_ = U_ILLEGAL_ARGUMENT_ERROR;
215 return *this;
216 }
217 CharString value_str(value, status_);
218 if (U_FAILURE(status_)) { return *this; }
219 transform(value_str.data(), value_str.length());
220 if (!value_str.isEmpty() &&
221 !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
222 status_ = U_ILLEGAL_ARGUMENT_ERROR;
223 return *this;
224 }
225 if (extensions_ == nullptr) {
226 extensions_ = new Locale();
227 if (extensions_ == nullptr) {
228 status_ = U_MEMORY_ALLOCATION_ERROR;
229 return *this;
230 }
231 }
232 if (uprv_tolower(key) != 'u') {
233 // for t, x and others extension.
234 extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
235 status_);
236 return *this;
237 }
238 _clearUAttributesAndKeyType(extensions_, status_);
239 if (U_FAILURE(status_)) { return *this; }
240 if (!value.empty()) {
241 _setUnicodeExtensions(extensions_, value_str, status_);
242 }
243 return *this;
244}
245
246LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
247 StringPiece key, StringPiece type)
248{
249 if (U_FAILURE(status_)) { return *this; }
250 if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||
251 (!type.empty() &&
252 !ultag_isUnicodeLocaleType(type.data(), type.length()))) {
253 status_ = U_ILLEGAL_ARGUMENT_ERROR;
254 return *this;
255 }
256 if (extensions_ == nullptr) {
257 extensions_ = new Locale();
258 }
259 if (extensions_ == nullptr) {
260 status_ = U_MEMORY_ALLOCATION_ERROR;
261 return *this;
262 }
263 extensions_->setUnicodeKeywordValue(key, type, status_);
264 return *this;
265}
266
267LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
268 StringPiece value)
269{
270 CharString value_str(value, status_);
271 if (U_FAILURE(status_)) { return *this; }
272 transform(value_str.data(), value_str.length());
273 if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
274 status_ = U_ILLEGAL_ARGUMENT_ERROR;
275 return *this;
276 }
277 if (extensions_ == nullptr) {
278 extensions_ = new Locale();
279 if (extensions_ == nullptr) {
280 status_ = U_MEMORY_ALLOCATION_ERROR;
281 return *this;
282 }
283 extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
284 return *this;
285 }
286
287 CharString attributes;
288 CharStringByteSink sink(&attributes);
289 UErrorCode localErrorCode = U_ZERO_ERROR;
290 extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
291 if (U_FAILURE(localErrorCode)) {
292 CharString new_attributes(value_str.data(), status_);
293 // No attributes, set the attribute.
294 extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
295 return *this;
296 }
297
298 transform(attributes.data(),attributes.length());
299 const char* start = attributes.data();
300 const char* limit = attributes.data() + attributes.length();
301 CharString new_attributes;
302 bool inserted = false;
303 while (start < limit) {
304 if (!inserted) {
305 int cmp = uprv_strcmp(start, value_str.data());
306 if (cmp == 0) { return *this; } // Found it in attributes: Just return
307 if (cmp > 0) {
308 if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
309 new_attributes.append(value_str.data(), status_);
310 inserted = true;
311 }
312 }
313 if (!new_attributes.isEmpty()) {
314 new_attributes.append('_', status_);
315 }
316 new_attributes.append(start, status_);
317 start += uprv_strlen(start) + 1;
318 }
319 if (!inserted) {
320 if (!new_attributes.isEmpty()) {
321 new_attributes.append('_', status_);
322 }
323 new_attributes.append(value_str.data(), status_);
324 }
325 // Not yet in the attributes, set the attribute.
326 extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
327 return *this;
328}
329
330LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
331 StringPiece value)
332{
333 CharString value_str(value, status_);
334 if (U_FAILURE(status_)) { return *this; }
335 transform(value_str.data(), value_str.length());
336 if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
337 status_ = U_ILLEGAL_ARGUMENT_ERROR;
338 return *this;
339 }
340 if (extensions_ == nullptr) { return *this; }
341 UErrorCode localErrorCode = U_ZERO_ERROR;
342 CharString attributes;
343 CharStringByteSink sink(&attributes);
344 extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
345 // get failure, just return
346 if (U_FAILURE(localErrorCode)) { return *this; }
347 // Do not have any attributes, just return.
348 if (attributes.isEmpty()) { return *this; }
349
350 char* p = attributes.data();
351 // Replace null terminiator in place for _ and - so later
352 // we can use uprv_strcmp to compare.
353 for (int32_t i = 0; i < attributes.length(); i++, p++) {
354 *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
355 }
356
357 const char* start = attributes.data();
358 const char* limit = attributes.data() + attributes.length();
359 CharString new_attributes;
360 bool found = false;
361 while (start < limit) {
362 if (uprv_strcmp(start, value_str.data()) == 0) {
363 found = true;
364 } else {
365 if (!new_attributes.isEmpty()) {
366 new_attributes.append('_', status_);
367 }
368 new_attributes.append(start, status_);
369 }
370 start += uprv_strlen(start) + 1;
371 }
372 // Found the value in attributes, set the attribute.
373 if (found) {
374 extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
375 }
376 return *this;
377}
378
379LocaleBuilder& LocaleBuilder::clear()
380{
381 status_ = U_ZERO_ERROR;
382 language_[0] = 0;
383 script_[0] = 0;
384 region_[0] = 0;
385 delete variant_;
386 variant_ = nullptr;
387 clearExtensions();
388 return *this;
389}
390
391LocaleBuilder& LocaleBuilder::clearExtensions()
392{
393 delete extensions_;
394 extensions_ = nullptr;
395 return *this;
396}
397
398Locale makeBogusLocale() {
399 Locale bogus;
400 bogus.setToBogus();
401 return bogus;
402}
403
404Locale LocaleBuilder::build(UErrorCode& errorCode)
405{
406 if (U_FAILURE(errorCode)) {
407 return makeBogusLocale();
408 }
409 if (U_FAILURE(status_)) {
410 errorCode = status_;
411 return makeBogusLocale();
412 }
413 CharString locale_str(language_, errorCode);
414 if (uprv_strlen(script_) > 0) {
415 locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
416 }
417 if (uprv_strlen(region_) > 0) {
418 locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
419 }
420 if (variant_ != nullptr) {
421 locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
422 }
423 if (U_FAILURE(errorCode)) {
424 return makeBogusLocale();
425 }
426 Locale product(locale_str.data());
427 if (extensions_ != nullptr) {
428 _copyExtensions(*extensions_, &product, true, errorCode);
429 }
430 if (U_FAILURE(errorCode)) {
431 return makeBogusLocale();
432 }
433 return product;
434}
435
436U_NAMESPACE_END