1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
6 #include "bytesinkutil.h" // CharStringByteSink
10 #include "unicode/localebuilder.h"
11 #include "unicode/locid.h"
15 #define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
16 #define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
18 const char* kAttributeKey
= "attribute";
20 static bool _isExtensionSubtags(char key
, const char* s
, int32_t len
) {
21 switch (uprv_tolower(key
)) {
23 return ultag_isUnicodeExtensionSubtags(s
, len
);
25 return ultag_isTransformedExtensionSubtags(s
, len
);
27 return ultag_isPrivateuseValueSubtags(s
, len
);
29 return ultag_isExtensionSubtags(s
, len
);
33 LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR
), language_(),
34 script_(), region_(), variant_(nullptr), extensions_(nullptr)
41 LocaleBuilder::~LocaleBuilder()
47 LocaleBuilder
& LocaleBuilder::setLocale(const Locale
& locale
)
50 setLanguage(locale
.getLanguage());
51 setScript(locale
.getScript());
52 setRegion(locale
.getCountry());
53 setVariant(locale
.getVariant());
54 extensions_
= locale
.clone();
55 if (extensions_
== nullptr) {
56 status_
= U_MEMORY_ALLOCATION_ERROR
;
61 LocaleBuilder
& LocaleBuilder::setLanguageTag(StringPiece tag
)
63 Locale l
= Locale::forLanguageTag(tag
, status_
);
64 if (U_FAILURE(status_
)) { return *this; }
65 // Because setLocale will reset status_ we need to return
66 // first if we have error in forLanguageTag.
71 static void setField(StringPiece input
, char* dest
, UErrorCode
& errorCode
,
72 UBool (*test
)(const char*, int32_t)) {
73 if (U_FAILURE(errorCode
)) { return; }
76 } else if (test(input
.data(), input
.length())) {
77 uprv_memcpy(dest
, input
.data(), input
.length());
78 dest
[input
.length()] = '\0';
80 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
84 LocaleBuilder
& LocaleBuilder::setLanguage(StringPiece language
)
86 setField(language
, language_
, status_
, &ultag_isLanguageSubtag
);
90 LocaleBuilder
& LocaleBuilder::setScript(StringPiece script
)
92 setField(script
, script_
, status_
, &ultag_isScriptSubtag
);
96 LocaleBuilder
& LocaleBuilder::setRegion(StringPiece region
)
98 setField(region
, region_
, status_
, &ultag_isRegionSubtag
);
102 static void transform(char* data
, int32_t len
) {
103 for (int32_t i
= 0; i
< len
; i
++, data
++) {
107 *data
= uprv_tolower(*data
);
112 LocaleBuilder
& LocaleBuilder::setVariant(StringPiece variant
)
114 if (U_FAILURE(status_
)) { return *this; }
115 if (variant
.empty()) {
120 CharString
* new_variant
= new CharString(variant
, status_
);
121 if (U_FAILURE(status_
)) { return *this; }
122 if (new_variant
== nullptr) {
123 status_
= U_MEMORY_ALLOCATION_ERROR
;
126 transform(new_variant
->data(), new_variant
->length());
127 if (!ultag_isVariantSubtags(new_variant
->data(), new_variant
->length())) {
129 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
133 variant_
= new_variant
;
138 _isKeywordValue(const char* key
, const char* value
, int32_t value_len
)
140 if (key
[1] == '\0') {
142 return (UPRV_ISALPHANUM(uprv_tolower(key
[0])) &&
143 _isExtensionSubtags(key
[0], value
, value_len
));
144 } else if (uprv_strcmp(key
, kAttributeKey
) == 0) {
145 // unicode attributes
146 return ultag_isUnicodeLocaleAttributes(value
, value_len
);
148 // otherwise: unicode extension value
149 // We need to convert from legacy key/value to unicode
151 const char* unicode_locale_key
= uloc_toUnicodeLocaleKey(key
);
152 const char* unicode_locale_type
= uloc_toUnicodeLocaleType(key
, value
);
154 return unicode_locale_key
&& unicode_locale_type
&&
155 ultag_isUnicodeLocaleKey(unicode_locale_key
, -1) &&
156 ultag_isUnicodeLocaleType(unicode_locale_type
, -1);
160 _copyExtensions(const Locale
& from
, icu::StringEnumeration
*keywords
,
161 Locale
& to
, bool validate
, UErrorCode
& errorCode
)
163 if (U_FAILURE(errorCode
)) { return; }
164 LocalPointer
<icu::StringEnumeration
> ownedKeywords
;
165 if (keywords
== nullptr) {
166 ownedKeywords
.adoptInstead(from
.createKeywords(errorCode
));
167 if (U_FAILURE(errorCode
) || ownedKeywords
.isNull()) { return; }
168 keywords
= ownedKeywords
.getAlias();
171 while ((key
= keywords
->next(nullptr, errorCode
)) != nullptr) {
173 CharStringByteSink
sink(&value
);
174 from
.getKeywordValue(key
, sink
, errorCode
);
175 if (U_FAILURE(errorCode
)) { return; }
176 if (uprv_strcmp(key
, kAttributeKey
) == 0) {
177 transform(value
.data(), value
.length());
180 !_isKeywordValue(key
, value
.data(), value
.length())) {
181 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
184 to
.setKeywordValue(key
, value
.data(), errorCode
);
185 if (U_FAILURE(errorCode
)) { return; }
190 _clearUAttributesAndKeyType(Locale
& locale
, UErrorCode
& errorCode
)
192 // Clear Unicode attributes
193 locale
.setKeywordValue(kAttributeKey
, "", errorCode
);
195 // Clear all Unicode keyword values
196 LocalPointer
<icu::StringEnumeration
> iter(locale
.createUnicodeKeywords(errorCode
));
197 if (U_FAILURE(errorCode
) || iter
.isNull()) { return; }
199 while ((key
= iter
->next(nullptr, errorCode
)) != nullptr) {
200 locale
.setUnicodeKeywordValue(key
, nullptr, errorCode
);
205 _setUnicodeExtensions(Locale
& locale
, const CharString
& value
, UErrorCode
& errorCode
)
207 // Add the unicode extensions to extensions_
208 CharString
locale_str("und-u-", errorCode
);
209 locale_str
.append(value
, errorCode
);
211 Locale::forLanguageTag(locale_str
.data(), errorCode
), nullptr,
212 locale
, false, errorCode
);
215 LocaleBuilder
& LocaleBuilder::setExtension(char key
, StringPiece value
)
217 if (U_FAILURE(status_
)) { return *this; }
218 if (!UPRV_ISALPHANUM(key
)) {
219 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
222 CharString
value_str(value
, status_
);
223 if (U_FAILURE(status_
)) { return *this; }
224 transform(value_str
.data(), value_str
.length());
225 if (!value_str
.isEmpty() &&
226 !_isExtensionSubtags(key
, value_str
.data(), value_str
.length())) {
227 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
230 if (extensions_
== nullptr) {
231 extensions_
= new Locale();
232 if (extensions_
== nullptr) {
233 status_
= U_MEMORY_ALLOCATION_ERROR
;
237 if (uprv_tolower(key
) != 'u') {
238 // For the t, x, and other non-'u' extensions.
239 extensions_
->setKeywordValue(StringPiece(&key
, 1), value_str
.data(),
243 _clearUAttributesAndKeyType(*extensions_
, status_
);
244 if (U_FAILURE(status_
)) { return *this; }
245 if (!value
.empty()) {
246 _setUnicodeExtensions(*extensions_
, value_str
, status_
);
251 LocaleBuilder
& LocaleBuilder::setUnicodeLocaleKeyword(
252 StringPiece key
, StringPiece type
)
254 if (U_FAILURE(status_
)) { return *this; }
255 if (!ultag_isUnicodeLocaleKey(key
.data(), key
.length()) ||
257 !ultag_isUnicodeLocaleType(type
.data(), type
.length()))) {
258 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
261 if (extensions_
== nullptr) {
262 extensions_
= new Locale();
264 if (extensions_
== nullptr) {
265 status_
= U_MEMORY_ALLOCATION_ERROR
;
268 extensions_
->setUnicodeKeywordValue(key
, type
, status_
);
272 LocaleBuilder
& LocaleBuilder::addUnicodeLocaleAttribute(
275 CharString
value_str(value
, status_
);
276 if (U_FAILURE(status_
)) { return *this; }
277 transform(value_str
.data(), value_str
.length());
278 if (!ultag_isUnicodeLocaleAttribute(value_str
.data(), value_str
.length())) {
279 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
282 if (extensions_
== nullptr) {
283 extensions_
= new Locale();
284 if (extensions_
== nullptr) {
285 status_
= U_MEMORY_ALLOCATION_ERROR
;
288 extensions_
->setKeywordValue(kAttributeKey
, value_str
.data(), status_
);
292 CharString attributes
;
293 CharStringByteSink
sink(&attributes
);
294 UErrorCode localErrorCode
= U_ZERO_ERROR
;
295 extensions_
->getKeywordValue(kAttributeKey
, sink
, localErrorCode
);
296 if (U_FAILURE(localErrorCode
)) {
297 CharString
new_attributes(value_str
.data(), status_
);
298 // No attributes, set the attribute.
299 extensions_
->setKeywordValue(kAttributeKey
, new_attributes
.data(), status_
);
303 transform(attributes
.data(),attributes
.length());
304 const char* start
= attributes
.data();
305 const char* limit
= attributes
.data() + attributes
.length();
306 CharString new_attributes
;
307 bool inserted
= false;
308 while (start
< limit
) {
310 int cmp
= uprv_strcmp(start
, value_str
.data());
311 if (cmp
== 0) { return *this; } // Found it in attributes: Just return
313 if (!new_attributes
.isEmpty()) new_attributes
.append('_', status_
);
314 new_attributes
.append(value_str
.data(), status_
);
318 if (!new_attributes
.isEmpty()) {
319 new_attributes
.append('_', status_
);
321 new_attributes
.append(start
, status_
);
322 start
+= uprv_strlen(start
) + 1;
325 if (!new_attributes
.isEmpty()) {
326 new_attributes
.append('_', status_
);
328 new_attributes
.append(value_str
.data(), status_
);
330 // Not yet in the attributes, set the attribute.
331 extensions_
->setKeywordValue(kAttributeKey
, new_attributes
.data(), status_
);
335 LocaleBuilder
& LocaleBuilder::removeUnicodeLocaleAttribute(
338 CharString
value_str(value
, status_
);
339 if (U_FAILURE(status_
)) { return *this; }
340 transform(value_str
.data(), value_str
.length());
341 if (!ultag_isUnicodeLocaleAttribute(value_str
.data(), value_str
.length())) {
342 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
345 if (extensions_
== nullptr) { return *this; }
346 UErrorCode localErrorCode
= U_ZERO_ERROR
;
347 CharString attributes
;
348 CharStringByteSink
sink(&attributes
);
349 extensions_
->getKeywordValue(kAttributeKey
, sink
, localErrorCode
);
350 // get failure, just return
351 if (U_FAILURE(localErrorCode
)) { return *this; }
352 // Do not have any attributes, just return.
353 if (attributes
.isEmpty()) { return *this; }
355 char* p
= attributes
.data();
356 // Replace _ and - in place with a null terminator (lowercasing every
357 // other character) so that later we can use uprv_strcmp to compare.
358 for (int32_t i
= 0; i
< attributes
.length(); i
++, p
++) {
359 *p
= (*p
== '_' || *p
== '-') ? '\0' : uprv_tolower(*p
);
362 const char* start
= attributes
.data();
363 const char* limit
= attributes
.data() + attributes
.length();
364 CharString new_attributes
;
366 while (start
< limit
) {
367 if (uprv_strcmp(start
, value_str
.data()) == 0) {
370 if (!new_attributes
.isEmpty()) {
371 new_attributes
.append('_', status_
);
373 new_attributes
.append(start
, status_
);
375 start
+= uprv_strlen(start
) + 1;
377 // Found the value in attributes, set the attribute.
379 extensions_
->setKeywordValue(kAttributeKey
, new_attributes
.data(), status_
);
384 LocaleBuilder
& LocaleBuilder::clear()
386 status_
= U_ZERO_ERROR
;
396 LocaleBuilder
& LocaleBuilder::clearExtensions()
399 extensions_
= nullptr;
403 Locale
makeBogusLocale() {
409 void LocaleBuilder::copyExtensionsFrom(const Locale
& src
, UErrorCode
& errorCode
)
411 if (U_FAILURE(errorCode
)) { return; }
412 LocalPointer
<icu::StringEnumeration
> keywords(src
.createKeywords(errorCode
));
413 if (U_FAILURE(errorCode
) || keywords
.isNull() || keywords
->count(errorCode
) == 0) {
414 // Error, or no extensions to copy.
417 if (extensions_
== nullptr) {
418 extensions_
= new Locale();
419 if (extensions_
== nullptr) {
420 status_
= U_MEMORY_ALLOCATION_ERROR
;
424 _copyExtensions(src
, keywords
.getAlias(), *extensions_
, false, errorCode
);
427 Locale
LocaleBuilder::build(UErrorCode
& errorCode
)
429 if (U_FAILURE(errorCode
)) {
430 return makeBogusLocale();
432 if (U_FAILURE(status_
)) {
434 return makeBogusLocale();
436 CharString
locale_str(language_
, errorCode
);
437 if (uprv_strlen(script_
) > 0) {
438 locale_str
.append('-', errorCode
).append(StringPiece(script_
), errorCode
);
440 if (uprv_strlen(region_
) > 0) {
441 locale_str
.append('-', errorCode
).append(StringPiece(region_
), errorCode
);
443 if (variant_
!= nullptr) {
444 locale_str
.append('-', errorCode
).append(StringPiece(variant_
->data()), errorCode
);
446 if (U_FAILURE(errorCode
)) {
447 return makeBogusLocale();
449 Locale
product(locale_str
.data());
450 if (extensions_
!= nullptr) {
451 _copyExtensions(*extensions_
, nullptr, product
, true, errorCode
);
453 if (U_FAILURE(errorCode
)) {
454 return makeBogusLocale();
459 UBool
LocaleBuilder::copyErrorTo(UErrorCode
&outErrorCode
) const {
460 if (U_FAILURE(outErrorCode
)) {
461 // Do not overwrite the older error code
464 outErrorCode
= status_
;
465 return U_FAILURE(outErrorCode
);