1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
6 #include "bytesinkutil.h" // CharStringByteSink
10 #include "unicode/localebuilder.h"
11 #include "unicode/locid.h"
// ASCII-only character classification helpers.
// Both are function-like macros that may evaluate their argument more than
// once, so only pass expressions without side effects.
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )

// Keyword name under which Unicode locale attributes are stored on the
// Locale that holds the builder's extensions.
const char* kAttributeKey = "attribute";
20 static bool _isExtensionSubtags(char key
, const char* s
, int32_t len
) {
21 switch (uprv_tolower(key
)) {
23 return ultag_isUnicodeExtensionSubtags(s
, len
);
25 return ultag_isTransformedExtensionSubtags(s
, len
);
27 return ultag_isPrivateuseValueSubtags(s
, len
);
29 return ultag_isExtensionSubtags(s
, len
);
33 LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR
), language_(),
34 script_(), region_(), variant_(nullptr), extensions_(nullptr)
41 LocaleBuilder::~LocaleBuilder()
47 LocaleBuilder
& LocaleBuilder::setLocale(const Locale
& locale
)
50 setLanguage(locale
.getLanguage());
51 setScript(locale
.getScript());
52 setRegion(locale
.getCountry());
53 setVariant(locale
.getVariant());
54 extensions_
= locale
.clone();
55 if (extensions_
== nullptr) {
56 status_
= U_MEMORY_ALLOCATION_ERROR
;
61 LocaleBuilder
& LocaleBuilder::setLanguageTag(StringPiece tag
)
63 Locale l
= Locale::forLanguageTag(tag
, status_
);
64 if (U_FAILURE(status_
)) { return *this; }
65 // Because setLocale will reset status_ we need to return
66 // first if we have error in forLanguageTag.
71 static void setField(StringPiece input
, char* dest
, UErrorCode
& errorCode
,
72 UBool (*test
)(const char*, int32_t)) {
73 if (U_FAILURE(errorCode
)) { return; }
76 } else if (test(input
.data(), input
.length())) {
77 uprv_memcpy(dest
, input
.data(), input
.length());
78 dest
[input
.length()] = '\0';
80 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
84 LocaleBuilder
& LocaleBuilder::setLanguage(StringPiece language
)
86 setField(language
, language_
, status_
, &ultag_isLanguageSubtag
);
90 LocaleBuilder
& LocaleBuilder::setScript(StringPiece script
)
92 setField(script
, script_
, status_
, &ultag_isScriptSubtag
);
96 LocaleBuilder
& LocaleBuilder::setRegion(StringPiece region
)
98 setField(region
, region_
, status_
, &ultag_isRegionSubtag
);
102 static void transform(char* data
, int32_t len
) {
103 for (int32_t i
= 0; i
< len
; i
++, data
++) {
107 *data
= uprv_tolower(*data
);
112 LocaleBuilder
& LocaleBuilder::setVariant(StringPiece variant
)
114 if (U_FAILURE(status_
)) { return *this; }
115 if (variant
.empty()) {
120 CharString
* new_variant
= new CharString(variant
, status_
);
121 if (U_FAILURE(status_
)) { return *this; }
122 if (new_variant
== nullptr) {
123 status_
= U_MEMORY_ALLOCATION_ERROR
;
126 transform(new_variant
->data(), new_variant
->length());
127 if (!ultag_isVariantSubtags(new_variant
->data(), new_variant
->length())) {
129 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
133 variant_
= new_variant
;
138 _isKeywordValue(const char* key
, const char* value
, int32_t value_len
)
140 if (key
[1] == '\0') {
142 return (UPRV_ISALPHANUM(uprv_tolower(key
[0])) &&
143 _isExtensionSubtags(key
[0], value
, value_len
));
144 } else if (uprv_strcmp(key
, kAttributeKey
) == 0) {
145 // unicode attributes
146 return ultag_isUnicodeLocaleAttributes(value
, value_len
);
148 // otherwise: unicode extension value
149 // We need to convert from legacy key/value to unicode
151 const char* unicode_locale_key
= uloc_toUnicodeLocaleKey(key
);
152 const char* unicode_locale_type
= uloc_toUnicodeLocaleType(key
, value
);
154 return unicode_locale_key
&& unicode_locale_type
&&
155 ultag_isUnicodeLocaleKey(unicode_locale_key
, -1) &&
156 ultag_isUnicodeLocaleType(unicode_locale_type
, -1);
160 _copyExtensions(const Locale
& from
, Locale
* to
, bool validate
, UErrorCode
& errorCode
)
162 if (U_FAILURE(errorCode
)) { return; }
163 LocalPointer
<icu::StringEnumeration
> iter(from
.createKeywords(errorCode
));
164 if (U_FAILURE(errorCode
) || iter
.isNull()) { return; }
166 while ((key
= iter
->next(nullptr, errorCode
)) != nullptr) {
168 CharStringByteSink
sink(&value
);
169 from
.getKeywordValue(key
, sink
, errorCode
);
170 if (U_FAILURE(errorCode
)) { return; }
171 if (uprv_strcmp(key
, kAttributeKey
) == 0) {
172 transform(value
.data(), value
.length());
175 !_isKeywordValue(key
, value
.data(), value
.length())) {
176 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
179 to
->setKeywordValue(key
, value
.data(), errorCode
);
180 if (U_FAILURE(errorCode
)) { return; }
185 _clearUAttributesAndKeyType(Locale
* locale
, UErrorCode
& errorCode
)
187 // Clear Unicode attributes
188 locale
->setKeywordValue(kAttributeKey
, "", errorCode
);
190 // Clear all Unicode keyword values
191 LocalPointer
<icu::StringEnumeration
> iter(locale
->createUnicodeKeywords(errorCode
));
192 if (U_FAILURE(errorCode
) || iter
.isNull()) { return; }
194 while ((key
= iter
->next(nullptr, errorCode
)) != nullptr) {
195 locale
->setUnicodeKeywordValue(key
, nullptr, errorCode
);
200 _setUnicodeExtensions(Locale
* locale
, const CharString
& value
, UErrorCode
& errorCode
)
202 // Add the unicode extensions to extensions_
203 CharString
locale_str("und-u-", errorCode
);
204 locale_str
.append(value
, errorCode
);
206 Locale::forLanguageTag(locale_str
.data(), errorCode
),
207 locale
, false, errorCode
);
210 LocaleBuilder
& LocaleBuilder::setExtension(char key
, StringPiece value
)
212 if (U_FAILURE(status_
)) { return *this; }
213 if (!UPRV_ISALPHANUM(key
)) {
214 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
217 CharString
value_str(value
, status_
);
218 if (U_FAILURE(status_
)) { return *this; }
219 transform(value_str
.data(), value_str
.length());
220 if (!value_str
.isEmpty() &&
221 !_isExtensionSubtags(key
, value_str
.data(), value_str
.length())) {
222 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
225 if (extensions_
== nullptr) {
226 extensions_
= new Locale();
227 if (extensions_
== nullptr) {
228 status_
= U_MEMORY_ALLOCATION_ERROR
;
232 if (uprv_tolower(key
) != 'u') {
233 // for t, x and others extension.
234 extensions_
->setKeywordValue(StringPiece(&key
, 1), value_str
.data(),
238 _clearUAttributesAndKeyType(extensions_
, status_
);
239 if (U_FAILURE(status_
)) { return *this; }
240 if (!value
.empty()) {
241 _setUnicodeExtensions(extensions_
, value_str
, status_
);
246 LocaleBuilder
& LocaleBuilder::setUnicodeLocaleKeyword(
247 StringPiece key
, StringPiece type
)
249 if (U_FAILURE(status_
)) { return *this; }
250 if (!ultag_isUnicodeLocaleKey(key
.data(), key
.length()) ||
252 !ultag_isUnicodeLocaleType(type
.data(), type
.length()))) {
253 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
256 if (extensions_
== nullptr) {
257 extensions_
= new Locale();
259 if (extensions_
== nullptr) {
260 status_
= U_MEMORY_ALLOCATION_ERROR
;
263 extensions_
->setUnicodeKeywordValue(key
, type
, status_
);
267 LocaleBuilder
& LocaleBuilder::addUnicodeLocaleAttribute(
270 CharString
value_str(value
, status_
);
271 if (U_FAILURE(status_
)) { return *this; }
272 transform(value_str
.data(), value_str
.length());
273 if (!ultag_isUnicodeLocaleAttribute(value_str
.data(), value_str
.length())) {
274 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
277 if (extensions_
== nullptr) {
278 extensions_
= new Locale();
279 if (extensions_
== nullptr) {
280 status_
= U_MEMORY_ALLOCATION_ERROR
;
283 extensions_
->setKeywordValue(kAttributeKey
, value_str
.data(), status_
);
287 CharString attributes
;
288 CharStringByteSink
sink(&attributes
);
289 UErrorCode localErrorCode
= U_ZERO_ERROR
;
290 extensions_
->getKeywordValue(kAttributeKey
, sink
, localErrorCode
);
291 if (U_FAILURE(localErrorCode
)) {
292 CharString
new_attributes(value_str
.data(), status_
);
293 // No attributes, set the attribute.
294 extensions_
->setKeywordValue(kAttributeKey
, new_attributes
.data(), status_
);
298 transform(attributes
.data(),attributes
.length());
299 const char* start
= attributes
.data();
300 const char* limit
= attributes
.data() + attributes
.length();
301 CharString new_attributes
;
302 bool inserted
= false;
303 while (start
< limit
) {
305 int cmp
= uprv_strcmp(start
, value_str
.data());
306 if (cmp
== 0) { return *this; } // Found it in attributes: Just return
308 if (!new_attributes
.isEmpty()) new_attributes
.append('_', status_
);
309 new_attributes
.append(value_str
.data(), status_
);
313 if (!new_attributes
.isEmpty()) {
314 new_attributes
.append('_', status_
);
316 new_attributes
.append(start
, status_
);
317 start
+= uprv_strlen(start
) + 1;
320 if (!new_attributes
.isEmpty()) {
321 new_attributes
.append('_', status_
);
323 new_attributes
.append(value_str
.data(), status_
);
325 // Not yet in the attributes, set the attribute.
326 extensions_
->setKeywordValue(kAttributeKey
, new_attributes
.data(), status_
);
330 LocaleBuilder
& LocaleBuilder::removeUnicodeLocaleAttribute(
333 CharString
value_str(value
, status_
);
334 if (U_FAILURE(status_
)) { return *this; }
335 transform(value_str
.data(), value_str
.length());
336 if (!ultag_isUnicodeLocaleAttribute(value_str
.data(), value_str
.length())) {
337 status_
= U_ILLEGAL_ARGUMENT_ERROR
;
340 if (extensions_
== nullptr) { return *this; }
341 UErrorCode localErrorCode
= U_ZERO_ERROR
;
342 CharString attributes
;
343 CharStringByteSink
sink(&attributes
);
344 extensions_
->getKeywordValue(kAttributeKey
, sink
, localErrorCode
);
345 // get failure, just return
346 if (U_FAILURE(localErrorCode
)) { return *this; }
347 // Do not have any attributes, just return.
348 if (attributes
.isEmpty()) { return *this; }
350 char* p
= attributes
.data();
351 // Replace '_' and '-' in place with null terminators so that later
352 // we can use uprv_strcmp to compare.
353 for (int32_t i
= 0; i
< attributes
.length(); i
++, p
++) {
354 *p
= (*p
== '_' || *p
== '-') ? '\0' : uprv_tolower(*p
);
357 const char* start
= attributes
.data();
358 const char* limit
= attributes
.data() + attributes
.length();
359 CharString new_attributes
;
361 while (start
< limit
) {
362 if (uprv_strcmp(start
, value_str
.data()) == 0) {
365 if (!new_attributes
.isEmpty()) {
366 new_attributes
.append('_', status_
);
368 new_attributes
.append(start
, status_
);
370 start
+= uprv_strlen(start
) + 1;
372 // Found the value in attributes, set the attribute.
374 extensions_
->setKeywordValue(kAttributeKey
, new_attributes
.data(), status_
);
379 LocaleBuilder
& LocaleBuilder::clear()
381 status_
= U_ZERO_ERROR
;
391 LocaleBuilder
& LocaleBuilder::clearExtensions()
394 extensions_
= nullptr;
398 Locale
makeBogusLocale() {
404 Locale
LocaleBuilder::build(UErrorCode
& errorCode
)
406 if (U_FAILURE(errorCode
)) {
407 return makeBogusLocale();
409 if (U_FAILURE(status_
)) {
411 return makeBogusLocale();
413 CharString
locale_str(language_
, errorCode
);
414 if (uprv_strlen(script_
) > 0) {
415 locale_str
.append('-', errorCode
).append(StringPiece(script_
), errorCode
);
417 if (uprv_strlen(region_
) > 0) {
418 locale_str
.append('-', errorCode
).append(StringPiece(region_
), errorCode
);
420 if (variant_
!= nullptr) {
421 locale_str
.append('-', errorCode
).append(StringPiece(variant_
->data()), errorCode
);
423 if (U_FAILURE(errorCode
)) {
424 return makeBogusLocale();
426 Locale
product(locale_str
.data());
427 if (extensions_
!= nullptr) {
428 _copyExtensions(*extensions_
, &product
, true, errorCode
);
430 if (U_FAILURE(errorCode
)) {
431 return makeBogusLocale();