]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
729e4ab9 A |
3 | /* |
4 | ********************************************************************** | |
2ca993e8 | 5 | * Copyright (C) 2009-2015, International Business Machines |
729e4ab9 A |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** | |
8 | */ | |
9 | ||
10 | #include "unicode/utypes.h" | |
11 | #include "unicode/ures.h" | |
12 | #include "unicode/putil.h" | |
13 | #include "unicode/uloc.h" | |
14 | #include "ustr_imp.h" | |
15 | #include "cmemory.h" | |
16 | #include "cstring.h" | |
17 | #include "putilimp.h" | |
18 | #include "uinvchar.h" | |
19 | #include "ulocimp.h" | |
51004dcb A |
20 | #include "uassert.h" |
21 | ||
f3c0d7a5 | 22 | |
729e4ab9 A |
23 | /* struct holding a single variant; entries are chained through `next` into a singly linked list (see _addVariantToList). The entry does not copy the string: `variant` only points at it. */ |
24 | typedef struct VariantListEntry { | |
25 | const char *variant; | |
26 | struct VariantListEntry *next; | |
27 | } VariantListEntry; | |
28 | ||
4388f060 A |
29 | /* struct holding a single attribute value; entries are chained through `next` into a singly linked list that _addAttributeToList keeps sorted and free of duplicates */ |
30 | typedef struct AttributeListEntry { | |
31 | const char *attribute; | |
32 | struct AttributeListEntry *next; | |
33 | } AttributeListEntry; | |
34 | ||
729e4ab9 A |
35 | /* struct holding a single extension as a key/value pair; entries are chained through `next` into a singly linked list ordered by key (see _addExtensionToList) */ |
36 | typedef struct ExtensionListEntry { | |
37 | const char *key; | |
38 | const char *value; | |
39 | struct ExtensionListEntry *next; | |
40 | } ExtensionListEntry; | |
41 | ||
/* MAXEXTLANG is the number of slots in ULanguageTag.extlang. */
42 | #define MAXEXTLANG 3 | |
/*
 * Parsed form of a language tag. _initializeULanguageTag resets buf, the
 * extlang slots and both list heads to NULL and the remaining string members
 * to the shared EMPTY string.
 * NOTE(review): the string members presumably point into buf once a tag has
 * been parsed - confirm against ultag_parse.
 */
43 | typedef struct ULanguageTag { | |
44 | char *buf; /* storage holding the parsed subtags */ | |
45 | const char *language; | |
46 | const char *extlang[MAXEXTLANG]; | |
47 | const char *script; | |
48 | const char *region; | |
49 | VariantListEntry *variants; | |
50 | ExtensionListEntry *extensions; | |
51 | const char *privateuse; | |
52 | const char *grandfathered; | |
53 | } ULanguageTag; | |
54 | ||
/*
 * Characters with a special role inside a language tag: SEP separates subtags,
 * PRIVATEUSE is the private use singleton, and LDMLEXT is the singleton that
 * _addExtensionToList substitutes for multi-character keys when ordering them
 * against single-character keys.
 */
55 | #define MINLEN 2 | |
56 | #define SEP '-' | |
57 | #define PRIVATEUSE 'x' | |
58 | #define LDMLEXT 'u' | |
59 | ||
/*
 * Separator characters on the locale ID side. LOCALE_SEP is accepted next to
 * SEP when a variant string is split into subtags.
 * NOTE(review): the other three are not used in the part of the file reviewed
 * here - confirm their roles at the use sites.
 */
60 | #define LOCALE_SEP '_' | |
61 | #define LOCALE_EXT_SEP '@' | |
62 | #define LOCALE_KEYWORD_SEP ';' | |
63 | #define LOCALE_KEY_TYPE_SEP '=' | |
64 | ||
/*
 * Character class tests used by the subtag validators. ISALPHA delegates to
 * uprv_isASCIILetter; ISNUMERIC accepts only '0'..'9' and evaluates its
 * argument twice, so do not pass an expression with side effects.
 */
4388f060 | 65 | #define ISALPHA(c) uprv_isASCIILetter(c) |
729e4ab9 A |
66 | #define ISNUMERIC(c) ((c)>='0' && (c)<='9') |
67 | ||
51004dcb A |
/*
 * String constants shared by the conversion code. LANG_UND is the language
 * subtag written when a locale has no usable language code; POSIX_VALUE is the
 * lowercased variant that gets special treatment when it is the only variant.
 */
68 | static const char EMPTY[] = ""; |
69 | static const char LANG_UND[] = "und"; | |
70 | static const char PRIVATEUSE_KEY[] = "x"; | |
71 | static const char _POSIX[] = "_POSIX"; | |
72 | static const char POSIX_KEY[] = "va"; | |
73 | static const char POSIX_VALUE[] = "posix"; | |
74 | static const char LOCALE_ATTRIBUTE_KEY[] = "attribute"; | |
75 | static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant"; | |
76 | static const char LOCALE_TYPE_YES[] = "yes"; | |
729e4ab9 A |
77 | |
/* Must stay equal to the length of LANG_UND: exactly this many bytes are copied from it. */
78 | #define LANG_UND_LEN 3 | |
79 | ||
/*
 * Flat table of string pairs: each even index holds a grandfathered tag and
 * the following odd index holds its preferred replacement. The table ends
 * with a NULL, NULL pair.
 */
51004dcb | 80 | static const char* const GRANDFATHERED[] = { |
729e4ab9 A |
81 | /* grandfathered preferred */ |
82 | "art-lojban", "jbo", | |
4388f060 A |
83 | "cel-gaulish", "xtg-x-cel-gaulish", |
84 | "en-GB-oed", "en-GB-x-oed", | |
729e4ab9 A |
85 | "i-ami", "ami", |
86 | "i-bnn", "bnn", | |
4388f060 A |
87 | "i-default", "en-x-i-default", |
88 | "i-enochian", "und-x-i-enochian", | |
729e4ab9 A |
89 | "i-hak", "hak", |
90 | "i-klingon", "tlh", | |
91 | "i-lux", "lb", | |
4388f060 | 92 | "i-mingo", "see-x-i-mingo", |
729e4ab9 A |
93 | "i-navajo", "nv", |
94 | "i-pwn", "pwn", | |
95 | "i-tao", "tao", | |
96 | "i-tay", "tay", | |
97 | "i-tsu", "tsu", | |
98 | "no-bok", "nb", | |
99 | "no-nyn", "nn", | |
100 | "sgn-be-fr", "sfb", | |
101 | "sgn-be-nl", "vgt", | |
102 | "sgn-ch-de", "sgg", | |
103 | "zh-guoyu", "cmn", | |
104 | "zh-hakka", "hak", | |
4388f060 | 105 | "zh-min", "nan-x-zh-min", |
729e4ab9 A |
106 | "zh-min-nan", "nan", |
107 | "zh-xiang", "hsn", | |
108 | NULL, NULL | |
109 | }; | |
110 | ||
/*
 * Flat table of string pairs: each even index holds a deprecated language
 * code and the following odd index holds its replacement.
 * _appendLanguageToLanguageTag scans it two entries at a time. There is no
 * terminating entry; the scan is bounded by the array length.
 */
51004dcb | 111 | static const char DEPRECATEDLANGS[][4] = { |
729e4ab9 A |
112 | /* deprecated new */ |
113 | "iw", "he", | |
114 | "ji", "yi", | |
51004dcb | 115 | "in", "id" |
729e4ab9 A |
116 | }; |
117 | ||
118 | /* | |
119 | * ------------------------------------------------- | |
120 | * | |
121 | * These ultag_ functions may be exposed as APIs later | |
122 | * | |
123 | * ------------------------------------------------- | |
124 | */ | |
125 | ||
126 | static ULanguageTag* | |
127 | ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status); | |
128 | ||
129 | static void | |
130 | ultag_close(ULanguageTag* langtag); | |
131 | ||
132 | static const char* | |
133 | ultag_getLanguage(const ULanguageTag* langtag); | |
134 | ||
135 | #if 0 | |
136 | static const char* | |
137 | ultag_getJDKLanguage(const ULanguageTag* langtag); | |
138 | #endif | |
139 | ||
140 | static const char* | |
141 | ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); | |
142 | ||
143 | static int32_t | |
144 | ultag_getExtlangSize(const ULanguageTag* langtag); | |
145 | ||
146 | static const char* | |
147 | ultag_getScript(const ULanguageTag* langtag); | |
148 | ||
149 | static const char* | |
150 | ultag_getRegion(const ULanguageTag* langtag); | |
151 | ||
152 | static const char* | |
153 | ultag_getVariant(const ULanguageTag* langtag, int32_t idx); | |
154 | ||
155 | static int32_t | |
156 | ultag_getVariantsSize(const ULanguageTag* langtag); | |
157 | ||
158 | static const char* | |
159 | ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); | |
160 | ||
161 | static const char* | |
162 | ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); | |
163 | ||
164 | static int32_t | |
165 | ultag_getExtensionsSize(const ULanguageTag* langtag); | |
166 | ||
167 | static const char* | |
168 | ultag_getPrivateUse(const ULanguageTag* langtag); | |
169 | ||
170 | #if 0 | |
171 | static const char* | |
172 | ultag_getGrandfathered(const ULanguageTag* langtag); | |
173 | #endif | |
174 | ||
175 | /* | |
176 | * ------------------------------------------------- | |
177 | * | |
178 | * Language subtag syntax validation functions | |
179 | * | |
180 | * ------------------------------------------------- | |
181 | */ | |
182 | ||
183 | static UBool | |
184 | _isAlphaString(const char* s, int32_t len) { | |
185 | int32_t i; | |
186 | for (i = 0; i < len; i++) { | |
187 | if (!ISALPHA(*(s + i))) { | |
188 | return FALSE; | |
189 | } | |
190 | } | |
191 | return TRUE; | |
192 | } | |
193 | ||
194 | static UBool | |
195 | _isNumericString(const char* s, int32_t len) { | |
196 | int32_t i; | |
197 | for (i = 0; i < len; i++) { | |
198 | if (!ISNUMERIC(*(s + i))) { | |
199 | return FALSE; | |
200 | } | |
201 | } | |
202 | return TRUE; | |
203 | } | |
204 | ||
205 | static UBool | |
206 | _isAlphaNumericString(const char* s, int32_t len) { | |
207 | int32_t i; | |
208 | for (i = 0; i < len; i++) { | |
209 | if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { | |
210 | return FALSE; | |
211 | } | |
212 | } | |
213 | return TRUE; | |
214 | } | |
215 | ||
216 | static UBool | |
217 | _isLanguageSubtag(const char* s, int32_t len) { | |
218 | /* | |
219 | * language = 2*3ALPHA ; shortest ISO 639 code | |
220 | * ["-" extlang] ; sometimes followed by | |
221 | * ; extended language subtags | |
222 | * / 4ALPHA ; or reserved for future use | |
223 | * / 5*8ALPHA ; or registered language subtag | |
224 | */ | |
225 | if (len < 0) { | |
226 | len = (int32_t)uprv_strlen(s); | |
227 | } | |
228 | if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { | |
229 | return TRUE; | |
230 | } | |
231 | return FALSE; | |
232 | } | |
233 | ||
234 | static UBool | |
235 | _isExtlangSubtag(const char* s, int32_t len) { | |
236 | /* | |
237 | * extlang = 3ALPHA ; selected ISO 639 codes | |
238 | * *2("-" 3ALPHA) ; permanently reserved | |
239 | */ | |
240 | if (len < 0) { | |
241 | len = (int32_t)uprv_strlen(s); | |
242 | } | |
243 | if (len == 3 && _isAlphaString(s, len)) { | |
244 | return TRUE; | |
245 | } | |
246 | return FALSE; | |
247 | } | |
248 | ||
249 | static UBool | |
250 | _isScriptSubtag(const char* s, int32_t len) { | |
251 | /* | |
252 | * script = 4ALPHA ; ISO 15924 code | |
253 | */ | |
254 | if (len < 0) { | |
255 | len = (int32_t)uprv_strlen(s); | |
256 | } | |
257 | if (len == 4 && _isAlphaString(s, len)) { | |
258 | return TRUE; | |
259 | } | |
260 | return FALSE; | |
261 | } | |
262 | ||
263 | static UBool | |
264 | _isRegionSubtag(const char* s, int32_t len) { | |
265 | /* | |
266 | * region = 2ALPHA ; ISO 3166-1 code | |
267 | * / 3DIGIT ; UN M.49 code | |
268 | */ | |
269 | if (len < 0) { | |
270 | len = (int32_t)uprv_strlen(s); | |
271 | } | |
272 | if (len == 2 && _isAlphaString(s, len)) { | |
273 | return TRUE; | |
274 | } | |
275 | if (len == 3 && _isNumericString(s, len)) { | |
276 | return TRUE; | |
277 | } | |
278 | return FALSE; | |
279 | } | |
280 | ||
281 | static UBool | |
282 | _isVariantSubtag(const char* s, int32_t len) { | |
283 | /* | |
284 | * variant = 5*8alphanum ; registered variants | |
285 | * / (DIGIT 3alphanum) | |
286 | */ | |
287 | if (len < 0) { | |
288 | len = (int32_t)uprv_strlen(s); | |
289 | } | |
4388f060 | 290 | if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { |
729e4ab9 A |
291 | return TRUE; |
292 | } | |
293 | if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { | |
294 | return TRUE; | |
295 | } | |
296 | return FALSE; | |
297 | } | |
298 | ||
4388f060 A |
299 | static UBool |
300 | _isPrivateuseVariantSubtag(const char* s, int32_t len) { | |
301 | /* | |
302 | * variant = 1*8alphanum ; registered variants | |
303 | * / (DIGIT 3alphanum) | |
304 | */ | |
305 | if (len < 0) { | |
306 | len = (int32_t)uprv_strlen(s); | |
307 | } | |
308 | if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { | |
309 | return TRUE; | |
310 | } | |
311 | return FALSE; | |
312 | } | |
313 | ||
729e4ab9 A |
314 | static UBool |
315 | _isExtensionSingleton(const char* s, int32_t len) { | |
316 | /* | |
317 | * extension = singleton 1*("-" (2*8alphanum)) | |
318 | */ | |
319 | if (len < 0) { | |
320 | len = (int32_t)uprv_strlen(s); | |
321 | } | |
322 | if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { | |
323 | return TRUE; | |
324 | } | |
325 | return FALSE; | |
326 | } | |
327 | ||
328 | static UBool | |
329 | _isExtensionSubtag(const char* s, int32_t len) { | |
330 | /* | |
331 | * extension = singleton 1*("-" (2*8alphanum)) | |
332 | */ | |
333 | if (len < 0) { | |
334 | len = (int32_t)uprv_strlen(s); | |
335 | } | |
336 | if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { | |
337 | return TRUE; | |
338 | } | |
339 | return FALSE; | |
340 | } | |
341 | ||
342 | static UBool | |
343 | _isExtensionSubtags(const char* s, int32_t len) { | |
344 | const char *p = s; | |
345 | const char *pSubtag = NULL; | |
346 | ||
347 | if (len < 0) { | |
348 | len = (int32_t)uprv_strlen(s); | |
349 | } | |
350 | ||
351 | while ((p - s) < len) { | |
352 | if (*p == SEP) { | |
353 | if (pSubtag == NULL) { | |
354 | return FALSE; | |
355 | } | |
356 | if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { | |
357 | return FALSE; | |
358 | } | |
359 | pSubtag = NULL; | |
360 | } else if (pSubtag == NULL) { | |
361 | pSubtag = p; | |
362 | } | |
363 | p++; | |
364 | } | |
365 | if (pSubtag == NULL) { | |
366 | return FALSE; | |
367 | } | |
368 | return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); | |
369 | } | |
370 | ||
371 | static UBool | |
372 | _isPrivateuseValueSubtag(const char* s, int32_t len) { | |
373 | /* | |
374 | * privateuse = "x" 1*("-" (1*8alphanum)) | |
375 | */ | |
376 | if (len < 0) { | |
377 | len = (int32_t)uprv_strlen(s); | |
378 | } | |
379 | if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { | |
380 | return TRUE; | |
381 | } | |
382 | return FALSE; | |
383 | } | |
384 | ||
385 | static UBool | |
386 | _isPrivateuseValueSubtags(const char* s, int32_t len) { | |
387 | const char *p = s; | |
388 | const char *pSubtag = NULL; | |
389 | ||
390 | if (len < 0) { | |
391 | len = (int32_t)uprv_strlen(s); | |
392 | } | |
393 | ||
394 | while ((p - s) < len) { | |
395 | if (*p == SEP) { | |
396 | if (pSubtag == NULL) { | |
397 | return FALSE; | |
398 | } | |
399 | if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { | |
400 | return FALSE; | |
401 | } | |
402 | pSubtag = NULL; | |
403 | } else if (pSubtag == NULL) { | |
404 | pSubtag = p; | |
405 | } | |
406 | p++; | |
407 | } | |
408 | if (pSubtag == NULL) { | |
409 | return FALSE; | |
410 | } | |
411 | return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); | |
412 | } | |
413 | ||
b331163b A |
414 | U_CFUNC UBool |
415 | ultag_isUnicodeLocaleKey(const char* s, int32_t len) { | |
729e4ab9 A |
416 | if (len < 0) { |
417 | len = (int32_t)uprv_strlen(s); | |
418 | } | |
419 | if (len == 2 && _isAlphaNumericString(s, len)) { | |
420 | return TRUE; | |
421 | } | |
422 | return FALSE; | |
423 | } | |
424 | ||
b331163b A |
425 | U_CFUNC UBool |
426 | ultag_isUnicodeLocaleType(const char*s, int32_t len) { | |
427 | const char* p; | |
428 | int32_t subtagLen = 0; | |
429 | ||
729e4ab9 A |
430 | if (len < 0) { |
431 | len = (int32_t)uprv_strlen(s); | |
432 | } | |
b331163b A |
433 | |
434 | for (p = s; len > 0; p++, len--) { | |
435 | if (*p == SEP) { | |
436 | if (subtagLen < 3) { | |
437 | return FALSE; | |
438 | } | |
439 | subtagLen = 0; | |
440 | } else if (ISALPHA(*p) || ISNUMERIC(*p)) { | |
441 | subtagLen++; | |
442 | if (subtagLen > 8) { | |
443 | return FALSE; | |
444 | } | |
445 | } else { | |
446 | return FALSE; | |
447 | } | |
729e4ab9 | 448 | } |
729e4ab9 | 449 | |
b331163b A |
450 | return (subtagLen >= 3); |
451 | } | |
729e4ab9 A |
452 | /* |
453 | * ------------------------------------------------- | |
454 | * | |
455 | * Helper functions | |
456 | * | |
457 | * ------------------------------------------------- | |
458 | */ | |
459 | ||
460 | static UBool | |
461 | _addVariantToList(VariantListEntry **first, VariantListEntry *var) { | |
462 | UBool bAdded = TRUE; | |
463 | ||
464 | if (*first == NULL) { | |
465 | var->next = NULL; | |
466 | *first = var; | |
467 | } else { | |
468 | VariantListEntry *prev, *cur; | |
469 | int32_t cmp; | |
470 | ||
4388f060 | 471 | /* variants order should be preserved */ |
729e4ab9 A |
472 | prev = NULL; |
473 | cur = *first; | |
474 | while (TRUE) { | |
475 | if (cur == NULL) { | |
476 | prev->next = var; | |
477 | var->next = NULL; | |
478 | break; | |
479 | } | |
4388f060 A |
480 | |
481 | /* Checking for duplicate variant */ | |
729e4ab9 | 482 | cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); |
4388f060 A |
483 | if (cmp == 0) { |
484 | /* duplicated variant */ | |
485 | bAdded = FALSE; | |
486 | break; | |
487 | } | |
488 | prev = cur; | |
489 | cur = cur->next; | |
490 | } | |
491 | } | |
492 | ||
493 | return bAdded; | |
494 | } | |
495 | ||
496 | static UBool | |
497 | _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { | |
498 | UBool bAdded = TRUE; | |
499 | ||
500 | if (*first == NULL) { | |
501 | attr->next = NULL; | |
502 | *first = attr; | |
503 | } else { | |
504 | AttributeListEntry *prev, *cur; | |
505 | int32_t cmp; | |
506 | ||
507 | /* reorder variants in alphabetical order */ | |
508 | prev = NULL; | |
509 | cur = *first; | |
510 | while (TRUE) { | |
511 | if (cur == NULL) { | |
512 | prev->next = attr; | |
513 | attr->next = NULL; | |
514 | break; | |
515 | } | |
516 | cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); | |
729e4ab9 A |
517 | if (cmp < 0) { |
518 | if (prev == NULL) { | |
4388f060 | 519 | *first = attr; |
729e4ab9 | 520 | } else { |
4388f060 | 521 | prev->next = attr; |
729e4ab9 | 522 | } |
4388f060 | 523 | attr->next = cur; |
729e4ab9 A |
524 | break; |
525 | } | |
526 | if (cmp == 0) { | |
527 | /* duplicated variant */ | |
528 | bAdded = FALSE; | |
529 | break; | |
530 | } | |
531 | prev = cur; | |
532 | cur = cur->next; | |
533 | } | |
534 | } | |
535 | ||
536 | return bAdded; | |
537 | } | |
538 | ||
539 | ||
540 | static UBool | |
541 | _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { | |
542 | UBool bAdded = TRUE; | |
543 | ||
544 | if (*first == NULL) { | |
545 | ext->next = NULL; | |
546 | *first = ext; | |
547 | } else { | |
548 | ExtensionListEntry *prev, *cur; | |
549 | int32_t cmp; | |
550 | ||
551 | /* reorder variants in alphabetical order */ | |
552 | prev = NULL; | |
553 | cur = *first; | |
554 | while (TRUE) { | |
555 | if (cur == NULL) { | |
556 | prev->next = ext; | |
557 | ext->next = NULL; | |
558 | break; | |
559 | } | |
560 | if (localeToBCP) { | |
561 | /* special handling for locale to bcp conversion */ | |
562 | int32_t len, curlen; | |
563 | ||
564 | len = (int32_t)uprv_strlen(ext->key); | |
565 | curlen = (int32_t)uprv_strlen(cur->key); | |
566 | ||
567 | if (len == 1 && curlen == 1) { | |
568 | if (*(ext->key) == *(cur->key)) { | |
569 | cmp = 0; | |
570 | } else if (*(ext->key) == PRIVATEUSE) { | |
571 | cmp = 1; | |
572 | } else if (*(cur->key) == PRIVATEUSE) { | |
573 | cmp = -1; | |
574 | } else { | |
575 | cmp = *(ext->key) - *(cur->key); | |
576 | } | |
577 | } else if (len == 1) { | |
578 | cmp = *(ext->key) - LDMLEXT; | |
579 | } else if (curlen == 1) { | |
580 | cmp = LDMLEXT - *(cur->key); | |
581 | } else { | |
582 | cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); | |
f3c0d7a5 A |
583 | /* Both are u extension keys - we need special handling for 'attribute' */ |
584 | if (cmp != 0) { | |
585 | if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) { | |
586 | cmp = 1; | |
587 | } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { | |
588 | cmp = -1; | |
589 | } | |
590 | } | |
729e4ab9 A |
591 | } |
592 | } else { | |
593 | cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); | |
594 | } | |
595 | if (cmp < 0) { | |
596 | if (prev == NULL) { | |
597 | *first = ext; | |
598 | } else { | |
599 | prev->next = ext; | |
600 | } | |
601 | ext->next = cur; | |
602 | break; | |
603 | } | |
604 | if (cmp == 0) { | |
605 | /* duplicated extension key */ | |
606 | bAdded = FALSE; | |
607 | break; | |
608 | } | |
609 | prev = cur; | |
610 | cur = cur->next; | |
611 | } | |
612 | } | |
613 | ||
614 | return bAdded; | |
615 | } | |
616 | ||
617 | static void | |
618 | _initializeULanguageTag(ULanguageTag* langtag) { | |
619 | int32_t i; | |
620 | ||
621 | langtag->buf = NULL; | |
622 | ||
623 | langtag->language = EMPTY; | |
624 | for (i = 0; i < MAXEXTLANG; i++) { | |
625 | langtag->extlang[i] = NULL; | |
626 | } | |
627 | ||
628 | langtag->script = EMPTY; | |
629 | langtag->region = EMPTY; | |
630 | ||
631 | langtag->variants = NULL; | |
632 | langtag->extensions = NULL; | |
633 | ||
634 | langtag->grandfathered = EMPTY; | |
635 | langtag->privateuse = EMPTY; | |
636 | } | |
637 | ||
729e4ab9 A |
638 | static int32_t |
639 | _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { | |
640 | char buf[ULOC_LANG_CAPACITY]; | |
641 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
642 | int32_t len, i; | |
643 | int32_t reslen = 0; | |
644 | ||
645 | if (U_FAILURE(*status)) { | |
646 | return 0; | |
647 | } | |
648 | ||
649 | len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); | |
650 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { | |
651 | if (strict) { | |
652 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
653 | return 0; | |
654 | } | |
655 | len = 0; | |
656 | } | |
657 | ||
658 | /* Note: returned language code is in lower case letters */ | |
659 | ||
660 | if (len == 0) { | |
661 | if (reslen < capacity) { | |
662 | uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); | |
663 | } | |
664 | reslen += LANG_UND_LEN; | |
665 | } else if (!_isLanguageSubtag(buf, len)) { | |
666 | /* invalid language code */ | |
667 | if (strict) { | |
668 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
669 | return 0; | |
670 | } | |
671 | if (reslen < capacity) { | |
672 | uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); | |
673 | } | |
674 | reslen += LANG_UND_LEN; | |
675 | } else { | |
676 | /* resolve deprecated */ | |
b331163b | 677 | for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { |
729e4ab9 A |
678 | if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { |
679 | uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); | |
680 | len = (int32_t)uprv_strlen(buf); | |
681 | break; | |
682 | } | |
683 | } | |
684 | if (reslen < capacity) { | |
685 | uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); | |
686 | } | |
687 | reslen += len; | |
688 | } | |
689 | u_terminateChars(appendAt, capacity, reslen, status); | |
690 | return reslen; | |
691 | } | |
692 | ||
693 | static int32_t | |
694 | _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { | |
695 | char buf[ULOC_SCRIPT_CAPACITY]; | |
696 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
697 | int32_t len; | |
698 | int32_t reslen = 0; | |
699 | ||
700 | if (U_FAILURE(*status)) { | |
701 | return 0; | |
702 | } | |
703 | ||
704 | len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); | |
705 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { | |
706 | if (strict) { | |
707 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
708 | } | |
709 | return 0; | |
710 | } | |
711 | ||
712 | if (len > 0) { | |
713 | if (!_isScriptSubtag(buf, len)) { | |
714 | /* invalid script code */ | |
715 | if (strict) { | |
716 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
717 | } | |
718 | return 0; | |
719 | } else { | |
720 | if (reslen < capacity) { | |
721 | *(appendAt + reslen) = SEP; | |
722 | } | |
723 | reslen++; | |
724 | ||
725 | if (reslen < capacity) { | |
726 | uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); | |
727 | } | |
728 | reslen += len; | |
729 | } | |
730 | } | |
731 | u_terminateChars(appendAt, capacity, reslen, status); | |
732 | return reslen; | |
733 | } | |
734 | ||
735 | static int32_t | |
736 | _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { | |
737 | char buf[ULOC_COUNTRY_CAPACITY]; | |
738 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
739 | int32_t len; | |
740 | int32_t reslen = 0; | |
741 | ||
742 | if (U_FAILURE(*status)) { | |
743 | return 0; | |
744 | } | |
745 | ||
746 | len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); | |
747 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { | |
748 | if (strict) { | |
749 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
750 | } | |
751 | return 0; | |
752 | } | |
753 | ||
754 | if (len > 0) { | |
755 | if (!_isRegionSubtag(buf, len)) { | |
756 | /* invalid region code */ | |
757 | if (strict) { | |
758 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
759 | } | |
760 | return 0; | |
761 | } else { | |
762 | if (reslen < capacity) { | |
763 | *(appendAt + reslen) = SEP; | |
764 | } | |
765 | reslen++; | |
766 | ||
767 | if (reslen < capacity) { | |
768 | uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); | |
769 | } | |
770 | reslen += len; | |
771 | } | |
772 | } | |
773 | u_terminateChars(appendAt, capacity, reslen, status); | |
774 | return reslen; | |
775 | } | |
776 | ||
777 | static int32_t | |
778 | _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) { | |
779 | char buf[ULOC_FULLNAME_CAPACITY]; | |
780 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
781 | int32_t len, i; | |
782 | int32_t reslen = 0; | |
783 | ||
784 | if (U_FAILURE(*status)) { | |
785 | return 0; | |
786 | } | |
787 | ||
788 | len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); | |
789 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { | |
790 | if (strict) { | |
791 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
792 | } | |
793 | return 0; | |
794 | } | |
795 | ||
796 | if (len > 0) { | |
797 | char *p, *pVar; | |
798 | UBool bNext = TRUE; | |
799 | VariantListEntry *var; | |
800 | VariantListEntry *varFirst = NULL; | |
801 | ||
802 | pVar = NULL; | |
803 | p = buf; | |
804 | while (bNext) { | |
805 | if (*p == SEP || *p == LOCALE_SEP || *p == 0) { | |
806 | if (*p == 0) { | |
807 | bNext = FALSE; | |
808 | } else { | |
809 | *p = 0; /* terminate */ | |
810 | } | |
811 | if (pVar == NULL) { | |
812 | if (strict) { | |
813 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
814 | break; | |
815 | } | |
816 | /* ignore empty variant */ | |
817 | } else { | |
818 | /* ICU uses upper case letters for variants, but | |
819 | the canonical format is lowercase in BCP47 */ | |
820 | for (i = 0; *(pVar + i) != 0; i++) { | |
821 | *(pVar + i) = uprv_tolower(*(pVar + i)); | |
822 | } | |
823 | ||
824 | /* validate */ | |
825 | if (_isVariantSubtag(pVar, -1)) { | |
f3c0d7a5 | 826 | if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) { |
729e4ab9 | 827 | /* emit the variant to the list */ |
51004dcb | 828 | var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); |
729e4ab9 A |
829 | if (var == NULL) { |
830 | *status = U_MEMORY_ALLOCATION_ERROR; | |
831 | break; | |
832 | } | |
833 | var->variant = pVar; | |
834 | if (!_addVariantToList(&varFirst, var)) { | |
835 | /* duplicated variant */ | |
836 | uprv_free(var); | |
837 | if (strict) { | |
838 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
839 | break; | |
840 | } | |
841 | } | |
842 | } else { | |
843 | /* Special handling for POSIX variant, need to remember that we had it and then */ | |
844 | /* treat it like an extension later. */ | |
845 | *hadPosix = TRUE; | |
846 | } | |
847 | } else if (strict) { | |
848 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
849 | break; | |
4388f060 A |
850 | } else if (_isPrivateuseValueSubtag(pVar, -1)) { |
851 | /* Handle private use subtags separately */ | |
852 | break; | |
729e4ab9 A |
853 | } |
854 | } | |
855 | /* reset variant starting position */ | |
856 | pVar = NULL; | |
857 | } else if (pVar == NULL) { | |
858 | pVar = p; | |
859 | } | |
860 | p++; | |
861 | } | |
862 | ||
863 | if (U_SUCCESS(*status)) { | |
864 | if (varFirst != NULL) { | |
865 | int32_t varLen; | |
866 | ||
4388f060 | 867 | /* write out validated/normalized variants to the target */ |
729e4ab9 A |
868 | var = varFirst; |
869 | while (var != NULL) { | |
870 | if (reslen < capacity) { | |
871 | *(appendAt + reslen) = SEP; | |
872 | } | |
873 | reslen++; | |
874 | varLen = (int32_t)uprv_strlen(var->variant); | |
875 | if (reslen < capacity) { | |
876 | uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); | |
877 | } | |
878 | reslen += varLen; | |
879 | var = var->next; | |
880 | } | |
881 | } | |
882 | } | |
883 | ||
884 | /* clean up */ | |
885 | var = varFirst; | |
886 | while (var != NULL) { | |
887 | VariantListEntry *tmpVar = var->next; | |
888 | uprv_free(var); | |
889 | var = tmpVar; | |
890 | } | |
891 | ||
892 | if (U_FAILURE(*status)) { | |
893 | return 0; | |
894 | } | |
895 | } | |
896 | ||
897 | u_terminateChars(appendAt, capacity, reslen, status); | |
898 | return reslen; | |
899 | } | |
900 | ||
901 | static int32_t | |
902 | _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { | |
903 | char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
4388f060 A |
904 | char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; |
905 | int32_t attrBufLength = 0; | |
729e4ab9 A |
906 | UEnumeration *keywordEnum = NULL; |
907 | int32_t reslen = 0; | |
908 | ||
909 | keywordEnum = uloc_openKeywords(localeID, status); | |
910 | if (U_FAILURE(*status) && !hadPosix) { | |
911 | uenum_close(keywordEnum); | |
912 | return 0; | |
913 | } | |
914 | if (keywordEnum != NULL || hadPosix) { | |
915 | /* reorder extensions */ | |
916 | int32_t len; | |
917 | const char *key; | |
918 | ExtensionListEntry *firstExt = NULL; | |
919 | ExtensionListEntry *ext; | |
4388f060 A |
920 | AttributeListEntry *firstAttr = NULL; |
921 | AttributeListEntry *attr; | |
922 | char *attrValue; | |
729e4ab9 A |
923 | char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
924 | char *pExtBuf = extBuf; | |
925 | int32_t extBufCapacity = sizeof(extBuf); | |
f3c0d7a5 | 926 | const char *bcpKey=nullptr, *bcpValue=nullptr; |
729e4ab9 A |
927 | UErrorCode tmpStatus = U_ZERO_ERROR; |
928 | int32_t keylen; | |
b331163b | 929 | UBool isBcpUExt; |
729e4ab9 A |
930 | |
931 | while (TRUE) { | |
932 | key = uenum_next(keywordEnum, NULL, status); | |
933 | if (key == NULL) { | |
934 | break; | |
935 | } | |
936 | len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); | |
b331163b A |
937 | /* buf must be null-terminated */ |
938 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { | |
729e4ab9 A |
939 | if (strict) { |
940 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
941 | break; | |
942 | } | |
943 | /* ignore this keyword */ | |
944 | tmpStatus = U_ZERO_ERROR; | |
945 | continue; | |
946 | } | |
947 | ||
948 | keylen = (int32_t)uprv_strlen(key); | |
b331163b | 949 | isBcpUExt = (keylen > 1); |
729e4ab9 | 950 | |
4388f060 A |
951 | /* special keyword used for representing Unicode locale attributes */ |
952 | if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { | |
4388f060 A |
953 | if (len > 0) { |
954 | int32_t i = 0; | |
955 | while (TRUE) { | |
956 | attrBufLength = 0; | |
957 | for (; i < len; i++) { | |
958 | if (buf[i] != '-') { | |
959 | attrBuf[attrBufLength++] = buf[i]; | |
960 | } else { | |
961 | i++; | |
962 | break; | |
963 | } | |
964 | } | |
965 | if (attrBufLength > 0) { | |
966 | attrBuf[attrBufLength] = 0; | |
967 | ||
968 | } else if (i >= len){ | |
969 | break; | |
970 | } | |
971 | ||
972 | /* create AttributeListEntry */ | |
51004dcb | 973 | attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); |
4388f060 A |
974 | if (attr == NULL) { |
975 | *status = U_MEMORY_ALLOCATION_ERROR; | |
976 | break; | |
977 | } | |
51004dcb | 978 | attrValue = (char*)uprv_malloc(attrBufLength + 1); |
4388f060 A |
979 | if (attrValue == NULL) { |
980 | *status = U_MEMORY_ALLOCATION_ERROR; | |
981 | break; | |
982 | } | |
983 | uprv_strcpy(attrValue, attrBuf); | |
984 | attr->attribute = attrValue; | |
985 | ||
986 | if (!_addAttributeToList(&firstAttr, attr)) { | |
987 | uprv_free(attr); | |
988 | uprv_free(attrValue); | |
989 | if (strict) { | |
990 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
991 | break; | |
992 | } | |
993 | } | |
994 | } | |
f3c0d7a5 A |
995 | /* for a place holder ExtensionListEntry */ |
996 | bcpKey = LOCALE_ATTRIBUTE_KEY; | |
997 | bcpValue = NULL; | |
4388f060 | 998 | } |
b331163b A |
999 | } else if (isBcpUExt) { |
1000 | bcpKey = uloc_toUnicodeLocaleKey(key); | |
1001 | if (bcpKey == NULL) { | |
729e4ab9 A |
1002 | if (strict) { |
1003 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1004 | break; | |
1005 | } | |
729e4ab9 A |
1006 | continue; |
1007 | } | |
1008 | ||
b331163b A |
1009 | /* we've checked buf is null-terminated above */ |
1010 | bcpValue = uloc_toUnicodeLocaleType(key, buf); | |
1011 | if (bcpValue == NULL) { | |
729e4ab9 A |
1012 | if (strict) { |
1013 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1014 | break; | |
1015 | } | |
729e4ab9 A |
1016 | continue; |
1017 | } | |
b331163b A |
1018 | if (bcpValue == buf) { |
1019 | /* | |
1020 | When uloc_toUnicodeLocaleType(key, buf) returns the | |
1021 | input value as is, the value is well-formed, but has | |
1022 | no known mapping. This implementation normalizes the | |
1023 | value to lower case
1024 | */ | |
1025 | int32_t bcpValueLen = uprv_strlen(bcpValue); | |
1026 | if (bcpValueLen < extBufCapacity) { | |
1027 | uprv_strcpy(pExtBuf, bcpValue); | |
1028 | T_CString_toLowerCase(pExtBuf); | |
1029 | ||
1030 | bcpValue = pExtBuf; | |
1031 | ||
1032 | pExtBuf += (bcpValueLen + 1); | |
1033 | extBufCapacity -= (bcpValueLen + 1); | |
1034 | } else { | |
1035 | if (strict) { | |
1036 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1037 | break; | |
1038 | } | |
1039 | continue; | |
1040 | } | |
1041 | } | |
729e4ab9 A |
1042 | } else { |
1043 | if (*key == PRIVATEUSE) { | |
1044 | if (!_isPrivateuseValueSubtags(buf, len)) { | |
1045 | if (strict) { | |
1046 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1047 | break; | |
1048 | } | |
1049 | continue; | |
1050 | } | |
1051 | } else { | |
1052 | if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { | |
1053 | if (strict) { | |
1054 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1055 | break; | |
1056 | } | |
1057 | continue; | |
1058 | } | |
1059 | } | |
1060 | bcpKey = key; | |
1061 | if ((len + 1) < extBufCapacity) { | |
1062 | uprv_memcpy(pExtBuf, buf, len); | |
1063 | bcpValue = pExtBuf; | |
1064 | ||
1065 | pExtBuf += len; | |
1066 | ||
1067 | *pExtBuf = 0; | |
1068 | pExtBuf++; | |
1069 | ||
1070 | extBufCapacity -= (len + 1); | |
1071 | } else { | |
1072 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1073 | break; | |
1074 | } | |
1075 | } | |
1076 | ||
f3c0d7a5 A |
1077 | /* create ExtensionListEntry */ |
1078 | ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); | |
1079 | if (ext == NULL) { | |
1080 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1081 | break; | |
1082 | } | |
1083 | ext->key = bcpKey; | |
1084 | ext->value = bcpValue; | |
4388f060 | 1085 | |
f3c0d7a5 A |
1086 | if (!_addExtensionToList(&firstExt, ext, TRUE)) { |
1087 | uprv_free(ext); | |
1088 | if (strict) { | |
1089 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1090 | break; | |
4388f060 | 1091 | } |
729e4ab9 A |
1092 | } |
1093 | } | |
1094 | ||
1095 | /* Special handling for POSIX variant - add the keywords for POSIX */ | |
1096 | if (hadPosix) { | |
1097 | /* create ExtensionListEntry for POSIX */ | |
51004dcb | 1098 | ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
729e4ab9 A |
1099 | if (ext == NULL) { |
1100 | *status = U_MEMORY_ALLOCATION_ERROR; | |
4388f060 | 1101 | goto cleanup; |
729e4ab9 A |
1102 | } |
1103 | ext->key = POSIX_KEY; | |
1104 | ext->value = POSIX_VALUE; | |
1105 | ||
1106 | if (!_addExtensionToList(&firstExt, ext, TRUE)) { | |
1107 | uprv_free(ext); | |
1108 | } | |
1109 | } | |
1110 | ||
4388f060 | 1111 | if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { |
729e4ab9 | 1112 | UBool startLDMLExtension = FALSE; |
f3c0d7a5 A |
1113 | for (ext = firstExt; ext; ext = ext->next) { |
1114 | if (!startLDMLExtension && uprv_strlen(ext->key) > 1) { | |
1115 | /* first LDML u singlton extension */ | |
4388f060 A |
1116 | if (reslen < capacity) { |
1117 | *(appendAt + reslen) = SEP; | |
1118 | } | |
1119 | reslen++; | |
1120 | if (reslen < capacity) { | |
1121 | *(appendAt + reslen) = LDMLEXT; | |
1122 | } | |
1123 | reslen++; | |
1124 | ||
1125 | startLDMLExtension = TRUE; | |
1126 | } | |
1127 | ||
1128 | /* write out the sorted BCP47 attributes, extensions and private use */ | |
f3c0d7a5 A |
1129 | if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { |
1130 | /* write the value for the attributes */ | |
1131 | for (attr = firstAttr; attr; attr = attr->next) { | |
1132 | if (reslen < capacity) { | |
1133 | *(appendAt + reslen) = SEP; | |
1134 | } | |
1135 | reslen++; | |
1136 | len = (int32_t)uprv_strlen(attr->attribute); | |
1137 | if (reslen < capacity) { | |
1138 | uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); | |
1139 | } | |
1140 | reslen += len; | |
1141 | } | |
1142 | } else { | |
729e4ab9 A |
1143 | if (reslen < capacity) { |
1144 | *(appendAt + reslen) = SEP; | |
1145 | } | |
1146 | reslen++; | |
4388f060 | 1147 | len = (int32_t)uprv_strlen(ext->key); |
729e4ab9 | 1148 | if (reslen < capacity) { |
4388f060 A |
1149 | uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); |
1150 | } | |
1151 | reslen += len; | |
1152 | if (reslen < capacity) { | |
1153 | *(appendAt + reslen) = SEP; | |
729e4ab9 A |
1154 | } |
1155 | reslen++; | |
4388f060 A |
1156 | len = (int32_t)uprv_strlen(ext->value); |
1157 | if (reslen < capacity) { | |
1158 | uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); | |
1159 | } | |
1160 | reslen += len; | |
4388f060 | 1161 | } |
f3c0d7a5 | 1162 | } |
729e4ab9 | 1163 | } |
4388f060 | 1164 | cleanup: |
729e4ab9 A |
1165 | /* clean up */ |
1166 | ext = firstExt; | |
1167 | while (ext != NULL) { | |
1168 | ExtensionListEntry *tmpExt = ext->next; | |
1169 | uprv_free(ext); | |
1170 | ext = tmpExt; | |
1171 | } | |
1172 | ||
4388f060 A |
1173 | attr = firstAttr; |
1174 | while (attr != NULL) { | |
1175 | AttributeListEntry *tmpAttr = attr->next; | |
1176 | char *pValue = (char *)attr->attribute; | |
1177 | uprv_free(pValue); | |
1178 | uprv_free(attr); | |
1179 | attr = tmpAttr; | |
1180 | } | |
1181 | ||
729e4ab9 A |
1182 | uenum_close(keywordEnum); |
1183 | ||
1184 | if (U_FAILURE(*status)) { | |
1185 | return 0; | |
1186 | } | |
1187 | } | |
1188 | ||
1189 | return u_terminateChars(appendAt, capacity, reslen, status); | |
1190 | } | |
1191 | ||
1192 | /** | |
1193 | * Append keywords parsed from LDML extension value | |
1194 | * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} | |
1195 | * Note: char* buf is used for storing keywords | |
1196 | */ | |
1197 | static void | |
1198 | _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { | |
51004dcb A |
1199 | const char *pTag; /* beginning of current subtag */ |
1200 | const char *pKwds; /* beginning of key-type pairs */ | |
1201 | UBool variantExists = *posixVariant; | |
1202 | ||
1203 | ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ | |
729e4ab9 | 1204 | ExtensionListEntry *kwd, *nextKwd; |
51004dcb A |
1205 | |
1206 | AttributeListEntry *attrFirst = NULL; /* first attribute */ | |
1207 | AttributeListEntry *attr, *nextAttr; | |
1208 | ||
1209 | int32_t len; | |
729e4ab9 | 1210 | int32_t bufIdx = 0; |
51004dcb A |
1211 | |
1212 | char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
1213 | int32_t attrBufIdx = 0; | |
4388f060 A |
1214 | |
1215 | /* Reset the posixVariant value */ | |
1216 | *posixVariant = FALSE; | |
729e4ab9 | 1217 | |
51004dcb A |
1218 | pTag = ldmlext; |
1219 | pKwds = NULL; | |
729e4ab9 | 1220 | |
51004dcb A |
1221 | /* Iterate through u extension attributes */ |
1222 | while (*pTag) { | |
729e4ab9 | 1223 | /* locate next separator char */ |
51004dcb A |
1224 | for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); |
1225 | ||
b331163b | 1226 | if (ultag_isUnicodeLocaleKey(pTag, len)) { |
51004dcb A |
1227 | pKwds = pTag; |
1228 | break; | |
729e4ab9 | 1229 | } |
51004dcb A |
1230 | |
1231 | /* add this attribute to the list */ | |
1232 | attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); | |
1233 | if (attr == NULL) { | |
1234 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1235 | goto cleanup; | |
729e4ab9 A |
1236 | } |
1237 | ||
51004dcb A |
1238 | if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { |
1239 | uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); | |
1240 | attrBuf[attrBufIdx + len] = 0; | |
1241 | attr->attribute = &attrBuf[attrBufIdx]; | |
1242 | attrBufIdx += (len + 1); | |
729e4ab9 | 1243 | } else { |
51004dcb A |
1244 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1245 | goto cleanup; | |
1246 | } | |
729e4ab9 | 1247 | |
51004dcb A |
1248 | if (!_addAttributeToList(&attrFirst, attr)) { |
1249 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1250 | uprv_free(attr); | |
1251 | goto cleanup; | |
1252 | } | |
729e4ab9 | 1253 | |
51004dcb A |
1254 | /* next tag */ |
1255 | pTag += len; | |
1256 | if (*pTag) { | |
1257 | /* next to the separator */ | |
1258 | pTag++; | |
1259 | } | |
1260 | } | |
1261 | ||
1262 | if (attrFirst) { | |
1263 | /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */ | |
1264 | ||
1265 | if (attrBufIdx > bufSize) { | |
1266 | /* attrBufIdx == <total length of attribute subtag> + 1 */ | |
1267 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1268 | goto cleanup; | |
1269 | } | |
1270 | ||
1271 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); | |
1272 | if (kwd == NULL) { | |
1273 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1274 | goto cleanup; | |
1275 | } | |
1276 | ||
1277 | kwd->key = LOCALE_ATTRIBUTE_KEY; | |
1278 | kwd->value = buf; | |
1279 | ||
1280 | /* attribute subtags sorted in alphabetical order as type */ | |
1281 | attr = attrFirst; | |
1282 | while (attr != NULL) { | |
1283 | nextAttr = attr->next; | |
1284 | ||
1285 | /* buffer size check is done above */ | |
1286 | if (attr != attrFirst) { | |
1287 | *(buf + bufIdx) = SEP; | |
1288 | bufIdx++; | |
729e4ab9 | 1289 | } |
51004dcb A |
1290 | |
1291 | len = uprv_strlen(attr->attribute); | |
1292 | uprv_memcpy(buf + bufIdx, attr->attribute, len); | |
729e4ab9 | 1293 | bufIdx += len; |
729e4ab9 | 1294 | |
51004dcb A |
1295 | attr = nextAttr; |
1296 | } | |
1297 | *(buf + bufIdx) = 0; | |
1298 | bufIdx++; | |
1299 | ||
1300 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { | |
1301 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1302 | uprv_free(kwd); | |
1303 | goto cleanup; | |
1304 | } | |
1305 | ||
1306 | /* once keyword entry is created, delete the attribute list */ | |
1307 | attr = attrFirst; | |
1308 | while (attr != NULL) { | |
1309 | nextAttr = attr->next; | |
1310 | uprv_free(attr); | |
1311 | attr = nextAttr; | |
1312 | } | |
1313 | attrFirst = NULL; | |
1314 | } | |
1315 | ||
1316 | if (pKwds) { | |
1317 | const char *pBcpKey = NULL; /* u extenstion key subtag */ | |
1318 | const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ | |
1319 | int32_t bcpKeyLen = 0; | |
1320 | int32_t bcpTypeLen = 0; | |
1321 | UBool isDone = FALSE; | |
1322 | ||
1323 | pTag = pKwds; | |
1324 | /* BCP47 representation of LDML key/type pairs */ | |
1325 | while (!isDone) { | |
1326 | const char *pNextBcpKey = NULL; | |
b331163b | 1327 | int32_t nextBcpKeyLen = 0; |
51004dcb A |
1328 | UBool emitKeyword = FALSE; |
1329 | ||
1330 | if (*pTag) { | |
1331 | /* locate next separator char */ | |
1332 | for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); | |
1333 | ||
b331163b | 1334 | if (ultag_isUnicodeLocaleKey(pTag, len)) { |
51004dcb A |
1335 | if (pBcpKey) { |
1336 | emitKeyword = TRUE; | |
1337 | pNextBcpKey = pTag; | |
1338 | nextBcpKeyLen = len; | |
1339 | } else { | |
1340 | pBcpKey = pTag; | |
1341 | bcpKeyLen = len; | |
1342 | } | |
1343 | } else { | |
1344 | U_ASSERT(pBcpKey != NULL); | |
1345 | /* within LDML type subtags */ | |
1346 | if (pBcpType) { | |
1347 | bcpTypeLen += (len + 1); | |
1348 | } else { | |
1349 | pBcpType = pTag; | |
1350 | bcpTypeLen = len; | |
1351 | } | |
1352 | } | |
729e4ab9 | 1353 | |
51004dcb A |
1354 | /* next tag */ |
1355 | pTag += len; | |
1356 | if (*pTag) { | |
1357 | /* next to the separator */ | |
1358 | pTag++; | |
1359 | } | |
729e4ab9 | 1360 | } else { |
51004dcb A |
1361 | /* processing last one */ |
1362 | emitKeyword = TRUE; | |
1363 | isDone = TRUE; | |
1364 | } | |
1365 | ||
1366 | if (emitKeyword) { | |
1367 | const char *pKey = NULL; /* LDML key */ | |
1368 | const char *pType = NULL; /* LDML type */ | |
1369 | ||
b331163b A |
1370 | char bcpKeyBuf[9]; /* BCP key length is always 2 for now */ |
1371 | ||
51004dcb A |
1372 | U_ASSERT(pBcpKey != NULL); |
1373 | ||
f3c0d7a5 | 1374 | if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) { |
b331163b A |
1375 | /* the BCP key is invalid */ |
1376 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1377 | goto cleanup; | |
1378 | } | |
1379 | ||
1380 | uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); | |
1381 | bcpKeyBuf[bcpKeyLen] = 0; | |
1382 | ||
51004dcb | 1383 | /* u extension key to LDML key */ |
b331163b A |
1384 | pKey = uloc_toLegacyKey(bcpKeyBuf); |
1385 | if (pKey == NULL) { | |
1386 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
729e4ab9 A |
1387 | goto cleanup; |
1388 | } | |
b331163b A |
1389 | if (pKey == bcpKeyBuf) { |
1390 | /* | |
1391 | The key returned by toLegacyKey points to the input buffer. | |
1392 | We normalize the result key to lower case. | |
1393 | */ | |
1394 | T_CString_toLowerCase(bcpKeyBuf); | |
1395 | if (bufSize - bufIdx - 1 >= bcpKeyLen) { | |
1396 | uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen); | |
1397 | pKey = buf + bufIdx; | |
1398 | bufIdx += bcpKeyLen; | |
1399 | *(buf + bufIdx) = 0; | |
1400 | bufIdx++; | |
1401 | } else { | |
1402 | *status = U_BUFFER_OVERFLOW_ERROR; | |
1403 | goto cleanup; | |
1404 | } | |
1405 | } | |
51004dcb A |
1406 | |
1407 | if (pBcpType) { | |
b331163b | 1408 | char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */ |
f3c0d7a5 | 1409 | if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) { |
b331163b A |
1410 | /* the BCP type is too long */ |
1411 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1412 | goto cleanup; | |
1413 | } | |
1414 | ||
1415 | uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen); | |
1416 | bcpTypeBuf[bcpTypeLen] = 0; | |
1417 | ||
51004dcb | 1418 | /* BCP type to locale type */ |
b331163b A |
1419 | pType = uloc_toLegacyType(pKey, bcpTypeBuf); |
1420 | if (pType == NULL) { | |
1421 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
51004dcb A |
1422 | goto cleanup; |
1423 | } | |
b331163b A |
1424 | if (pType == bcpTypeBuf) { |
1425 | /* | |
1426 | The type returned by toLegacyType points to the input buffer. | |
1427 | We normalize the result type to lower case. | |
1428 | */ | |
1429 | /* normalize to lower case */ | |
1430 | T_CString_toLowerCase(bcpTypeBuf); | |
1431 | if (bufSize - bufIdx - 1 >= bcpTypeLen) { | |
1432 | uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen); | |
1433 | pType = buf + bufIdx; | |
1434 | bufIdx += bcpTypeLen; | |
1435 | *(buf + bufIdx) = 0; | |
1436 | bufIdx++; | |
1437 | } else { | |
1438 | *status = U_BUFFER_OVERFLOW_ERROR; | |
1439 | goto cleanup; | |
1440 | } | |
1441 | } | |
51004dcb A |
1442 | } else { |
1443 | /* typeless - default type value is "yes" */ | |
1444 | pType = LOCALE_TYPE_YES; | |
1445 | } | |
729e4ab9 | 1446 | |
51004dcb A |
1447 | /* Special handling for u-va-posix, since we want to treat this as a variant, |
1448 | not as a keyword */ | |
1449 | if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { | |
1450 | *posixVariant = TRUE; | |
1451 | } else { | |
1452 | /* create an ExtensionListEntry for this keyword */ | |
1453 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); | |
1454 | if (kwd == NULL) { | |
1455 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1456 | goto cleanup; | |
1457 | } | |
729e4ab9 | 1458 | |
51004dcb A |
1459 | kwd->key = pKey; |
1460 | kwd->value = pType; | |
1461 | ||
1462 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { | |
1463 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1464 | uprv_free(kwd); | |
1465 | goto cleanup; | |
1466 | } | |
729e4ab9 | 1467 | } |
729e4ab9 | 1468 | |
51004dcb A |
1469 | pBcpKey = pNextBcpKey; |
1470 | bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0; | |
1471 | pBcpType = NULL; | |
1472 | bcpTypeLen = 0; | |
1473 | } | |
729e4ab9 A |
1474 | } |
1475 | } | |
1476 | ||
729e4ab9 A |
1477 | kwd = kwdFirst; |
1478 | while (kwd != NULL) { | |
1479 | nextKwd = kwd->next; | |
1480 | _addExtensionToList(appendTo, kwd, FALSE); | |
1481 | kwd = nextKwd; | |
1482 | } | |
1483 | ||
1484 | return; | |
1485 | ||
1486 | cleanup: | |
51004dcb A |
1487 | attr = attrFirst; |
1488 | while (attr != NULL) { | |
1489 | nextAttr = attr->next; | |
1490 | uprv_free(attr); | |
1491 | attr = nextAttr; | |
1492 | } | |
1493 | ||
729e4ab9 A |
1494 | kwd = kwdFirst; |
1495 | while (kwd != NULL) { | |
1496 | nextKwd = kwd->next; | |
1497 | uprv_free(kwd); | |
1498 | kwd = nextKwd; | |
1499 | } | |
1500 | } | |
1501 | ||
1502 | ||
1503 | static int32_t | |
1504 | _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { | |
1505 | int32_t reslen = 0; | |
1506 | int32_t i, n; | |
1507 | int32_t len; | |
1508 | ExtensionListEntry *kwdFirst = NULL; | |
1509 | ExtensionListEntry *kwd; | |
1510 | const char *key, *type; | |
4388f060 A |
1511 | char *kwdBuf = NULL; |
1512 | int32_t kwdBufLength = capacity; | |
729e4ab9 A |
1513 | UBool posixVariant = FALSE; |
1514 | ||
1515 | if (U_FAILURE(*status)) { | |
1516 | return 0; | |
1517 | } | |
1518 | ||
51004dcb | 1519 | kwdBuf = (char*)uprv_malloc(kwdBufLength); |
4388f060 A |
1520 | if (kwdBuf == NULL) { |
1521 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1522 | return 0; | |
1523 | } | |
1524 | ||
1525 | /* Determine if variants already exists */ | |
1526 | if (ultag_getVariantsSize(langtag)) { | |
1527 | posixVariant = TRUE; | |
1528 | } | |
1529 | ||
729e4ab9 A |
1530 | n = ultag_getExtensionsSize(langtag); |
1531 | ||
1532 | /* resolve locale keywords and reordering keys */ | |
1533 | for (i = 0; i < n; i++) { | |
1534 | key = ultag_getExtensionKey(langtag, i); | |
1535 | type = ultag_getExtensionValue(langtag, i); | |
1536 | if (*key == LDMLEXT) { | |
4388f060 | 1537 | _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); |
729e4ab9 A |
1538 | if (U_FAILURE(*status)) { |
1539 | break; | |
1540 | } | |
1541 | } else { | |
51004dcb | 1542 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
729e4ab9 A |
1543 | if (kwd == NULL) { |
1544 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1545 | break; | |
1546 | } | |
1547 | kwd->key = key; | |
1548 | kwd->value = type; | |
1549 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { | |
1550 | uprv_free(kwd); | |
1551 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1552 | break; | |
1553 | } | |
1554 | } | |
1555 | } | |
1556 | ||
1557 | if (U_SUCCESS(*status)) { | |
1558 | type = ultag_getPrivateUse(langtag); | |
1559 | if ((int32_t)uprv_strlen(type) > 0) { | |
1560 | /* add private use as a keyword */ | |
51004dcb | 1561 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
729e4ab9 A |
1562 | if (kwd == NULL) { |
1563 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1564 | } else { | |
1565 | kwd->key = PRIVATEUSE_KEY; | |
1566 | kwd->value = type; | |
1567 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { | |
1568 | uprv_free(kwd); | |
1569 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1570 | } | |
1571 | } | |
1572 | } | |
1573 | } | |
1574 | ||
1575 | /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ | |
1576 | ||
1577 | if (U_SUCCESS(*status) && posixVariant) { | |
1578 | len = (int32_t) uprv_strlen(_POSIX); | |
1579 | if (reslen < capacity) { | |
1580 | uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); | |
1581 | } | |
1582 | reslen += len; | |
1583 | } | |
1584 | ||
51004dcb | 1585 | if (U_SUCCESS(*status) && kwdFirst != NULL) { |
729e4ab9 | 1586 | /* write out the sorted keywords */ |
4388f060 | 1587 | UBool firstValue = TRUE; |
729e4ab9 | 1588 | kwd = kwdFirst; |
4388f060 | 1589 | do { |
729e4ab9 | 1590 | if (reslen < capacity) { |
4388f060 | 1591 | if (firstValue) { |
729e4ab9 A |
1592 | /* '@' */ |
1593 | *(appendAt + reslen) = LOCALE_EXT_SEP; | |
4388f060 | 1594 | firstValue = FALSE; |
51004dcb | 1595 | } else { |
729e4ab9 A |
1596 | /* ';' */ |
1597 | *(appendAt + reslen) = LOCALE_KEYWORD_SEP; | |
1598 | } | |
1599 | } | |
1600 | reslen++; | |
1601 | ||
51004dcb A |
1602 | /* key */ |
1603 | len = (int32_t)uprv_strlen(kwd->key); | |
1604 | if (reslen < capacity) { | |
1605 | uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); | |
1606 | } | |
1607 | reslen += len; | |
4388f060 | 1608 | |
51004dcb A |
1609 | /* '=' */ |
1610 | if (reslen < capacity) { | |
1611 | *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; | |
1612 | } | |
1613 | reslen++; | |
4388f060 | 1614 | |
51004dcb A |
1615 | /* type */ |
1616 | len = (int32_t)uprv_strlen(kwd->value); | |
1617 | if (reslen < capacity) { | |
1618 | uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); | |
4388f060 | 1619 | } |
51004dcb A |
1620 | reslen += len; |
1621 | ||
1622 | kwd = kwd->next; | |
1623 | } while (kwd); | |
729e4ab9 A |
1624 | } |
1625 | ||
1626 | /* clean up */ | |
1627 | kwd = kwdFirst; | |
1628 | while (kwd != NULL) { | |
1629 | ExtensionListEntry *tmpKwd = kwd->next; | |
1630 | uprv_free(kwd); | |
1631 | kwd = tmpKwd; | |
1632 | } | |
1633 | ||
4388f060 A |
1634 | uprv_free(kwdBuf); |
1635 | ||
729e4ab9 A |
1636 | if (U_FAILURE(*status)) { |
1637 | return 0; | |
1638 | } | |
1639 | ||
1640 | return u_terminateChars(appendAt, capacity, reslen, status); | |
1641 | } | |
1642 | ||
4388f060 A |
1643 | static int32_t |
1644 | _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { | |
f3c0d7a5 | 1645 | (void)hadPosix; |
4388f060 A |
1646 | char buf[ULOC_FULLNAME_CAPACITY]; |
1647 | char tmpAppend[ULOC_FULLNAME_CAPACITY]; | |
1648 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
1649 | int32_t len, i; | |
1650 | int32_t reslen = 0; | |
1651 | ||
1652 | if (U_FAILURE(*status)) { | |
1653 | return 0; | |
1654 | } | |
1655 | ||
1656 | len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); | |
1657 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { | |
1658 | if (strict) { | |
1659 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1660 | } | |
1661 | return 0; | |
1662 | } | |
1663 | ||
1664 | if (len > 0) { | |
1665 | char *p, *pPriv; | |
1666 | UBool bNext = TRUE; | |
1667 | UBool firstValue = TRUE; | |
1668 | UBool writeValue; | |
1669 | ||
1670 | pPriv = NULL; | |
1671 | p = buf; | |
1672 | while (bNext) { | |
1673 | writeValue = FALSE; | |
1674 | if (*p == SEP || *p == LOCALE_SEP || *p == 0) { | |
1675 | if (*p == 0) { | |
1676 | bNext = FALSE; | |
1677 | } else { | |
1678 | *p = 0; /* terminate */ | |
1679 | } | |
1680 | if (pPriv != NULL) { | |
1681 | /* Private use in the canonical format is lowercase in BCP47 */ | |
1682 | for (i = 0; *(pPriv + i) != 0; i++) { | |
1683 | *(pPriv + i) = uprv_tolower(*(pPriv + i)); | |
1684 | } | |
1685 | ||
1686 | /* validate */ | |
1687 | if (_isPrivateuseValueSubtag(pPriv, -1)) { | |
1688 | if (firstValue) { | |
1689 | if (!_isVariantSubtag(pPriv, -1)) { | |
1690 | writeValue = TRUE; | |
1691 | } | |
1692 | } else { | |
1693 | writeValue = TRUE; | |
1694 | } | |
1695 | } else if (strict) { | |
1696 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1697 | break; | |
1698 | } else { | |
1699 | break; | |
1700 | } | |
1701 | ||
1702 | if (writeValue) { | |
1703 | if (reslen < capacity) { | |
1704 | tmpAppend[reslen++] = SEP; | |
1705 | } | |
1706 | ||
1707 | if (firstValue) { | |
1708 | if (reslen < capacity) { | |
1709 | tmpAppend[reslen++] = *PRIVATEUSE_KEY; | |
1710 | } | |
1711 | ||
1712 | if (reslen < capacity) { | |
1713 | tmpAppend[reslen++] = SEP; | |
1714 | } | |
1715 | ||
1716 | len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); | |
1717 | if (reslen < capacity) { | |
1718 | uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); | |
1719 | } | |
1720 | reslen += len; | |
1721 | ||
1722 | if (reslen < capacity) { | |
1723 | tmpAppend[reslen++] = SEP; | |
1724 | } | |
1725 | ||
1726 | firstValue = FALSE; | |
1727 | } | |
1728 | ||
1729 | len = (int32_t)uprv_strlen(pPriv); | |
1730 | if (reslen < capacity) { | |
1731 | uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); | |
1732 | } | |
1733 | reslen += len; | |
1734 | } | |
1735 | } | |
1736 | /* reset private use starting position */ | |
1737 | pPriv = NULL; | |
1738 | } else if (pPriv == NULL) { | |
1739 | pPriv = p; | |
1740 | } | |
1741 | p++; | |
1742 | } | |
1743 | ||
1744 | if (U_FAILURE(*status)) { | |
1745 | return 0; | |
1746 | } | |
1747 | } | |
1748 | ||
1749 | if (U_SUCCESS(*status)) { | |
1750 | len = reslen; | |
1751 | if (reslen < capacity) { | |
1752 | uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen)); | |
1753 | } | |
1754 | } | |
1755 | ||
1756 | u_terminateChars(appendAt, capacity, reslen, status); | |
1757 | ||
1758 | return reslen; | |
1759 | } | |
1760 | ||
729e4ab9 A |
1761 | /* |
1762 | * ------------------------------------------------- | |
1763 | * | |
1764 | * ultag_ functions | |
1765 | * | |
1766 | * ------------------------------------------------- | |
1767 | */ | |
1768 | ||
1769 | /* Bit flags used by the parser. ultag_parse keeps a mask of these in `next` to record which kinds of subtag are accepted at the current position. */ | |
1770 | #define LANG 0x0001 /* language subtag */ | |
1771 | #define EXTL 0x0002 /* extlang subtag */ | |
1772 | #define SCRT 0x0004 /* script subtag */ | |
1773 | #define REGN 0x0008 /* region subtag */ | |
1774 | #define VART 0x0010 /* variant subtag */ | |
1775 | #define EXTS 0x0020 /* extension singleton */ | |
1776 | #define EXTV 0x0040 /* extension value subtag */ | |
1777 | #define PRIV 0x0080 /* private use */ | |
f3c0d7a5 A |
1779 | /** |
1780 | * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing | |
1781 | * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ ) | |
1782 | * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above. | |
1783 | */ | |
1784 | #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) | |
1785 | #pragma optimize( "", off ) | |
1786 | #endif | |
1787 | ||
729e4ab9 A |
1788 | static ULanguageTag* |
1789 | ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { | |
1790 | ULanguageTag *t; | |
1791 | char *tagBuf; | |
1792 | int16_t next; | |
1793 | char *pSubtag, *pNext, *pLastGoodPosition; | |
1794 | int32_t subtagLen; | |
1795 | int32_t extlangIdx; | |
1796 | ExtensionListEntry *pExtension; | |
1797 | char *pExtValueSubtag, *pExtValueSubtagEnd; | |
1798 | int32_t i; | |
51004dcb A |
1799 | UBool privateuseVar = FALSE; |
1800 | int32_t grandfatheredLen = 0; | |
729e4ab9 A |
1801 | |
1802 | if (parsedLen != NULL) { | |
1803 | *parsedLen = 0; | |
1804 | } | |
1805 | ||
1806 | if (U_FAILURE(*status)) { | |
1807 | return NULL; | |
1808 | } | |
1809 | ||
1810 | if (tagLen < 0) { | |
1811 | tagLen = (int32_t)uprv_strlen(tag); | |
1812 | } | |
1813 | ||
1814 | /* copy the entire string */ | |
1815 | tagBuf = (char*)uprv_malloc(tagLen + 1); | |
1816 | if (tagBuf == NULL) { | |
1817 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1818 | return NULL; | |
1819 | } | |
1820 | uprv_memcpy(tagBuf, tag, tagLen); | |
1821 | *(tagBuf + tagLen) = 0; | |
1822 | ||
1823 | /* create a ULanguageTag */ | |
1824 | t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); | |
729e4ab9 A |
1825 | if (t == NULL) { |
1826 | uprv_free(tagBuf); | |
1827 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1828 | return NULL; | |
1829 | } | |
4388f060 A |
1830 | _initializeULanguageTag(t); |
1831 | t->buf = tagBuf; | |
729e4ab9 A |
1832 | |
1833 | if (tagLen < MINLEN) { | |
1834 | /* the input tag is too short - return empty ULanguageTag */ | |
1835 | return t; | |
1836 | } | |
1837 | ||
1838 | /* check if the tag is grandfathered */ | |
1839 | for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { | |
4388f060 | 1840 | if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { |
51004dcb A |
1841 | int32_t newTagLength; |
1842 | ||
1843 | grandfatheredLen = tagLen; /* back up for output parsedLen */ | |
1844 | newTagLength = uprv_strlen(GRANDFATHERED[i+1]); | |
4388f060 A |
1845 | if (tagLen < newTagLength) { |
1846 | uprv_free(tagBuf); | |
1847 | tagBuf = (char*)uprv_malloc(newTagLength + 1); | |
1848 | if (tagBuf == NULL) { | |
1849 | *status = U_MEMORY_ALLOCATION_ERROR; | |
2ca993e8 | 1850 | ultag_close(t); |
4388f060 A |
1851 | return NULL; |
1852 | } | |
1853 | t->buf = tagBuf; | |
1854 | tagLen = newTagLength; | |
729e4ab9 | 1855 | } |
4388f060 | 1856 | uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); |
4388f060 | 1857 | break; |
729e4ab9 A |
1858 | } |
1859 | } | |
1860 | ||
1861 | /* | |
1862 | * langtag = language | |
1863 | * ["-" script] | |
1864 | * ["-" region] | |
1865 | * *("-" variant) | |
1866 | * *("-" extension) | |
1867 | * ["-" privateuse] | |
1868 | */ | |
1869 | ||
1870 | next = LANG | PRIV; | |
1871 | pNext = pLastGoodPosition = tagBuf; | |
1872 | extlangIdx = 0; | |
1873 | pExtension = NULL; | |
1874 | pExtValueSubtag = NULL; | |
1875 | pExtValueSubtagEnd = NULL; | |
729e4ab9 A |
1876 | |
1877 | while (pNext) { | |
1878 | char *pSep; | |
1879 | ||
1880 | pSubtag = pNext; | |
1881 | ||
1882 | /* locate next separator char */ | |
1883 | pSep = pSubtag; | |
1884 | while (*pSep) { | |
1885 | if (*pSep == SEP) { | |
1886 | break; | |
1887 | } | |
1888 | pSep++; | |
1889 | } | |
1890 | if (*pSep == 0) { | |
1891 | /* last subtag */ | |
1892 | pNext = NULL; | |
1893 | } else { | |
1894 | pNext = pSep + 1; | |
1895 | } | |
1896 | subtagLen = (int32_t)(pSep - pSubtag); | |
1897 | ||
1898 | if (next & LANG) { | |
1899 | if (_isLanguageSubtag(pSubtag, subtagLen)) { | |
1900 | *pSep = 0; /* terminate */ | |
1901 | t->language = T_CString_toLowerCase(pSubtag); | |
1902 | ||
1903 | pLastGoodPosition = pSep; | |
1904 | next = EXTL | SCRT | REGN | VART | EXTS | PRIV; | |
1905 | continue; | |
1906 | } | |
1907 | } | |
1908 | if (next & EXTL) { | |
1909 | if (_isExtlangSubtag(pSubtag, subtagLen)) { | |
1910 | *pSep = 0; | |
1911 | t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); | |
1912 | ||
1913 | pLastGoodPosition = pSep; | |
1914 | if (extlangIdx < 3) { | |
1915 | next = EXTL | SCRT | REGN | VART | EXTS | PRIV; | |
1916 | } else { | |
1917 | next = SCRT | REGN | VART | EXTS | PRIV; | |
1918 | } | |
1919 | continue; | |
1920 | } | |
1921 | } | |
1922 | if (next & SCRT) { | |
1923 | if (_isScriptSubtag(pSubtag, subtagLen)) { | |
1924 | char *p = pSubtag; | |
1925 | ||
1926 | *pSep = 0; | |
1927 | ||
1928 | /* to title case */ | |
1929 | *p = uprv_toupper(*p); | |
1930 | p++; | |
1931 | for (; *p; p++) { | |
1932 | *p = uprv_tolower(*p); | |
1933 | } | |
1934 | ||
1935 | t->script = pSubtag; | |
1936 | ||
1937 | pLastGoodPosition = pSep; | |
1938 | next = REGN | VART | EXTS | PRIV; | |
1939 | continue; | |
1940 | } | |
1941 | } | |
1942 | if (next & REGN) { | |
1943 | if (_isRegionSubtag(pSubtag, subtagLen)) { | |
1944 | *pSep = 0; | |
1945 | t->region = T_CString_toUpperCase(pSubtag); | |
1946 | ||
1947 | pLastGoodPosition = pSep; | |
1948 | next = VART | EXTS | PRIV; | |
1949 | continue; | |
1950 | } | |
1951 | } | |
1952 | if (next & VART) { | |
4388f060 A |
1953 | if (_isVariantSubtag(pSubtag, subtagLen) || |
1954 | (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { | |
729e4ab9 A |
1955 | VariantListEntry *var; |
1956 | UBool isAdded; | |
1957 | ||
1958 | var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); | |
1959 | if (var == NULL) { | |
1960 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1961 | goto error; | |
1962 | } | |
1963 | *pSep = 0; | |
1964 | var->variant = T_CString_toUpperCase(pSubtag); | |
1965 | isAdded = _addVariantToList(&(t->variants), var); | |
1966 | if (!isAdded) { | |
1967 | /* duplicated variant entry */ | |
1968 | uprv_free(var); | |
1969 | break; | |
1970 | } | |
1971 | pLastGoodPosition = pSep; | |
1972 | next = VART | EXTS | PRIV; | |
1973 | continue; | |
1974 | } | |
1975 | } | |
1976 | if (next & EXTS) { | |
1977 | if (_isExtensionSingleton(pSubtag, subtagLen)) { | |
1978 | if (pExtension != NULL) { | |
1979 | if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { | |
1980 | /* the previous extension is incomplete */ | |
1981 | uprv_free(pExtension); | |
1982 | pExtension = NULL; | |
1983 | break; | |
1984 | } | |
1985 | ||
1986 | /* terminate the previous extension value */ | |
1987 | *pExtValueSubtagEnd = 0; | |
1988 | pExtension->value = T_CString_toLowerCase(pExtValueSubtag); | |
1989 | ||
1990 | /* insert the extension to the list */ | |
1991 | if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { | |
1992 | pLastGoodPosition = pExtValueSubtagEnd; | |
1993 | } else { | |
1994 | /* stop parsing here */ | |
1995 | uprv_free(pExtension); | |
1996 | pExtension = NULL; | |
1997 | break; | |
1998 | } | |
729e4ab9 A |
1999 | } |
2000 | ||
729e4ab9 | 2001 | /* create a new extension */ |
51004dcb | 2002 | pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
729e4ab9 A |
2003 | if (pExtension == NULL) { |
2004 | *status = U_MEMORY_ALLOCATION_ERROR; | |
2005 | goto error; | |
2006 | } | |
2007 | *pSep = 0; | |
2008 | pExtension->key = T_CString_toLowerCase(pSubtag); | |
2009 | pExtension->value = NULL; /* will be set later */ | |
2010 | ||
2011 | /* | |
2012 | * reset the start and the end location of extension value | |
2013 | * subtags for this extension | |
2014 | */ | |
2015 | pExtValueSubtag = NULL; | |
2016 | pExtValueSubtagEnd = NULL; | |
2017 | ||
2018 | next = EXTV; | |
2019 | continue; | |
2020 | } | |
2021 | } | |
2022 | if (next & EXTV) { | |
2023 | if (_isExtensionSubtag(pSubtag, subtagLen)) { | |
51004dcb A |
2024 | if (pExtValueSubtag == NULL) { |
2025 | /* if the start position of this extension's value is not yet set, | |
2026 | this one is the first value subtag */ | |
2027 | pExtValueSubtag = pSubtag; | |
729e4ab9 A |
2028 | } |
2029 | ||
51004dcb A |
2030 | /* Mark the end of this subtag */ |
2031 | pExtValueSubtagEnd = pSep; | |
2032 | next = EXTS | EXTV | PRIV; | |
4388f060 | 2033 | |
51004dcb | 2034 | continue; |
729e4ab9 A |
2035 | } |
2036 | } | |
2037 | if (next & PRIV) { | |
2038 | if (uprv_tolower(*pSubtag) == PRIVATEUSE) { | |
2039 | char *pPrivuseVal; | |
2040 | ||
2041 | if (pExtension != NULL) { | |
2042 | /* Process the last extension */ | |
2043 | if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { | |
2044 | /* the previous extension is incomplete */ | |
2045 | uprv_free(pExtension); | |
2046 | pExtension = NULL; | |
2047 | break; | |
2048 | } else { | |
2049 | /* terminate the previous extension value */ | |
2050 | *pExtValueSubtagEnd = 0; | |
2051 | pExtension->value = T_CString_toLowerCase(pExtValueSubtag); | |
2052 | ||
2053 | /* insert the extension to the list */ | |
2054 | if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { | |
2055 | pLastGoodPosition = pExtValueSubtagEnd; | |
2056 | pExtension = NULL; | |
2057 | } else { | |
2058 | /* stop parsing here */ | |
2059 | uprv_free(pExtension); | |
2060 | pExtension = NULL; | |
2061 | break; | |
2062 | } | |
2063 | } | |
2064 | } | |
2065 | ||
2066 | /* The rest of part will be private use value subtags */ | |
2067 | if (pNext == NULL) { | |
2068 | /* empty private use subtag */ | |
2069 | break; | |
2070 | } | |
2071 | /* back up the private use value start position */ | |
2072 | pPrivuseVal = pNext; | |
2073 | ||
2074 | /* validate private use value subtags */ | |
2075 | while (pNext) { | |
2076 | pSubtag = pNext; | |
2077 | pSep = pSubtag; | |
2078 | while (*pSep) { | |
2079 | if (*pSep == SEP) { | |
2080 | break; | |
2081 | } | |
2082 | pSep++; | |
2083 | } | |
2084 | if (*pSep == 0) { | |
2085 | /* last subtag */ | |
2086 | pNext = NULL; | |
2087 | } else { | |
2088 | pNext = pSep + 1; | |
2089 | } | |
2090 | subtagLen = (int32_t)(pSep - pSubtag); | |
2091 | ||
4388f060 A |
2092 | if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { |
2093 | *pSep = 0; | |
2094 | next = VART; | |
2095 | privateuseVar = TRUE; | |
2096 | break; | |
2097 | } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { | |
729e4ab9 A |
2098 | pLastGoodPosition = pSep; |
2099 | } else { | |
2100 | break; | |
2101 | } | |
2102 | } | |
4388f060 A |
2103 | |
2104 | if (next == VART) { | |
2105 | continue; | |
2106 | } | |
2107 | ||
729e4ab9 A |
2108 | if (pLastGoodPosition - pPrivuseVal > 0) { |
2109 | *pLastGoodPosition = 0; | |
2110 | t->privateuse = T_CString_toLowerCase(pPrivuseVal); | |
2111 | } | |
2112 | /* No more subtags, exiting the parse loop */ | |
2113 | break; | |
2114 | } | |
2115 | break; | |
2116 | } | |
4388f060 | 2117 | |
729e4ab9 A |
2118 | /* If we fell through here, it means this subtag is illegal - quit parsing */ |
2119 | break; | |
2120 | } | |
2121 | ||
2122 | if (pExtension != NULL) { | |
2123 | /* Process the last extension */ | |
2124 | if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { | |
2125 | /* the previous extension is incomplete */ | |
2126 | uprv_free(pExtension); | |
2127 | } else { | |
2128 | /* terminate the previous extension value */ | |
2129 | *pExtValueSubtagEnd = 0; | |
2130 | pExtension->value = T_CString_toLowerCase(pExtValueSubtag); | |
2131 | /* insert the extension to the list */ | |
2132 | if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { | |
2133 | pLastGoodPosition = pExtValueSubtagEnd; | |
2134 | } else { | |
2135 | uprv_free(pExtension); | |
2136 | } | |
2137 | } | |
2138 | } | |
2139 | ||
2140 | if (parsedLen != NULL) { | |
51004dcb | 2141 | *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf); |
729e4ab9 A |
2142 | } |
2143 | ||
2144 | return t; | |
2145 | ||
2146 | error: | |
2ca993e8 | 2147 | ultag_close(t); |
729e4ab9 A |
2148 | return NULL; |
2149 | } | |
2150 | ||
f3c0d7a5 A |
2151 | /** |
2152 | * Ticket #12705 - Turn optimization back on. | |
2153 | */ | |
2154 | #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) | |
2155 | #pragma optimize( "", on ) | |
2156 | #endif | |
2157 | ||
729e4ab9 A |
2158 | static void |
2159 | ultag_close(ULanguageTag* langtag) { | |
2160 | ||
2161 | if (langtag == NULL) { | |
2162 | return; | |
2163 | } | |
2164 | ||
2165 | uprv_free(langtag->buf); | |
2166 | ||
2167 | if (langtag->variants) { | |
2168 | VariantListEntry *curVar = langtag->variants; | |
2169 | while (curVar) { | |
2170 | VariantListEntry *nextVar = curVar->next; | |
2171 | uprv_free(curVar); | |
2172 | curVar = nextVar; | |
2173 | } | |
2174 | } | |
2175 | ||
2176 | if (langtag->extensions) { | |
2177 | ExtensionListEntry *curExt = langtag->extensions; | |
2178 | while (curExt) { | |
2179 | ExtensionListEntry *nextExt = curExt->next; | |
2180 | uprv_free(curExt); | |
2181 | curExt = nextExt; | |
2182 | } | |
2183 | } | |
2184 | ||
2185 | uprv_free(langtag); | |
2186 | } | |
2187 | ||
2188 | static const char* | |
2189 | ultag_getLanguage(const ULanguageTag* langtag) { | |
2190 | return langtag->language; | |
2191 | } | |
2192 | ||
#if 0
/*
 * Currently compiled out.
 * Walks DEPRECATEDLANGS two entries at a time, comparing the first entry
 * of each pair with the tag's language; on a match the second entry of
 * the pair is returned. Without a match the tag's own language is returned.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t pairStart = 0;
    while (DEPRECATEDLANGS[pairStart] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairStart], langtag->language) == 0) {
            return DEPRECATEDLANGS[pairStart + 1];
        }
        pairStart += 2;
    }
    return langtag->language;
}
#endif
2205 | ||
2206 | static const char* | |
2207 | ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { | |
2208 | if (idx >= 0 && idx < MAXEXTLANG) { | |
2209 | return langtag->extlang[idx]; | |
2210 | } | |
2211 | return NULL; | |
2212 | } | |
2213 | ||
2214 | static int32_t | |
2215 | ultag_getExtlangSize(const ULanguageTag* langtag) { | |
2216 | int32_t size = 0; | |
2217 | int32_t i; | |
2218 | for (i = 0; i < MAXEXTLANG; i++) { | |
2219 | if (langtag->extlang[i]) { | |
2220 | size++; | |
2221 | } | |
2222 | } | |
2223 | return size; | |
2224 | } | |
2225 | ||
2226 | static const char* | |
2227 | ultag_getScript(const ULanguageTag* langtag) { | |
2228 | return langtag->script; | |
2229 | } | |
2230 | ||
2231 | static const char* | |
2232 | ultag_getRegion(const ULanguageTag* langtag) { | |
2233 | return langtag->region; | |
2234 | } | |
2235 | ||
2236 | static const char* | |
2237 | ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { | |
2238 | const char *var = NULL; | |
2239 | VariantListEntry *cur = langtag->variants; | |
2240 | int32_t i = 0; | |
2241 | while (cur) { | |
2242 | if (i == idx) { | |
2243 | var = cur->variant; | |
2244 | break; | |
2245 | } | |
2246 | cur = cur->next; | |
2247 | i++; | |
2248 | } | |
2249 | return var; | |
2250 | } | |
2251 | ||
2252 | static int32_t | |
2253 | ultag_getVariantsSize(const ULanguageTag* langtag) { | |
2254 | int32_t size = 0; | |
2255 | VariantListEntry *cur = langtag->variants; | |
2256 | while (TRUE) { | |
2257 | if (cur == NULL) { | |
2258 | break; | |
2259 | } | |
2260 | size++; | |
2261 | cur = cur->next; | |
2262 | } | |
2263 | return size; | |
2264 | } | |
2265 | ||
2266 | static const char* | |
2267 | ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { | |
2268 | const char *key = NULL; | |
2269 | ExtensionListEntry *cur = langtag->extensions; | |
2270 | int32_t i = 0; | |
2271 | while (cur) { | |
2272 | if (i == idx) { | |
2273 | key = cur->key; | |
2274 | break; | |
2275 | } | |
2276 | cur = cur->next; | |
2277 | i++; | |
2278 | } | |
2279 | return key; | |
2280 | } | |
2281 | ||
2282 | static const char* | |
2283 | ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { | |
2284 | const char *val = NULL; | |
2285 | ExtensionListEntry *cur = langtag->extensions; | |
2286 | int32_t i = 0; | |
2287 | while (cur) { | |
2288 | if (i == idx) { | |
2289 | val = cur->value; | |
2290 | break; | |
2291 | } | |
2292 | cur = cur->next; | |
2293 | i++; | |
2294 | } | |
2295 | return val; | |
2296 | } | |
2297 | ||
2298 | static int32_t | |
2299 | ultag_getExtensionsSize(const ULanguageTag* langtag) { | |
2300 | int32_t size = 0; | |
2301 | ExtensionListEntry *cur = langtag->extensions; | |
2302 | while (TRUE) { | |
2303 | if (cur == NULL) { | |
2304 | break; | |
2305 | } | |
2306 | size++; | |
2307 | cur = cur->next; | |
2308 | } | |
2309 | return size; | |
2310 | } | |
2311 | ||
2312 | static const char* | |
2313 | ultag_getPrivateUse(const ULanguageTag* langtag) { | |
2314 | return langtag->privateuse; | |
2315 | } | |
2316 | ||
#if 0
/* Currently compiled out. Returns the grandfathered field of the tag object. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;
    return grandfathered;
}
#endif
2323 | ||
2324 | ||
2325 | /* | |
2326 | * ------------------------------------------------- | |
2327 | * | |
2328 | * Locale/BCP47 conversion APIs, exposed as uloc_* | |
2329 | * | |
2330 | * ------------------------------------------------- | |
2331 | */ | |
51004dcb | 2332 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
2333 | uloc_toLanguageTag(const char* localeID, |
2334 | char* langtag, | |
2335 | int32_t langtagCapacity, | |
2336 | UBool strict, | |
2337 | UErrorCode* status) { | |
2338 | /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ | |
2339 | char canonical[256]; | |
2340 | int32_t reslen = 0; | |
2341 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
2342 | UBool hadPosix = FALSE; | |
2343 | const char* pKeywordStart; | |
2344 | ||
2345 | /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ | |
2346 | canonical[0] = 0; | |
2347 | if (uprv_strlen(localeID) > 0) { | |
2348 | uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); | |
2349 | if (tmpStatus != U_ZERO_ERROR) { | |
2350 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
2351 | return 0; | |
2352 | } | |
2353 | } | |
2354 | ||
2355 | /* For handling special case - private use only tag */ | |
2356 | pKeywordStart = locale_getKeywordsStart(canonical); | |
2357 | if (pKeywordStart == canonical) { | |
2358 | UEnumeration *kwdEnum; | |
2359 | int kwdCnt = 0; | |
2360 | UBool done = FALSE; | |
2361 | ||
2362 | kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); | |
2363 | if (kwdEnum != NULL) { | |
2364 | kwdCnt = uenum_count(kwdEnum, &tmpStatus); | |
2365 | if (kwdCnt == 1) { | |
2366 | const char *key; | |
2367 | int32_t len = 0; | |
2368 | ||
2369 | key = uenum_next(kwdEnum, &len, &tmpStatus); | |
2370 | if (len == 1 && *key == PRIVATEUSE) { | |
2371 | char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
2372 | buf[0] = PRIVATEUSE; | |
2373 | buf[1] = SEP; | |
2374 | len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); | |
2375 | if (U_SUCCESS(tmpStatus)) { | |
2376 | if (_isPrivateuseValueSubtags(&buf[2], len)) { | |
2377 | /* return private use only tag */ | |
2378 | reslen = len + 2; | |
2379 | uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); | |
2380 | u_terminateChars(langtag, langtagCapacity, reslen, status); | |
2381 | done = TRUE; | |
2382 | } else if (strict) { | |
2383 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
2384 | done = TRUE; | |
2385 | } | |
2386 | /* if not strict mode, then "und" will be returned */ | |
2387 | } else { | |
2388 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
2389 | done = TRUE; | |
2390 | } | |
2391 | } | |
2392 | } | |
2393 | uenum_close(kwdEnum); | |
2394 | if (done) { | |
2395 | return reslen; | |
2396 | } | |
2397 | } | |
2398 | } | |
2399 | ||
2400 | reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); | |
2401 | reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); | |
2402 | reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); | |
2403 | reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); | |
2404 | reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); | |
4388f060 | 2405 | reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); |
729e4ab9 A |
2406 | |
2407 | return reslen; | |
2408 | } | |
2409 | ||
2410 | ||
51004dcb | 2411 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
2412 | uloc_forLanguageTag(const char* langtag, |
2413 | char* localeID, | |
2414 | int32_t localeIDCapacity, | |
2415 | int32_t* parsedLength, | |
2416 | UErrorCode* status) { | |
2417 | ULanguageTag *lt; | |
2418 | int32_t reslen = 0; | |
2419 | const char *subtag, *p; | |
2420 | int32_t len; | |
51004dcb | 2421 | int32_t i, n; |
729e4ab9 A |
2422 | UBool noRegion = TRUE; |
2423 | ||
2424 | lt = ultag_parse(langtag, -1, parsedLength, status); | |
2425 | if (U_FAILURE(*status)) { | |
2426 | return 0; | |
2427 | } | |
2428 | ||
2429 | /* language */ | |
2430 | subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); | |
2431 | if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { | |
2432 | len = (int32_t)uprv_strlen(subtag); | |
2433 | if (len > 0) { | |
2434 | if (reslen < localeIDCapacity) { | |
2435 | uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); | |
2436 | } | |
2437 | reslen += len; | |
2438 | } | |
2439 | } | |
2440 | ||
2441 | /* script */ | |
2442 | subtag = ultag_getScript(lt); | |
2443 | len = (int32_t)uprv_strlen(subtag); | |
2444 | if (len > 0) { | |
2445 | if (reslen < localeIDCapacity) { | |
2446 | *(localeID + reslen) = LOCALE_SEP; | |
2447 | } | |
2448 | reslen++; | |
2449 | ||
2450 | /* write out the script in title case */ | |
2451 | p = subtag; | |
2452 | while (*p) { | |
2453 | if (reslen < localeIDCapacity) { | |
2454 | if (p == subtag) { | |
2455 | *(localeID + reslen) = uprv_toupper(*p); | |
2456 | } else { | |
2457 | *(localeID + reslen) = *p; | |
2458 | } | |
2459 | } | |
2460 | reslen++; | |
2461 | p++; | |
2462 | } | |
2463 | } | |
2464 | ||
2465 | /* region */ | |
2466 | subtag = ultag_getRegion(lt); | |
2467 | len = (int32_t)uprv_strlen(subtag); | |
2468 | if (len > 0) { | |
2469 | if (reslen < localeIDCapacity) { | |
2470 | *(localeID + reslen) = LOCALE_SEP; | |
2471 | } | |
2472 | reslen++; | |
2473 | /* write out the retion in upper case */ | |
2474 | p = subtag; | |
2475 | while (*p) { | |
2476 | if (reslen < localeIDCapacity) { | |
2477 | *(localeID + reslen) = uprv_toupper(*p); | |
2478 | } | |
2479 | reslen++; | |
2480 | p++; | |
2481 | } | |
2482 | noRegion = FALSE; | |
2483 | } | |
2484 | ||
2485 | /* variants */ | |
2486 | n = ultag_getVariantsSize(lt); | |
2487 | if (n > 0) { | |
2488 | if (noRegion) { | |
2489 | if (reslen < localeIDCapacity) { | |
2490 | *(localeID + reslen) = LOCALE_SEP; | |
2491 | } | |
2492 | reslen++; | |
2493 | } | |
2494 | ||
2495 | for (i = 0; i < n; i++) { | |
2496 | subtag = ultag_getVariant(lt, i); | |
2497 | if (reslen < localeIDCapacity) { | |
2498 | *(localeID + reslen) = LOCALE_SEP; | |
2499 | } | |
2500 | reslen++; | |
2501 | /* write out the variant in upper case */ | |
2502 | p = subtag; | |
2503 | while (*p) { | |
2504 | if (reslen < localeIDCapacity) { | |
2505 | *(localeID + reslen) = uprv_toupper(*p); | |
2506 | } | |
2507 | reslen++; | |
2508 | p++; | |
2509 | } | |
2510 | } | |
2511 | } | |
2512 | ||
2513 | /* keywords */ | |
2514 | n = ultag_getExtensionsSize(lt); | |
2515 | subtag = ultag_getPrivateUse(lt); | |
51004dcb A |
2516 | if (n > 0 || uprv_strlen(subtag) > 0) { |
2517 | if (reslen == 0 && n > 0) { | |
729e4ab9 A |
2518 | /* need a language */ |
2519 | if (reslen < localeIDCapacity) { | |
2520 | uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); | |
2521 | } | |
2522 | reslen += LANG_UND_LEN; | |
2523 | } | |
2524 | len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); | |
2525 | reslen += len; | |
2526 | } | |
2527 | ||
2528 | ultag_close(lt); | |
2529 | return u_terminateChars(localeID, localeIDCapacity, reslen, status); | |
2530 | } | |
2531 | ||
2532 |