]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /* |
2 | ********************************************************************** | |
b331163b | 3 | * Copyright (C) 2009-2014, International Business Machines |
729e4ab9 A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
6 | */ | |
7 | ||
8 | #include "unicode/utypes.h" | |
9 | #include "unicode/ures.h" | |
10 | #include "unicode/putil.h" | |
11 | #include "unicode/uloc.h" | |
12 | #include "ustr_imp.h" | |
13 | #include "cmemory.h" | |
14 | #include "cstring.h" | |
15 | #include "putilimp.h" | |
16 | #include "uinvchar.h" | |
17 | #include "ulocimp.h" | |
51004dcb A |
18 | #include "uassert.h" |
19 | ||
729e4ab9 A |
/*
 * Node of a singly linked list of variant subtags.
 * The node stores only a pointer to the variant text; the list helpers in
 * this file never copy or free that text.
 */
typedef struct VariantListEntry {
    const char *variant;            /* variant subtag text */
    struct VariantListEntry *next;  /* next node, or NULL at the end of the list */
} VariantListEntry;
25 | ||
4388f060 A |
/*
 * Node of a singly linked list of attribute values.
 * _addAttributeToList() keeps such a list sorted by attribute text and
 * free of duplicates.
 */
typedef struct AttributeListEntry {
    const char *attribute;            /* attribute value text */
    struct AttributeListEntry *next;  /* next node, or NULL at the end of the list */
} AttributeListEntry;
31 | ||
729e4ab9 A |
/*
 * Node of a singly linked list of extensions, each a key/value pair.
 * _addExtensionToList() keeps such a list ordered by key and rejects a
 * second entry with an equal key.
 */
typedef struct ExtensionListEntry {
    const char *key;                  /* extension key */
    const char *value;                /* extension value */
    struct ExtensionListEntry *next;  /* next node, or NULL at the end of the list */
} ExtensionListEntry;
38 | ||
/* Number of extlang slots held by a ULanguageTag. */
#define MAXEXTLANG 3

/*
 * Parsed representation of a language tag.
 * _initializeULanguageTag() sets buf, every extlang slot, variants and
 * extensions to NULL and the remaining string members to the shared
 * empty string.
 */
typedef struct ULanguageTag {
    char                *buf;   /* holding parsed subtags */
    const char          *language;
    const char          *extlang[MAXEXTLANG];
    const char          *script;
    const char          *region;
    VariantListEntry    *variants;      /* head of the variant list, or NULL */
    ExtensionListEntry  *extensions;    /* head of the extension list, or NULL */
    const char          *privateuse;
    const char          *grandfathered;
} ULanguageTag;
51 | ||
#define MINLEN 2
#define SEP '-'         /* separator between subtags of a language tag */
#define PRIVATEUSE 'x'  /* singleton that introduces private use subtags */
#define LDMLEXT 'u'     /* singleton used as the sort position of multi-letter keys in _addExtensionToList() */

/* Separator characters of ICU locale IDs (presumably; only LOCALE_SEP is used in this part of the file). */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* ASCII letter test, delegated to uprv_isASCIILetter(). */
#define ISALPHA(c) uprv_isASCIILetter(c)
/* ASCII digit test. Evaluates its argument twice, so do not pass an expression with side effects. */
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
64 | ||
51004dcb A |
/* Shared empty string; the initial value of the string members of a ULanguageTag. */
static const char EMPTY[] = "";
/* Language subtag written when a locale has no language or an ill-formed one. */
static const char LANG_UND[] = "und";
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
/* Key and value of the extension that stands in for the POSIX variant. */
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";
/* Locale keyword that carries Unicode locale attributes. */
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND, not counting the terminating NUL. */
#define LANG_UND_LEN 3
76 | ||
/*
 * Flat table of string pairs: a grandfathered tag at each even index and
 * its preferred replacement at the following odd index. The table ends
 * with a NULL, NULL pair.
 */
static const char* const GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "en-GB-oed",    "en-GB-x-oed",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "see-x-i-mingo",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "nan-x-zh-min",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};
107 | ||
/*
 * Flat table of string pairs: a deprecated language code at each even
 * index and its replacement at the following odd index.
 * _appendLanguageToLanguageTag() walks it two entries at a time, so the
 * number of entries must stay even.
 */
static const char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id"
};
114 | ||
115 | /* | |
116 | * ------------------------------------------------- | |
117 | * | |
118 | * These ultag_ functions may be exposed as APIs later | |
119 | * | |
120 | * ------------------------------------------------- | |
121 | */ | |
122 | ||
/*
 * Forward declarations of the file-local ultag_ parser and accessors.
 * NOTE(review): the definitions are outside this part of the file; the
 * short descriptions below are inferred from the names and signatures
 * only and should be confirmed against the implementations.
 * Declarations wrapped in #if 0 are compiled out.
 */

/* Presumably parses tag and reports the number of parsed chars through parsedLen. */
static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);

/* Presumably releases a ULanguageTag obtained from ultag_parse(). */
static void
ultag_close(ULanguageTag* langtag);

static const char*
ultag_getLanguage(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif

static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);

static const char*
ultag_getScript(const ULanguageTag* langtag);

static const char*
ultag_getRegion(const ULanguageTag* langtag);

static const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getVariantsSize(const ULanguageTag* langtag);

static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);

static const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag);

static const char*
ultag_getPrivateUse(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag);
#endif
171 | ||
172 | /* | |
173 | * ------------------------------------------------- | |
174 | * | |
175 | * Language subtag syntax validation functions | |
176 | * | |
177 | * ------------------------------------------------- | |
178 | */ | |
179 | ||
180 | static UBool | |
181 | _isAlphaString(const char* s, int32_t len) { | |
182 | int32_t i; | |
183 | for (i = 0; i < len; i++) { | |
184 | if (!ISALPHA(*(s + i))) { | |
185 | return FALSE; | |
186 | } | |
187 | } | |
188 | return TRUE; | |
189 | } | |
190 | ||
191 | static UBool | |
192 | _isNumericString(const char* s, int32_t len) { | |
193 | int32_t i; | |
194 | for (i = 0; i < len; i++) { | |
195 | if (!ISNUMERIC(*(s + i))) { | |
196 | return FALSE; | |
197 | } | |
198 | } | |
199 | return TRUE; | |
200 | } | |
201 | ||
202 | static UBool | |
203 | _isAlphaNumericString(const char* s, int32_t len) { | |
204 | int32_t i; | |
205 | for (i = 0; i < len; i++) { | |
206 | if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { | |
207 | return FALSE; | |
208 | } | |
209 | } | |
210 | return TRUE; | |
211 | } | |
212 | ||
213 | static UBool | |
214 | _isLanguageSubtag(const char* s, int32_t len) { | |
215 | /* | |
216 | * language = 2*3ALPHA ; shortest ISO 639 code | |
217 | * ["-" extlang] ; sometimes followed by | |
218 | * ; extended language subtags | |
219 | * / 4ALPHA ; or reserved for future use | |
220 | * / 5*8ALPHA ; or registered language subtag | |
221 | */ | |
222 | if (len < 0) { | |
223 | len = (int32_t)uprv_strlen(s); | |
224 | } | |
225 | if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { | |
226 | return TRUE; | |
227 | } | |
228 | return FALSE; | |
229 | } | |
230 | ||
231 | static UBool | |
232 | _isExtlangSubtag(const char* s, int32_t len) { | |
233 | /* | |
234 | * extlang = 3ALPHA ; selected ISO 639 codes | |
235 | * *2("-" 3ALPHA) ; permanently reserved | |
236 | */ | |
237 | if (len < 0) { | |
238 | len = (int32_t)uprv_strlen(s); | |
239 | } | |
240 | if (len == 3 && _isAlphaString(s, len)) { | |
241 | return TRUE; | |
242 | } | |
243 | return FALSE; | |
244 | } | |
245 | ||
246 | static UBool | |
247 | _isScriptSubtag(const char* s, int32_t len) { | |
248 | /* | |
249 | * script = 4ALPHA ; ISO 15924 code | |
250 | */ | |
251 | if (len < 0) { | |
252 | len = (int32_t)uprv_strlen(s); | |
253 | } | |
254 | if (len == 4 && _isAlphaString(s, len)) { | |
255 | return TRUE; | |
256 | } | |
257 | return FALSE; | |
258 | } | |
259 | ||
260 | static UBool | |
261 | _isRegionSubtag(const char* s, int32_t len) { | |
262 | /* | |
263 | * region = 2ALPHA ; ISO 3166-1 code | |
264 | * / 3DIGIT ; UN M.49 code | |
265 | */ | |
266 | if (len < 0) { | |
267 | len = (int32_t)uprv_strlen(s); | |
268 | } | |
269 | if (len == 2 && _isAlphaString(s, len)) { | |
270 | return TRUE; | |
271 | } | |
272 | if (len == 3 && _isNumericString(s, len)) { | |
273 | return TRUE; | |
274 | } | |
275 | return FALSE; | |
276 | } | |
277 | ||
278 | static UBool | |
279 | _isVariantSubtag(const char* s, int32_t len) { | |
280 | /* | |
281 | * variant = 5*8alphanum ; registered variants | |
282 | * / (DIGIT 3alphanum) | |
283 | */ | |
284 | if (len < 0) { | |
285 | len = (int32_t)uprv_strlen(s); | |
286 | } | |
4388f060 | 287 | if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { |
729e4ab9 A |
288 | return TRUE; |
289 | } | |
290 | if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { | |
291 | return TRUE; | |
292 | } | |
293 | return FALSE; | |
294 | } | |
295 | ||
4388f060 A |
296 | static UBool |
297 | _isPrivateuseVariantSubtag(const char* s, int32_t len) { | |
298 | /* | |
299 | * variant = 1*8alphanum ; registered variants | |
300 | * / (DIGIT 3alphanum) | |
301 | */ | |
302 | if (len < 0) { | |
303 | len = (int32_t)uprv_strlen(s); | |
304 | } | |
305 | if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { | |
306 | return TRUE; | |
307 | } | |
308 | return FALSE; | |
309 | } | |
310 | ||
729e4ab9 A |
311 | static UBool |
312 | _isExtensionSingleton(const char* s, int32_t len) { | |
313 | /* | |
314 | * extension = singleton 1*("-" (2*8alphanum)) | |
315 | */ | |
316 | if (len < 0) { | |
317 | len = (int32_t)uprv_strlen(s); | |
318 | } | |
319 | if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { | |
320 | return TRUE; | |
321 | } | |
322 | return FALSE; | |
323 | } | |
324 | ||
325 | static UBool | |
326 | _isExtensionSubtag(const char* s, int32_t len) { | |
327 | /* | |
328 | * extension = singleton 1*("-" (2*8alphanum)) | |
329 | */ | |
330 | if (len < 0) { | |
331 | len = (int32_t)uprv_strlen(s); | |
332 | } | |
333 | if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { | |
334 | return TRUE; | |
335 | } | |
336 | return FALSE; | |
337 | } | |
338 | ||
339 | static UBool | |
340 | _isExtensionSubtags(const char* s, int32_t len) { | |
341 | const char *p = s; | |
342 | const char *pSubtag = NULL; | |
343 | ||
344 | if (len < 0) { | |
345 | len = (int32_t)uprv_strlen(s); | |
346 | } | |
347 | ||
348 | while ((p - s) < len) { | |
349 | if (*p == SEP) { | |
350 | if (pSubtag == NULL) { | |
351 | return FALSE; | |
352 | } | |
353 | if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { | |
354 | return FALSE; | |
355 | } | |
356 | pSubtag = NULL; | |
357 | } else if (pSubtag == NULL) { | |
358 | pSubtag = p; | |
359 | } | |
360 | p++; | |
361 | } | |
362 | if (pSubtag == NULL) { | |
363 | return FALSE; | |
364 | } | |
365 | return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); | |
366 | } | |
367 | ||
368 | static UBool | |
369 | _isPrivateuseValueSubtag(const char* s, int32_t len) { | |
370 | /* | |
371 | * privateuse = "x" 1*("-" (1*8alphanum)) | |
372 | */ | |
373 | if (len < 0) { | |
374 | len = (int32_t)uprv_strlen(s); | |
375 | } | |
376 | if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { | |
377 | return TRUE; | |
378 | } | |
379 | return FALSE; | |
380 | } | |
381 | ||
382 | static UBool | |
383 | _isPrivateuseValueSubtags(const char* s, int32_t len) { | |
384 | const char *p = s; | |
385 | const char *pSubtag = NULL; | |
386 | ||
387 | if (len < 0) { | |
388 | len = (int32_t)uprv_strlen(s); | |
389 | } | |
390 | ||
391 | while ((p - s) < len) { | |
392 | if (*p == SEP) { | |
393 | if (pSubtag == NULL) { | |
394 | return FALSE; | |
395 | } | |
396 | if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { | |
397 | return FALSE; | |
398 | } | |
399 | pSubtag = NULL; | |
400 | } else if (pSubtag == NULL) { | |
401 | pSubtag = p; | |
402 | } | |
403 | p++; | |
404 | } | |
405 | if (pSubtag == NULL) { | |
406 | return FALSE; | |
407 | } | |
408 | return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); | |
409 | } | |
410 | ||
b331163b A |
411 | U_CFUNC UBool |
412 | ultag_isUnicodeLocaleKey(const char* s, int32_t len) { | |
729e4ab9 A |
413 | if (len < 0) { |
414 | len = (int32_t)uprv_strlen(s); | |
415 | } | |
416 | if (len == 2 && _isAlphaNumericString(s, len)) { | |
417 | return TRUE; | |
418 | } | |
419 | return FALSE; | |
420 | } | |
421 | ||
b331163b A |
422 | U_CFUNC UBool |
423 | ultag_isUnicodeLocaleType(const char*s, int32_t len) { | |
424 | const char* p; | |
425 | int32_t subtagLen = 0; | |
426 | ||
729e4ab9 A |
427 | if (len < 0) { |
428 | len = (int32_t)uprv_strlen(s); | |
429 | } | |
b331163b A |
430 | |
431 | for (p = s; len > 0; p++, len--) { | |
432 | if (*p == SEP) { | |
433 | if (subtagLen < 3) { | |
434 | return FALSE; | |
435 | } | |
436 | subtagLen = 0; | |
437 | } else if (ISALPHA(*p) || ISNUMERIC(*p)) { | |
438 | subtagLen++; | |
439 | if (subtagLen > 8) { | |
440 | return FALSE; | |
441 | } | |
442 | } else { | |
443 | return FALSE; | |
444 | } | |
729e4ab9 | 445 | } |
729e4ab9 | 446 | |
b331163b A |
447 | return (subtagLen >= 3); |
448 | } | |
729e4ab9 A |
449 | /* |
450 | * ------------------------------------------------- | |
451 | * | |
452 | * Helper functions | |
453 | * | |
454 | * ------------------------------------------------- | |
455 | */ | |
456 | ||
457 | static UBool | |
458 | _addVariantToList(VariantListEntry **first, VariantListEntry *var) { | |
459 | UBool bAdded = TRUE; | |
460 | ||
461 | if (*first == NULL) { | |
462 | var->next = NULL; | |
463 | *first = var; | |
464 | } else { | |
465 | VariantListEntry *prev, *cur; | |
466 | int32_t cmp; | |
467 | ||
4388f060 | 468 | /* variants order should be preserved */ |
729e4ab9 A |
469 | prev = NULL; |
470 | cur = *first; | |
471 | while (TRUE) { | |
472 | if (cur == NULL) { | |
473 | prev->next = var; | |
474 | var->next = NULL; | |
475 | break; | |
476 | } | |
4388f060 A |
477 | |
478 | /* Checking for duplicate variant */ | |
729e4ab9 | 479 | cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); |
4388f060 A |
480 | if (cmp == 0) { |
481 | /* duplicated variant */ | |
482 | bAdded = FALSE; | |
483 | break; | |
484 | } | |
485 | prev = cur; | |
486 | cur = cur->next; | |
487 | } | |
488 | } | |
489 | ||
490 | return bAdded; | |
491 | } | |
492 | ||
493 | static UBool | |
494 | _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { | |
495 | UBool bAdded = TRUE; | |
496 | ||
497 | if (*first == NULL) { | |
498 | attr->next = NULL; | |
499 | *first = attr; | |
500 | } else { | |
501 | AttributeListEntry *prev, *cur; | |
502 | int32_t cmp; | |
503 | ||
504 | /* reorder variants in alphabetical order */ | |
505 | prev = NULL; | |
506 | cur = *first; | |
507 | while (TRUE) { | |
508 | if (cur == NULL) { | |
509 | prev->next = attr; | |
510 | attr->next = NULL; | |
511 | break; | |
512 | } | |
513 | cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); | |
729e4ab9 A |
514 | if (cmp < 0) { |
515 | if (prev == NULL) { | |
4388f060 | 516 | *first = attr; |
729e4ab9 | 517 | } else { |
4388f060 | 518 | prev->next = attr; |
729e4ab9 | 519 | } |
4388f060 | 520 | attr->next = cur; |
729e4ab9 A |
521 | break; |
522 | } | |
523 | if (cmp == 0) { | |
524 | /* duplicated variant */ | |
525 | bAdded = FALSE; | |
526 | break; | |
527 | } | |
528 | prev = cur; | |
529 | cur = cur->next; | |
530 | } | |
531 | } | |
532 | ||
533 | return bAdded; | |
534 | } | |
535 | ||
536 | ||
537 | static UBool | |
538 | _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { | |
539 | UBool bAdded = TRUE; | |
540 | ||
541 | if (*first == NULL) { | |
542 | ext->next = NULL; | |
543 | *first = ext; | |
544 | } else { | |
545 | ExtensionListEntry *prev, *cur; | |
546 | int32_t cmp; | |
547 | ||
548 | /* reorder variants in alphabetical order */ | |
549 | prev = NULL; | |
550 | cur = *first; | |
551 | while (TRUE) { | |
552 | if (cur == NULL) { | |
553 | prev->next = ext; | |
554 | ext->next = NULL; | |
555 | break; | |
556 | } | |
557 | if (localeToBCP) { | |
558 | /* special handling for locale to bcp conversion */ | |
559 | int32_t len, curlen; | |
560 | ||
561 | len = (int32_t)uprv_strlen(ext->key); | |
562 | curlen = (int32_t)uprv_strlen(cur->key); | |
563 | ||
564 | if (len == 1 && curlen == 1) { | |
565 | if (*(ext->key) == *(cur->key)) { | |
566 | cmp = 0; | |
567 | } else if (*(ext->key) == PRIVATEUSE) { | |
568 | cmp = 1; | |
569 | } else if (*(cur->key) == PRIVATEUSE) { | |
570 | cmp = -1; | |
571 | } else { | |
572 | cmp = *(ext->key) - *(cur->key); | |
573 | } | |
574 | } else if (len == 1) { | |
575 | cmp = *(ext->key) - LDMLEXT; | |
576 | } else if (curlen == 1) { | |
577 | cmp = LDMLEXT - *(cur->key); | |
578 | } else { | |
579 | cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); | |
580 | } | |
581 | } else { | |
582 | cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); | |
583 | } | |
584 | if (cmp < 0) { | |
585 | if (prev == NULL) { | |
586 | *first = ext; | |
587 | } else { | |
588 | prev->next = ext; | |
589 | } | |
590 | ext->next = cur; | |
591 | break; | |
592 | } | |
593 | if (cmp == 0) { | |
594 | /* duplicated extension key */ | |
595 | bAdded = FALSE; | |
596 | break; | |
597 | } | |
598 | prev = cur; | |
599 | cur = cur->next; | |
600 | } | |
601 | } | |
602 | ||
603 | return bAdded; | |
604 | } | |
605 | ||
606 | static void | |
607 | _initializeULanguageTag(ULanguageTag* langtag) { | |
608 | int32_t i; | |
609 | ||
610 | langtag->buf = NULL; | |
611 | ||
612 | langtag->language = EMPTY; | |
613 | for (i = 0; i < MAXEXTLANG; i++) { | |
614 | langtag->extlang[i] = NULL; | |
615 | } | |
616 | ||
617 | langtag->script = EMPTY; | |
618 | langtag->region = EMPTY; | |
619 | ||
620 | langtag->variants = NULL; | |
621 | langtag->extensions = NULL; | |
622 | ||
623 | langtag->grandfathered = EMPTY; | |
624 | langtag->privateuse = EMPTY; | |
625 | } | |
626 | ||
729e4ab9 A |
627 | static int32_t |
628 | _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { | |
629 | char buf[ULOC_LANG_CAPACITY]; | |
630 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
631 | int32_t len, i; | |
632 | int32_t reslen = 0; | |
633 | ||
634 | if (U_FAILURE(*status)) { | |
635 | return 0; | |
636 | } | |
637 | ||
638 | len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); | |
639 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { | |
640 | if (strict) { | |
641 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
642 | return 0; | |
643 | } | |
644 | len = 0; | |
645 | } | |
646 | ||
647 | /* Note: returned language code is in lower case letters */ | |
648 | ||
649 | if (len == 0) { | |
650 | if (reslen < capacity) { | |
651 | uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); | |
652 | } | |
653 | reslen += LANG_UND_LEN; | |
654 | } else if (!_isLanguageSubtag(buf, len)) { | |
655 | /* invalid language code */ | |
656 | if (strict) { | |
657 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
658 | return 0; | |
659 | } | |
660 | if (reslen < capacity) { | |
661 | uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); | |
662 | } | |
663 | reslen += LANG_UND_LEN; | |
664 | } else { | |
665 | /* resolve deprecated */ | |
b331163b | 666 | for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { |
729e4ab9 A |
667 | if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { |
668 | uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); | |
669 | len = (int32_t)uprv_strlen(buf); | |
670 | break; | |
671 | } | |
672 | } | |
673 | if (reslen < capacity) { | |
674 | uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); | |
675 | } | |
676 | reslen += len; | |
677 | } | |
678 | u_terminateChars(appendAt, capacity, reslen, status); | |
679 | return reslen; | |
680 | } | |
681 | ||
682 | static int32_t | |
683 | _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { | |
684 | char buf[ULOC_SCRIPT_CAPACITY]; | |
685 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
686 | int32_t len; | |
687 | int32_t reslen = 0; | |
688 | ||
689 | if (U_FAILURE(*status)) { | |
690 | return 0; | |
691 | } | |
692 | ||
693 | len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); | |
694 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { | |
695 | if (strict) { | |
696 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
697 | } | |
698 | return 0; | |
699 | } | |
700 | ||
701 | if (len > 0) { | |
702 | if (!_isScriptSubtag(buf, len)) { | |
703 | /* invalid script code */ | |
704 | if (strict) { | |
705 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
706 | } | |
707 | return 0; | |
708 | } else { | |
709 | if (reslen < capacity) { | |
710 | *(appendAt + reslen) = SEP; | |
711 | } | |
712 | reslen++; | |
713 | ||
714 | if (reslen < capacity) { | |
715 | uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); | |
716 | } | |
717 | reslen += len; | |
718 | } | |
719 | } | |
720 | u_terminateChars(appendAt, capacity, reslen, status); | |
721 | return reslen; | |
722 | } | |
723 | ||
724 | static int32_t | |
725 | _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { | |
726 | char buf[ULOC_COUNTRY_CAPACITY]; | |
727 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
728 | int32_t len; | |
729 | int32_t reslen = 0; | |
730 | ||
731 | if (U_FAILURE(*status)) { | |
732 | return 0; | |
733 | } | |
734 | ||
735 | len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); | |
736 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { | |
737 | if (strict) { | |
738 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
739 | } | |
740 | return 0; | |
741 | } | |
742 | ||
743 | if (len > 0) { | |
744 | if (!_isRegionSubtag(buf, len)) { | |
745 | /* invalid region code */ | |
746 | if (strict) { | |
747 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
748 | } | |
749 | return 0; | |
750 | } else { | |
751 | if (reslen < capacity) { | |
752 | *(appendAt + reslen) = SEP; | |
753 | } | |
754 | reslen++; | |
755 | ||
756 | if (reslen < capacity) { | |
757 | uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); | |
758 | } | |
759 | reslen += len; | |
760 | } | |
761 | } | |
762 | u_terminateChars(appendAt, capacity, reslen, status); | |
763 | return reslen; | |
764 | } | |
765 | ||
/*
 * Writes the variants of localeID to appendAt, each one lowercased and
 * preceded by '-', in the order in which they occur in the locale.
 *
 * The variant string from uloc_getVariant() is split in place at '-' and
 * '_'. Each piece is lowercased and checked with _isVariantSubtag().
 *  - A well-formed piece is collected, unless it is "posix" and the whole
 *    variant string is exactly that long; in that case *hadPosix is set
 *    to TRUE and nothing is emitted for it. *hadPosix is not written
 *    otherwise, so the caller has to initialize it.
 *  - A duplicate of an already collected piece is dropped.
 *  - In non-strict mode an empty piece is skipped, an ill-formed piece
 *    that is a valid private use subtag ends the scan, and any other
 *    ill-formed piece is skipped.
 *  - In strict mode an empty, ill-formed or duplicated piece sets *status
 *    to U_ILLEGAL_ARGUMENT_ERROR.
 *
 * At most capacity chars are written; u_terminateChars() then terminates
 * the output or reports overflow through *status. The return value is the
 * full length of what was to be written, even when it did not fit.
 * 0 is returned when *status holds a failure on entry, when the variant
 * cannot be read (in strict mode this also sets U_ILLEGAL_ARGUMENT_ERROR)
 * and after any error raised during the scan, including
 * U_MEMORY_ALLOCATION_ERROR.
 */
static int32_t
_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
    char buf[ULOC_FULLNAME_CAPACITY];
    UErrorCode tmpStatus = U_ZERO_ERROR;
    int32_t len, i;
    int32_t reslen = 0;

    if (U_FAILURE(*status)) {
        return 0;
    }

    len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
        if (strict) {
            *status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        return 0;
    }

    if (len > 0) {
        char *p, *pVar;
        UBool bNext = TRUE;
        VariantListEntry *var;
        VariantListEntry *varFirst = NULL;

        /* pVar marks the start of the piece being scanned, NULL between pieces */
        pVar = NULL;
        p = buf;
        while (bNext) {
            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
                if (*p == 0) {
                    /* end of the variant string: this is the last pass */
                    bNext = FALSE;
                } else {
                    *p = 0; /* terminate */
                }
                if (pVar == NULL) {
                    if (strict) {
                        *status = U_ILLEGAL_ARGUMENT_ERROR;
                        break;
                    }
                    /* ignore empty variant */
                } else {
                    /* ICU uses upper case letters for variants, but
                       the canonical format is lowercase in BCP47 */
                    for (i = 0; *(pVar + i) != 0; i++) {
                        *(pVar + i) = uprv_tolower(*(pVar + i));
                    }

                    /* validate */
                    if (_isVariantSubtag(pVar, -1)) {
                        /* true for everything except a variant string that is exactly "posix" */
                        if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
                            /* emit the variant to the list */
                            var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
                            if (var == NULL) {
                                *status = U_MEMORY_ALLOCATION_ERROR;
                                break;
                            }
                            /* the node points into buf; only the node itself is heap-allocated */
                            var->variant = pVar;
                            if (!_addVariantToList(&varFirst, var)) {
                                /* duplicated variant */
                                uprv_free(var);
                                if (strict) {
                                    *status = U_ILLEGAL_ARGUMENT_ERROR;
                                    break;
                                }
                            }
                        } else {
                            /* Special handling for POSIX variant, need to remember that we had it and then */
                            /* treat it like an extension later. */
                            *hadPosix = TRUE;
                        }
                    } else if (strict) {
                        *status = U_ILLEGAL_ARGUMENT_ERROR;
                        break;
                    } else if (_isPrivateuseValueSubtag(pVar, -1)) {
                        /* Handle private use subtags separately */
                        break;
                    }
                }
                /* reset variant starting position */
                pVar = NULL;
            } else if (pVar == NULL) {
                pVar = p;
            }
            p++;
        }

        if (U_SUCCESS(*status)) {
            if (varFirst != NULL) {
                int32_t varLen;

                /* write out validated/normalized variants to the target */
                var = varFirst;
                while (var != NULL) {
                    if (reslen < capacity) {
                        *(appendAt + reslen) = SEP;
                    }
                    /* reslen keeps growing past capacity so the caller learns the needed size */
                    reslen++;
                    varLen = (int32_t)uprv_strlen(var->variant);
                    if (reslen < capacity) {
                        uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
                    }
                    reslen += varLen;
                    var = var->next;
                }
            }
        }

        /* clean up */
        var = varFirst;
        while (var != NULL) {
            VariantListEntry *tmpVar = var->next;
            uprv_free(var);
            var = tmpVar;
        }

        if (U_FAILURE(*status)) {
            return 0;
        }
    }

    u_terminateChars(appendAt, capacity, reslen, status);
    return reslen;
}
889 | ||
890 | static int32_t | |
891 | _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { | |
892 | char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
4388f060 A |
893 | char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; |
894 | int32_t attrBufLength = 0; | |
895 | UBool isAttribute = FALSE; | |
729e4ab9 A |
896 | UEnumeration *keywordEnum = NULL; |
897 | int32_t reslen = 0; | |
898 | ||
899 | keywordEnum = uloc_openKeywords(localeID, status); | |
900 | if (U_FAILURE(*status) && !hadPosix) { | |
901 | uenum_close(keywordEnum); | |
902 | return 0; | |
903 | } | |
904 | if (keywordEnum != NULL || hadPosix) { | |
905 | /* reorder extensions */ | |
906 | int32_t len; | |
907 | const char *key; | |
908 | ExtensionListEntry *firstExt = NULL; | |
909 | ExtensionListEntry *ext; | |
4388f060 A |
910 | AttributeListEntry *firstAttr = NULL; |
911 | AttributeListEntry *attr; | |
912 | char *attrValue; | |
729e4ab9 A |
913 | char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
914 | char *pExtBuf = extBuf; | |
915 | int32_t extBufCapacity = sizeof(extBuf); | |
916 | const char *bcpKey, *bcpValue; | |
917 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
918 | int32_t keylen; | |
b331163b | 919 | UBool isBcpUExt; |
729e4ab9 A |
920 | |
921 | while (TRUE) { | |
4388f060 | 922 | isAttribute = FALSE; |
729e4ab9 A |
923 | key = uenum_next(keywordEnum, NULL, status); |
924 | if (key == NULL) { | |
925 | break; | |
926 | } | |
927 | len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); | |
b331163b A |
928 | /* buf must be null-terminated */ |
929 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { | |
729e4ab9 A |
930 | if (strict) { |
931 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
932 | break; | |
933 | } | |
934 | /* ignore this keyword */ | |
935 | tmpStatus = U_ZERO_ERROR; | |
936 | continue; | |
937 | } | |
938 | ||
939 | keylen = (int32_t)uprv_strlen(key); | |
b331163b | 940 | isBcpUExt = (keylen > 1); |
729e4ab9 | 941 | |
4388f060 A |
942 | /* special keyword used for representing Unicode locale attributes */ |
943 | if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { | |
944 | isAttribute = TRUE; | |
945 | if (len > 0) { | |
946 | int32_t i = 0; | |
947 | while (TRUE) { | |
948 | attrBufLength = 0; | |
949 | for (; i < len; i++) { | |
950 | if (buf[i] != '-') { | |
951 | attrBuf[attrBufLength++] = buf[i]; | |
952 | } else { | |
953 | i++; | |
954 | break; | |
955 | } | |
956 | } | |
957 | if (attrBufLength > 0) { | |
958 | attrBuf[attrBufLength] = 0; | |
959 | ||
960 | } else if (i >= len){ | |
961 | break; | |
962 | } | |
963 | ||
964 | /* create AttributeListEntry */ | |
51004dcb | 965 | attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); |
4388f060 A |
966 | if (attr == NULL) { |
967 | *status = U_MEMORY_ALLOCATION_ERROR; | |
968 | break; | |
969 | } | |
51004dcb | 970 | attrValue = (char*)uprv_malloc(attrBufLength + 1); |
4388f060 A |
971 | if (attrValue == NULL) { |
972 | *status = U_MEMORY_ALLOCATION_ERROR; | |
973 | break; | |
974 | } | |
975 | uprv_strcpy(attrValue, attrBuf); | |
976 | attr->attribute = attrValue; | |
977 | ||
978 | if (!_addAttributeToList(&firstAttr, attr)) { | |
979 | uprv_free(attr); | |
980 | uprv_free(attrValue); | |
981 | if (strict) { | |
982 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
983 | break; | |
984 | } | |
985 | } | |
986 | } | |
987 | } | |
b331163b A |
988 | } else if (isBcpUExt) { |
989 | bcpKey = uloc_toUnicodeLocaleKey(key); | |
990 | if (bcpKey == NULL) { | |
729e4ab9 A |
991 | if (strict) { |
992 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
993 | break; | |
994 | } | |
729e4ab9 A |
995 | continue; |
996 | } | |
997 | ||
b331163b A |
998 | /* we've checked buf is null-terminated above */ |
999 | bcpValue = uloc_toUnicodeLocaleType(key, buf); | |
1000 | if (bcpValue == NULL) { | |
729e4ab9 A |
1001 | if (strict) { |
1002 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1003 | break; | |
1004 | } | |
729e4ab9 A |
1005 | continue; |
1006 | } | |
b331163b A |
1007 | if (bcpValue == buf) { |
1008 | /* | |
1009 | When uloc_toUnicodeLocaleType(key, buf) returns the | |
1010 | input value as is, the value is well-formed, but has | |
1011 | no known mapping. This implementation normalizes the | |
1012 | the value to lower case | |
1013 | */ | |
1014 | int32_t bcpValueLen = uprv_strlen(bcpValue); | |
1015 | if (bcpValueLen < extBufCapacity) { | |
1016 | uprv_strcpy(pExtBuf, bcpValue); | |
1017 | T_CString_toLowerCase(pExtBuf); | |
1018 | ||
1019 | bcpValue = pExtBuf; | |
1020 | ||
1021 | pExtBuf += (bcpValueLen + 1); | |
1022 | extBufCapacity -= (bcpValueLen + 1); | |
1023 | } else { | |
1024 | if (strict) { | |
1025 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1026 | break; | |
1027 | } | |
1028 | continue; | |
1029 | } | |
1030 | } | |
729e4ab9 A |
1031 | } else { |
1032 | if (*key == PRIVATEUSE) { | |
1033 | if (!_isPrivateuseValueSubtags(buf, len)) { | |
1034 | if (strict) { | |
1035 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1036 | break; | |
1037 | } | |
1038 | continue; | |
1039 | } | |
1040 | } else { | |
1041 | if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { | |
1042 | if (strict) { | |
1043 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1044 | break; | |
1045 | } | |
1046 | continue; | |
1047 | } | |
1048 | } | |
1049 | bcpKey = key; | |
1050 | if ((len + 1) < extBufCapacity) { | |
1051 | uprv_memcpy(pExtBuf, buf, len); | |
1052 | bcpValue = pExtBuf; | |
1053 | ||
1054 | pExtBuf += len; | |
1055 | ||
1056 | *pExtBuf = 0; | |
1057 | pExtBuf++; | |
1058 | ||
1059 | extBufCapacity -= (len + 1); | |
1060 | } else { | |
1061 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1062 | break; | |
1063 | } | |
1064 | } | |
1065 | ||
4388f060 A |
1066 | if (!isAttribute) { |
1067 | /* create ExtensionListEntry */ | |
51004dcb | 1068 | ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
4388f060 A |
1069 | if (ext == NULL) { |
1070 | *status = U_MEMORY_ALLOCATION_ERROR; | |
729e4ab9 A |
1071 | break; |
1072 | } | |
4388f060 A |
1073 | ext->key = bcpKey; |
1074 | ext->value = bcpValue; | |
1075 | ||
1076 | if (!_addExtensionToList(&firstExt, ext, TRUE)) { | |
1077 | uprv_free(ext); | |
1078 | if (strict) { | |
1079 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1080 | break; | |
1081 | } | |
1082 | } | |
729e4ab9 A |
1083 | } |
1084 | } | |
1085 | ||
1086 | /* Special handling for POSIX variant - add the keywords for POSIX */ | |
1087 | if (hadPosix) { | |
1088 | /* create ExtensionListEntry for POSIX */ | |
51004dcb | 1089 | ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
729e4ab9 A |
1090 | if (ext == NULL) { |
1091 | *status = U_MEMORY_ALLOCATION_ERROR; | |
4388f060 | 1092 | goto cleanup; |
729e4ab9 A |
1093 | } |
1094 | ext->key = POSIX_KEY; | |
1095 | ext->value = POSIX_VALUE; | |
1096 | ||
1097 | if (!_addExtensionToList(&firstExt, ext, TRUE)) { | |
1098 | uprv_free(ext); | |
1099 | } | |
1100 | } | |
1101 | ||
4388f060 | 1102 | if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { |
729e4ab9 A |
1103 | UBool startLDMLExtension = FALSE; |
1104 | ||
4388f060 | 1105 | attr = firstAttr; |
729e4ab9 | 1106 | ext = firstExt; |
4388f060 A |
1107 | do { |
1108 | if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) { | |
1109 | /* write LDML singleton extension */ | |
1110 | if (reslen < capacity) { | |
1111 | *(appendAt + reslen) = SEP; | |
1112 | } | |
1113 | reslen++; | |
1114 | if (reslen < capacity) { | |
1115 | *(appendAt + reslen) = LDMLEXT; | |
1116 | } | |
1117 | reslen++; | |
1118 | ||
1119 | startLDMLExtension = TRUE; | |
1120 | } | |
1121 | ||
1122 | /* write out the sorted BCP47 attributes, extensions and private use */ | |
1123 | if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) { | |
729e4ab9 A |
1124 | if (reslen < capacity) { |
1125 | *(appendAt + reslen) = SEP; | |
1126 | } | |
1127 | reslen++; | |
4388f060 | 1128 | len = (int32_t)uprv_strlen(ext->key); |
729e4ab9 | 1129 | if (reslen < capacity) { |
4388f060 A |
1130 | uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); |
1131 | } | |
1132 | reslen += len; | |
1133 | if (reslen < capacity) { | |
1134 | *(appendAt + reslen) = SEP; | |
729e4ab9 A |
1135 | } |
1136 | reslen++; | |
4388f060 A |
1137 | len = (int32_t)uprv_strlen(ext->value); |
1138 | if (reslen < capacity) { | |
1139 | uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); | |
1140 | } | |
1141 | reslen += len; | |
729e4ab9 | 1142 | |
4388f060 A |
1143 | ext = ext->next; |
1144 | } else if (attr) { | |
1145 | /* write the value for the attributes */ | |
1146 | if (reslen < capacity) { | |
1147 | *(appendAt + reslen) = SEP; | |
1148 | } | |
1149 | reslen++; | |
1150 | len = (int32_t)uprv_strlen(attr->attribute); | |
1151 | if (reslen < capacity) { | |
1152 | uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); | |
1153 | } | |
1154 | reslen += len; | |
729e4ab9 | 1155 | |
4388f060 A |
1156 | attr = attr->next; |
1157 | } | |
1158 | } while (attr != NULL || ext != NULL); | |
729e4ab9 | 1159 | } |
4388f060 | 1160 | cleanup: |
729e4ab9 A |
1161 | /* clean up */ |
1162 | ext = firstExt; | |
1163 | while (ext != NULL) { | |
1164 | ExtensionListEntry *tmpExt = ext->next; | |
1165 | uprv_free(ext); | |
1166 | ext = tmpExt; | |
1167 | } | |
1168 | ||
4388f060 A |
1169 | attr = firstAttr; |
1170 | while (attr != NULL) { | |
1171 | AttributeListEntry *tmpAttr = attr->next; | |
1172 | char *pValue = (char *)attr->attribute; | |
1173 | uprv_free(pValue); | |
1174 | uprv_free(attr); | |
1175 | attr = tmpAttr; | |
1176 | } | |
1177 | ||
729e4ab9 A |
1178 | uenum_close(keywordEnum); |
1179 | ||
1180 | if (U_FAILURE(*status)) { | |
1181 | return 0; | |
1182 | } | |
1183 | } | |
1184 | ||
1185 | return u_terminateChars(appendAt, capacity, reslen, status); | |
1186 | } | |
1187 | ||
1188 | /** | |
1189 | * Append keywords parsed from LDML extension value | |
1190 | * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} | |
1191 | * Note: char* buf is used for storing keywords | |
1192 | */ | |
1193 | static void | |
1194 | _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { | |
51004dcb A |
1195 | const char *pTag; /* beginning of current subtag */ |
1196 | const char *pKwds; /* beginning of key-type pairs */ | |
1197 | UBool variantExists = *posixVariant; | |
1198 | ||
1199 | ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ | |
729e4ab9 | 1200 | ExtensionListEntry *kwd, *nextKwd; |
51004dcb A |
1201 | |
1202 | AttributeListEntry *attrFirst = NULL; /* first attribute */ | |
1203 | AttributeListEntry *attr, *nextAttr; | |
1204 | ||
1205 | int32_t len; | |
729e4ab9 | 1206 | int32_t bufIdx = 0; |
51004dcb A |
1207 | |
1208 | char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
1209 | int32_t attrBufIdx = 0; | |
4388f060 A |
1210 | |
1211 | /* Reset the posixVariant value */ | |
1212 | *posixVariant = FALSE; | |
729e4ab9 | 1213 | |
51004dcb A |
1214 | pTag = ldmlext; |
1215 | pKwds = NULL; | |
729e4ab9 | 1216 | |
51004dcb A |
1217 | /* Iterate through u extension attributes */ |
1218 | while (*pTag) { | |
729e4ab9 | 1219 | /* locate next separator char */ |
51004dcb A |
1220 | for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); |
1221 | ||
b331163b | 1222 | if (ultag_isUnicodeLocaleKey(pTag, len)) { |
51004dcb A |
1223 | pKwds = pTag; |
1224 | break; | |
729e4ab9 | 1225 | } |
51004dcb A |
1226 | |
1227 | /* add this attribute to the list */ | |
1228 | attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); | |
1229 | if (attr == NULL) { | |
1230 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1231 | goto cleanup; | |
729e4ab9 A |
1232 | } |
1233 | ||
51004dcb A |
1234 | if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { |
1235 | uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); | |
1236 | attrBuf[attrBufIdx + len] = 0; | |
1237 | attr->attribute = &attrBuf[attrBufIdx]; | |
1238 | attrBufIdx += (len + 1); | |
729e4ab9 | 1239 | } else { |
51004dcb A |
1240 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1241 | goto cleanup; | |
1242 | } | |
729e4ab9 | 1243 | |
51004dcb A |
1244 | if (!_addAttributeToList(&attrFirst, attr)) { |
1245 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1246 | uprv_free(attr); | |
1247 | goto cleanup; | |
1248 | } | |
729e4ab9 | 1249 | |
51004dcb A |
1250 | /* next tag */ |
1251 | pTag += len; | |
1252 | if (*pTag) { | |
1253 | /* next to the separator */ | |
1254 | pTag++; | |
1255 | } | |
1256 | } | |
1257 | ||
1258 | if (attrFirst) { | |
1259 | /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */ | |
1260 | ||
1261 | if (attrBufIdx > bufSize) { | |
1262 | /* attrBufIdx == <total length of attribute subtag> + 1 */ | |
1263 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1264 | goto cleanup; | |
1265 | } | |
1266 | ||
1267 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); | |
1268 | if (kwd == NULL) { | |
1269 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1270 | goto cleanup; | |
1271 | } | |
1272 | ||
1273 | kwd->key = LOCALE_ATTRIBUTE_KEY; | |
1274 | kwd->value = buf; | |
1275 | ||
1276 | /* attribute subtags sorted in alphabetical order as type */ | |
1277 | attr = attrFirst; | |
1278 | while (attr != NULL) { | |
1279 | nextAttr = attr->next; | |
1280 | ||
1281 | /* buffer size check is done above */ | |
1282 | if (attr != attrFirst) { | |
1283 | *(buf + bufIdx) = SEP; | |
1284 | bufIdx++; | |
729e4ab9 | 1285 | } |
51004dcb A |
1286 | |
1287 | len = uprv_strlen(attr->attribute); | |
1288 | uprv_memcpy(buf + bufIdx, attr->attribute, len); | |
729e4ab9 | 1289 | bufIdx += len; |
729e4ab9 | 1290 | |
51004dcb A |
1291 | attr = nextAttr; |
1292 | } | |
1293 | *(buf + bufIdx) = 0; | |
1294 | bufIdx++; | |
1295 | ||
1296 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { | |
1297 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1298 | uprv_free(kwd); | |
1299 | goto cleanup; | |
1300 | } | |
1301 | ||
1302 | /* once keyword entry is created, delete the attribute list */ | |
1303 | attr = attrFirst; | |
1304 | while (attr != NULL) { | |
1305 | nextAttr = attr->next; | |
1306 | uprv_free(attr); | |
1307 | attr = nextAttr; | |
1308 | } | |
1309 | attrFirst = NULL; | |
1310 | } | |
1311 | ||
1312 | if (pKwds) { | |
1313 | const char *pBcpKey = NULL; /* u extenstion key subtag */ | |
1314 | const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ | |
1315 | int32_t bcpKeyLen = 0; | |
1316 | int32_t bcpTypeLen = 0; | |
1317 | UBool isDone = FALSE; | |
1318 | ||
1319 | pTag = pKwds; | |
1320 | /* BCP47 representation of LDML key/type pairs */ | |
1321 | while (!isDone) { | |
1322 | const char *pNextBcpKey = NULL; | |
b331163b | 1323 | int32_t nextBcpKeyLen = 0; |
51004dcb A |
1324 | UBool emitKeyword = FALSE; |
1325 | ||
1326 | if (*pTag) { | |
1327 | /* locate next separator char */ | |
1328 | for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); | |
1329 | ||
b331163b | 1330 | if (ultag_isUnicodeLocaleKey(pTag, len)) { |
51004dcb A |
1331 | if (pBcpKey) { |
1332 | emitKeyword = TRUE; | |
1333 | pNextBcpKey = pTag; | |
1334 | nextBcpKeyLen = len; | |
1335 | } else { | |
1336 | pBcpKey = pTag; | |
1337 | bcpKeyLen = len; | |
1338 | } | |
1339 | } else { | |
1340 | U_ASSERT(pBcpKey != NULL); | |
1341 | /* within LDML type subtags */ | |
1342 | if (pBcpType) { | |
1343 | bcpTypeLen += (len + 1); | |
1344 | } else { | |
1345 | pBcpType = pTag; | |
1346 | bcpTypeLen = len; | |
1347 | } | |
1348 | } | |
729e4ab9 | 1349 | |
51004dcb A |
1350 | /* next tag */ |
1351 | pTag += len; | |
1352 | if (*pTag) { | |
1353 | /* next to the separator */ | |
1354 | pTag++; | |
1355 | } | |
729e4ab9 | 1356 | } else { |
51004dcb A |
1357 | /* processing last one */ |
1358 | emitKeyword = TRUE; | |
1359 | isDone = TRUE; | |
1360 | } | |
1361 | ||
1362 | if (emitKeyword) { | |
1363 | const char *pKey = NULL; /* LDML key */ | |
1364 | const char *pType = NULL; /* LDML type */ | |
1365 | ||
b331163b A |
1366 | char bcpKeyBuf[9]; /* BCP key length is always 2 for now */ |
1367 | ||
51004dcb A |
1368 | U_ASSERT(pBcpKey != NULL); |
1369 | ||
b331163b A |
1370 | if (bcpKeyLen >= sizeof(bcpKeyBuf)) { |
1371 | /* the BCP key is invalid */ | |
1372 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1373 | goto cleanup; | |
1374 | } | |
1375 | ||
1376 | uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); | |
1377 | bcpKeyBuf[bcpKeyLen] = 0; | |
1378 | ||
51004dcb | 1379 | /* u extension key to LDML key */ |
b331163b A |
1380 | pKey = uloc_toLegacyKey(bcpKeyBuf); |
1381 | if (pKey == NULL) { | |
1382 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
729e4ab9 A |
1383 | goto cleanup; |
1384 | } | |
b331163b A |
1385 | if (pKey == bcpKeyBuf) { |
1386 | /* | |
1387 | The key returned by toLegacyKey points to the input buffer. | |
1388 | We normalize the result key to lower case. | |
1389 | */ | |
1390 | T_CString_toLowerCase(bcpKeyBuf); | |
1391 | if (bufSize - bufIdx - 1 >= bcpKeyLen) { | |
1392 | uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen); | |
1393 | pKey = buf + bufIdx; | |
1394 | bufIdx += bcpKeyLen; | |
1395 | *(buf + bufIdx) = 0; | |
1396 | bufIdx++; | |
1397 | } else { | |
1398 | *status = U_BUFFER_OVERFLOW_ERROR; | |
1399 | goto cleanup; | |
1400 | } | |
1401 | } | |
51004dcb A |
1402 | |
1403 | if (pBcpType) { | |
b331163b A |
1404 | char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */ |
1405 | if (bcpTypeLen >= sizeof(bcpTypeBuf)) { | |
1406 | /* the BCP type is too long */ | |
1407 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1408 | goto cleanup; | |
1409 | } | |
1410 | ||
1411 | uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen); | |
1412 | bcpTypeBuf[bcpTypeLen] = 0; | |
1413 | ||
51004dcb | 1414 | /* BCP type to locale type */ |
b331163b A |
1415 | pType = uloc_toLegacyType(pKey, bcpTypeBuf); |
1416 | if (pType == NULL) { | |
1417 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
51004dcb A |
1418 | goto cleanup; |
1419 | } | |
b331163b A |
1420 | if (pType == bcpTypeBuf) { |
1421 | /* | |
1422 | The type returned by toLegacyType points to the input buffer. | |
1423 | We normalize the result type to lower case. | |
1424 | */ | |
1425 | /* normalize to lower case */ | |
1426 | T_CString_toLowerCase(bcpTypeBuf); | |
1427 | if (bufSize - bufIdx - 1 >= bcpTypeLen) { | |
1428 | uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen); | |
1429 | pType = buf + bufIdx; | |
1430 | bufIdx += bcpTypeLen; | |
1431 | *(buf + bufIdx) = 0; | |
1432 | bufIdx++; | |
1433 | } else { | |
1434 | *status = U_BUFFER_OVERFLOW_ERROR; | |
1435 | goto cleanup; | |
1436 | } | |
1437 | } | |
51004dcb A |
1438 | } else { |
1439 | /* typeless - default type value is "yes" */ | |
1440 | pType = LOCALE_TYPE_YES; | |
1441 | } | |
729e4ab9 | 1442 | |
51004dcb A |
1443 | /* Special handling for u-va-posix, since we want to treat this as a variant, |
1444 | not as a keyword */ | |
1445 | if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { | |
1446 | *posixVariant = TRUE; | |
1447 | } else { | |
1448 | /* create an ExtensionListEntry for this keyword */ | |
1449 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); | |
1450 | if (kwd == NULL) { | |
1451 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1452 | goto cleanup; | |
1453 | } | |
729e4ab9 | 1454 | |
51004dcb A |
1455 | kwd->key = pKey; |
1456 | kwd->value = pType; | |
1457 | ||
1458 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { | |
1459 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1460 | uprv_free(kwd); | |
1461 | goto cleanup; | |
1462 | } | |
729e4ab9 | 1463 | } |
729e4ab9 | 1464 | |
51004dcb A |
1465 | pBcpKey = pNextBcpKey; |
1466 | bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0; | |
1467 | pBcpType = NULL; | |
1468 | bcpTypeLen = 0; | |
1469 | } | |
729e4ab9 A |
1470 | } |
1471 | } | |
1472 | ||
729e4ab9 A |
1473 | kwd = kwdFirst; |
1474 | while (kwd != NULL) { | |
1475 | nextKwd = kwd->next; | |
1476 | _addExtensionToList(appendTo, kwd, FALSE); | |
1477 | kwd = nextKwd; | |
1478 | } | |
1479 | ||
1480 | return; | |
1481 | ||
1482 | cleanup: | |
51004dcb A |
1483 | attr = attrFirst; |
1484 | while (attr != NULL) { | |
1485 | nextAttr = attr->next; | |
1486 | uprv_free(attr); | |
1487 | attr = nextAttr; | |
1488 | } | |
1489 | ||
729e4ab9 A |
1490 | kwd = kwdFirst; |
1491 | while (kwd != NULL) { | |
1492 | nextKwd = kwd->next; | |
1493 | uprv_free(kwd); | |
1494 | kwd = nextKwd; | |
1495 | } | |
1496 | } | |
1497 | ||
1498 | ||
1499 | static int32_t | |
1500 | _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { | |
1501 | int32_t reslen = 0; | |
1502 | int32_t i, n; | |
1503 | int32_t len; | |
1504 | ExtensionListEntry *kwdFirst = NULL; | |
1505 | ExtensionListEntry *kwd; | |
1506 | const char *key, *type; | |
4388f060 A |
1507 | char *kwdBuf = NULL; |
1508 | int32_t kwdBufLength = capacity; | |
729e4ab9 A |
1509 | UBool posixVariant = FALSE; |
1510 | ||
1511 | if (U_FAILURE(*status)) { | |
1512 | return 0; | |
1513 | } | |
1514 | ||
51004dcb | 1515 | kwdBuf = (char*)uprv_malloc(kwdBufLength); |
4388f060 A |
1516 | if (kwdBuf == NULL) { |
1517 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1518 | return 0; | |
1519 | } | |
1520 | ||
1521 | /* Determine if variants already exists */ | |
1522 | if (ultag_getVariantsSize(langtag)) { | |
1523 | posixVariant = TRUE; | |
1524 | } | |
1525 | ||
729e4ab9 A |
1526 | n = ultag_getExtensionsSize(langtag); |
1527 | ||
1528 | /* resolve locale keywords and reordering keys */ | |
1529 | for (i = 0; i < n; i++) { | |
1530 | key = ultag_getExtensionKey(langtag, i); | |
1531 | type = ultag_getExtensionValue(langtag, i); | |
1532 | if (*key == LDMLEXT) { | |
4388f060 | 1533 | _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); |
729e4ab9 A |
1534 | if (U_FAILURE(*status)) { |
1535 | break; | |
1536 | } | |
1537 | } else { | |
51004dcb | 1538 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
729e4ab9 A |
1539 | if (kwd == NULL) { |
1540 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1541 | break; | |
1542 | } | |
1543 | kwd->key = key; | |
1544 | kwd->value = type; | |
1545 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { | |
1546 | uprv_free(kwd); | |
1547 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1548 | break; | |
1549 | } | |
1550 | } | |
1551 | } | |
1552 | ||
1553 | if (U_SUCCESS(*status)) { | |
1554 | type = ultag_getPrivateUse(langtag); | |
1555 | if ((int32_t)uprv_strlen(type) > 0) { | |
1556 | /* add private use as a keyword */ | |
51004dcb | 1557 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
729e4ab9 A |
1558 | if (kwd == NULL) { |
1559 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1560 | } else { | |
1561 | kwd->key = PRIVATEUSE_KEY; | |
1562 | kwd->value = type; | |
1563 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { | |
1564 | uprv_free(kwd); | |
1565 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1566 | } | |
1567 | } | |
1568 | } | |
1569 | } | |
1570 | ||
1571 | /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ | |
1572 | ||
1573 | if (U_SUCCESS(*status) && posixVariant) { | |
1574 | len = (int32_t) uprv_strlen(_POSIX); | |
1575 | if (reslen < capacity) { | |
1576 | uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); | |
1577 | } | |
1578 | reslen += len; | |
1579 | } | |
1580 | ||
51004dcb | 1581 | if (U_SUCCESS(*status) && kwdFirst != NULL) { |
729e4ab9 | 1582 | /* write out the sorted keywords */ |
4388f060 | 1583 | UBool firstValue = TRUE; |
729e4ab9 | 1584 | kwd = kwdFirst; |
4388f060 | 1585 | do { |
729e4ab9 | 1586 | if (reslen < capacity) { |
4388f060 | 1587 | if (firstValue) { |
729e4ab9 A |
1588 | /* '@' */ |
1589 | *(appendAt + reslen) = LOCALE_EXT_SEP; | |
4388f060 | 1590 | firstValue = FALSE; |
51004dcb | 1591 | } else { |
729e4ab9 A |
1592 | /* ';' */ |
1593 | *(appendAt + reslen) = LOCALE_KEYWORD_SEP; | |
1594 | } | |
1595 | } | |
1596 | reslen++; | |
1597 | ||
51004dcb A |
1598 | /* key */ |
1599 | len = (int32_t)uprv_strlen(kwd->key); | |
1600 | if (reslen < capacity) { | |
1601 | uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); | |
1602 | } | |
1603 | reslen += len; | |
4388f060 | 1604 | |
51004dcb A |
1605 | /* '=' */ |
1606 | if (reslen < capacity) { | |
1607 | *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; | |
1608 | } | |
1609 | reslen++; | |
4388f060 | 1610 | |
51004dcb A |
1611 | /* type */ |
1612 | len = (int32_t)uprv_strlen(kwd->value); | |
1613 | if (reslen < capacity) { | |
1614 | uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); | |
4388f060 | 1615 | } |
51004dcb A |
1616 | reslen += len; |
1617 | ||
1618 | kwd = kwd->next; | |
1619 | } while (kwd); | |
729e4ab9 A |
1620 | } |
1621 | ||
1622 | /* clean up */ | |
1623 | kwd = kwdFirst; | |
1624 | while (kwd != NULL) { | |
1625 | ExtensionListEntry *tmpKwd = kwd->next; | |
1626 | uprv_free(kwd); | |
1627 | kwd = tmpKwd; | |
1628 | } | |
1629 | ||
4388f060 A |
1630 | uprv_free(kwdBuf); |
1631 | ||
729e4ab9 A |
1632 | if (U_FAILURE(*status)) { |
1633 | return 0; | |
1634 | } | |
1635 | ||
1636 | return u_terminateChars(appendAt, capacity, reslen, status); | |
1637 | } | |
1638 | ||
4388f060 A |
1639 | static int32_t |
1640 | _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { | |
1641 | char buf[ULOC_FULLNAME_CAPACITY]; | |
1642 | char tmpAppend[ULOC_FULLNAME_CAPACITY]; | |
1643 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
1644 | int32_t len, i; | |
1645 | int32_t reslen = 0; | |
1646 | ||
1647 | if (U_FAILURE(*status)) { | |
1648 | return 0; | |
1649 | } | |
1650 | ||
1651 | len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); | |
1652 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { | |
1653 | if (strict) { | |
1654 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1655 | } | |
1656 | return 0; | |
1657 | } | |
1658 | ||
1659 | if (len > 0) { | |
1660 | char *p, *pPriv; | |
1661 | UBool bNext = TRUE; | |
1662 | UBool firstValue = TRUE; | |
1663 | UBool writeValue; | |
1664 | ||
1665 | pPriv = NULL; | |
1666 | p = buf; | |
1667 | while (bNext) { | |
1668 | writeValue = FALSE; | |
1669 | if (*p == SEP || *p == LOCALE_SEP || *p == 0) { | |
1670 | if (*p == 0) { | |
1671 | bNext = FALSE; | |
1672 | } else { | |
1673 | *p = 0; /* terminate */ | |
1674 | } | |
1675 | if (pPriv != NULL) { | |
1676 | /* Private use in the canonical format is lowercase in BCP47 */ | |
1677 | for (i = 0; *(pPriv + i) != 0; i++) { | |
1678 | *(pPriv + i) = uprv_tolower(*(pPriv + i)); | |
1679 | } | |
1680 | ||
1681 | /* validate */ | |
1682 | if (_isPrivateuseValueSubtag(pPriv, -1)) { | |
1683 | if (firstValue) { | |
1684 | if (!_isVariantSubtag(pPriv, -1)) { | |
1685 | writeValue = TRUE; | |
1686 | } | |
1687 | } else { | |
1688 | writeValue = TRUE; | |
1689 | } | |
1690 | } else if (strict) { | |
1691 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
1692 | break; | |
1693 | } else { | |
1694 | break; | |
1695 | } | |
1696 | ||
1697 | if (writeValue) { | |
1698 | if (reslen < capacity) { | |
1699 | tmpAppend[reslen++] = SEP; | |
1700 | } | |
1701 | ||
1702 | if (firstValue) { | |
1703 | if (reslen < capacity) { | |
1704 | tmpAppend[reslen++] = *PRIVATEUSE_KEY; | |
1705 | } | |
1706 | ||
1707 | if (reslen < capacity) { | |
1708 | tmpAppend[reslen++] = SEP; | |
1709 | } | |
1710 | ||
1711 | len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); | |
1712 | if (reslen < capacity) { | |
1713 | uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); | |
1714 | } | |
1715 | reslen += len; | |
1716 | ||
1717 | if (reslen < capacity) { | |
1718 | tmpAppend[reslen++] = SEP; | |
1719 | } | |
1720 | ||
1721 | firstValue = FALSE; | |
1722 | } | |
1723 | ||
1724 | len = (int32_t)uprv_strlen(pPriv); | |
1725 | if (reslen < capacity) { | |
1726 | uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); | |
1727 | } | |
1728 | reslen += len; | |
1729 | } | |
1730 | } | |
1731 | /* reset private use starting position */ | |
1732 | pPriv = NULL; | |
1733 | } else if (pPriv == NULL) { | |
1734 | pPriv = p; | |
1735 | } | |
1736 | p++; | |
1737 | } | |
1738 | ||
1739 | if (U_FAILURE(*status)) { | |
1740 | return 0; | |
1741 | } | |
1742 | } | |
1743 | ||
1744 | if (U_SUCCESS(*status)) { | |
1745 | len = reslen; | |
1746 | if (reslen < capacity) { | |
1747 | uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen)); | |
1748 | } | |
1749 | } | |
1750 | ||
1751 | u_terminateChars(appendAt, capacity, reslen, status); | |
1752 | ||
1753 | return reslen; | |
1754 | } | |
1755 | ||
729e4ab9 A |
1756 | /* |
1757 | * ------------------------------------------------- | |
1758 | * | |
1759 | * ultag_ functions | |
1760 | * | |
1761 | * ------------------------------------------------- | |
1762 | */ | |
1763 | ||
/*
 * Parser state bits, one per kind of subtag. The parser ORs together the
 * kinds that are allowed to come next and tests each subtag against that mask.
 */
#define LANG (1 << 0)   /* language */
#define EXTL (1 << 1)   /* extlang */
#define SCRT (1 << 2)   /* script */
#define REGN (1 << 3)   /* region */
#define VART (1 << 4)   /* variant */
#define EXTS (1 << 5)   /* extension singleton */
#define EXTV (1 << 6)   /* extension value subtag */
#define PRIV (1 << 7)   /* private use */
1773 | ||
1774 | static ULanguageTag* | |
1775 | ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { | |
1776 | ULanguageTag *t; | |
1777 | char *tagBuf; | |
1778 | int16_t next; | |
1779 | char *pSubtag, *pNext, *pLastGoodPosition; | |
1780 | int32_t subtagLen; | |
1781 | int32_t extlangIdx; | |
1782 | ExtensionListEntry *pExtension; | |
1783 | char *pExtValueSubtag, *pExtValueSubtagEnd; | |
1784 | int32_t i; | |
51004dcb A |
1785 | UBool privateuseVar = FALSE; |
1786 | int32_t grandfatheredLen = 0; | |
729e4ab9 A |
1787 | |
1788 | if (parsedLen != NULL) { | |
1789 | *parsedLen = 0; | |
1790 | } | |
1791 | ||
1792 | if (U_FAILURE(*status)) { | |
1793 | return NULL; | |
1794 | } | |
1795 | ||
1796 | if (tagLen < 0) { | |
1797 | tagLen = (int32_t)uprv_strlen(tag); | |
1798 | } | |
1799 | ||
1800 | /* copy the entire string */ | |
1801 | tagBuf = (char*)uprv_malloc(tagLen + 1); | |
1802 | if (tagBuf == NULL) { | |
1803 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1804 | return NULL; | |
1805 | } | |
1806 | uprv_memcpy(tagBuf, tag, tagLen); | |
1807 | *(tagBuf + tagLen) = 0; | |
1808 | ||
1809 | /* create a ULanguageTag */ | |
1810 | t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); | |
729e4ab9 A |
1811 | if (t == NULL) { |
1812 | uprv_free(tagBuf); | |
1813 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1814 | return NULL; | |
1815 | } | |
4388f060 A |
1816 | _initializeULanguageTag(t); |
1817 | t->buf = tagBuf; | |
729e4ab9 A |
1818 | |
1819 | if (tagLen < MINLEN) { | |
1820 | /* the input tag is too short - return empty ULanguageTag */ | |
1821 | return t; | |
1822 | } | |
1823 | ||
1824 | /* check if the tag is grandfathered */ | |
1825 | for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { | |
4388f060 | 1826 | if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { |
51004dcb A |
1827 | int32_t newTagLength; |
1828 | ||
1829 | grandfatheredLen = tagLen; /* back up for output parsedLen */ | |
1830 | newTagLength = uprv_strlen(GRANDFATHERED[i+1]); | |
4388f060 A |
1831 | if (tagLen < newTagLength) { |
1832 | uprv_free(tagBuf); | |
1833 | tagBuf = (char*)uprv_malloc(newTagLength + 1); | |
1834 | if (tagBuf == NULL) { | |
1835 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1836 | return NULL; | |
1837 | } | |
1838 | t->buf = tagBuf; | |
1839 | tagLen = newTagLength; | |
729e4ab9 | 1840 | } |
4388f060 | 1841 | uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); |
4388f060 | 1842 | break; |
729e4ab9 A |
1843 | } |
1844 | } | |
1845 | ||
1846 | /* | |
1847 | * langtag = language | |
1848 | * ["-" script] | |
1849 | * ["-" region] | |
1850 | * *("-" variant) | |
1851 | * *("-" extension) | |
1852 | * ["-" privateuse] | |
1853 | */ | |
1854 | ||
1855 | next = LANG | PRIV; | |
1856 | pNext = pLastGoodPosition = tagBuf; | |
1857 | extlangIdx = 0; | |
1858 | pExtension = NULL; | |
1859 | pExtValueSubtag = NULL; | |
1860 | pExtValueSubtagEnd = NULL; | |
729e4ab9 A |
1861 | |
1862 | while (pNext) { | |
1863 | char *pSep; | |
1864 | ||
1865 | pSubtag = pNext; | |
1866 | ||
1867 | /* locate next separator char */ | |
1868 | pSep = pSubtag; | |
1869 | while (*pSep) { | |
1870 | if (*pSep == SEP) { | |
1871 | break; | |
1872 | } | |
1873 | pSep++; | |
1874 | } | |
1875 | if (*pSep == 0) { | |
1876 | /* last subtag */ | |
1877 | pNext = NULL; | |
1878 | } else { | |
1879 | pNext = pSep + 1; | |
1880 | } | |
1881 | subtagLen = (int32_t)(pSep - pSubtag); | |
1882 | ||
1883 | if (next & LANG) { | |
1884 | if (_isLanguageSubtag(pSubtag, subtagLen)) { | |
1885 | *pSep = 0; /* terminate */ | |
1886 | t->language = T_CString_toLowerCase(pSubtag); | |
1887 | ||
1888 | pLastGoodPosition = pSep; | |
1889 | next = EXTL | SCRT | REGN | VART | EXTS | PRIV; | |
1890 | continue; | |
1891 | } | |
1892 | } | |
1893 | if (next & EXTL) { | |
1894 | if (_isExtlangSubtag(pSubtag, subtagLen)) { | |
1895 | *pSep = 0; | |
1896 | t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); | |
1897 | ||
1898 | pLastGoodPosition = pSep; | |
1899 | if (extlangIdx < 3) { | |
1900 | next = EXTL | SCRT | REGN | VART | EXTS | PRIV; | |
1901 | } else { | |
1902 | next = SCRT | REGN | VART | EXTS | PRIV; | |
1903 | } | |
1904 | continue; | |
1905 | } | |
1906 | } | |
1907 | if (next & SCRT) { | |
1908 | if (_isScriptSubtag(pSubtag, subtagLen)) { | |
1909 | char *p = pSubtag; | |
1910 | ||
1911 | *pSep = 0; | |
1912 | ||
1913 | /* to title case */ | |
1914 | *p = uprv_toupper(*p); | |
1915 | p++; | |
1916 | for (; *p; p++) { | |
1917 | *p = uprv_tolower(*p); | |
1918 | } | |
1919 | ||
1920 | t->script = pSubtag; | |
1921 | ||
1922 | pLastGoodPosition = pSep; | |
1923 | next = REGN | VART | EXTS | PRIV; | |
1924 | continue; | |
1925 | } | |
1926 | } | |
1927 | if (next & REGN) { | |
1928 | if (_isRegionSubtag(pSubtag, subtagLen)) { | |
1929 | *pSep = 0; | |
1930 | t->region = T_CString_toUpperCase(pSubtag); | |
1931 | ||
1932 | pLastGoodPosition = pSep; | |
1933 | next = VART | EXTS | PRIV; | |
1934 | continue; | |
1935 | } | |
1936 | } | |
1937 | if (next & VART) { | |
4388f060 A |
1938 | if (_isVariantSubtag(pSubtag, subtagLen) || |
1939 | (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { | |
729e4ab9 A |
1940 | VariantListEntry *var; |
1941 | UBool isAdded; | |
1942 | ||
1943 | var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); | |
1944 | if (var == NULL) { | |
1945 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1946 | goto error; | |
1947 | } | |
1948 | *pSep = 0; | |
1949 | var->variant = T_CString_toUpperCase(pSubtag); | |
1950 | isAdded = _addVariantToList(&(t->variants), var); | |
1951 | if (!isAdded) { | |
1952 | /* duplicated variant entry */ | |
1953 | uprv_free(var); | |
1954 | break; | |
1955 | } | |
1956 | pLastGoodPosition = pSep; | |
1957 | next = VART | EXTS | PRIV; | |
1958 | continue; | |
1959 | } | |
1960 | } | |
1961 | if (next & EXTS) { | |
1962 | if (_isExtensionSingleton(pSubtag, subtagLen)) { | |
1963 | if (pExtension != NULL) { | |
1964 | if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { | |
1965 | /* the previous extension is incomplete */ | |
1966 | uprv_free(pExtension); | |
1967 | pExtension = NULL; | |
1968 | break; | |
1969 | } | |
1970 | ||
1971 | /* terminate the previous extension value */ | |
1972 | *pExtValueSubtagEnd = 0; | |
1973 | pExtension->value = T_CString_toLowerCase(pExtValueSubtag); | |
1974 | ||
1975 | /* insert the extension to the list */ | |
1976 | if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { | |
1977 | pLastGoodPosition = pExtValueSubtagEnd; | |
1978 | } else { | |
1979 | /* stop parsing here */ | |
1980 | uprv_free(pExtension); | |
1981 | pExtension = NULL; | |
1982 | break; | |
1983 | } | |
729e4ab9 A |
1984 | } |
1985 | ||
729e4ab9 | 1986 | /* create a new extension */ |
51004dcb | 1987 | pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
729e4ab9 A |
1988 | if (pExtension == NULL) { |
1989 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1990 | goto error; | |
1991 | } | |
1992 | *pSep = 0; | |
1993 | pExtension->key = T_CString_toLowerCase(pSubtag); | |
1994 | pExtension->value = NULL; /* will be set later */ | |
1995 | ||
1996 | /* | |
1997 | * reset the start and the end location of extension value | |
1998 | * subtags for this extension | |
1999 | */ | |
2000 | pExtValueSubtag = NULL; | |
2001 | pExtValueSubtagEnd = NULL; | |
2002 | ||
2003 | next = EXTV; | |
2004 | continue; | |
2005 | } | |
2006 | } | |
2007 | if (next & EXTV) { | |
2008 | if (_isExtensionSubtag(pSubtag, subtagLen)) { | |
51004dcb A |
2009 | if (pExtValueSubtag == NULL) { |
2010 | /* if the start position of this extension's value is not yet set, | |
2011 | this one is the first value subtag */ | |
2012 | pExtValueSubtag = pSubtag; | |
729e4ab9 A |
2013 | } |
2014 | ||
51004dcb A |
2015 | /* Mark the end of this subtag */ |
2016 | pExtValueSubtagEnd = pSep; | |
2017 | next = EXTS | EXTV | PRIV; | |
4388f060 | 2018 | |
51004dcb | 2019 | continue; |
729e4ab9 A |
2020 | } |
2021 | } | |
2022 | if (next & PRIV) { | |
2023 | if (uprv_tolower(*pSubtag) == PRIVATEUSE) { | |
2024 | char *pPrivuseVal; | |
2025 | ||
2026 | if (pExtension != NULL) { | |
2027 | /* Process the last extension */ | |
2028 | if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { | |
2029 | /* the previous extension is incomplete */ | |
2030 | uprv_free(pExtension); | |
2031 | pExtension = NULL; | |
2032 | break; | |
2033 | } else { | |
2034 | /* terminate the previous extension value */ | |
2035 | *pExtValueSubtagEnd = 0; | |
2036 | pExtension->value = T_CString_toLowerCase(pExtValueSubtag); | |
2037 | ||
2038 | /* insert the extension to the list */ | |
2039 | if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { | |
2040 | pLastGoodPosition = pExtValueSubtagEnd; | |
2041 | pExtension = NULL; | |
2042 | } else { | |
2043 | /* stop parsing here */ | |
2044 | uprv_free(pExtension); | |
2045 | pExtension = NULL; | |
2046 | break; | |
2047 | } | |
2048 | } | |
2049 | } | |
2050 | ||
2051 | /* The rest of part will be private use value subtags */ | |
2052 | if (pNext == NULL) { | |
2053 | /* empty private use subtag */ | |
2054 | break; | |
2055 | } | |
2056 | /* back up the private use value start position */ | |
2057 | pPrivuseVal = pNext; | |
2058 | ||
2059 | /* validate private use value subtags */ | |
2060 | while (pNext) { | |
2061 | pSubtag = pNext; | |
2062 | pSep = pSubtag; | |
2063 | while (*pSep) { | |
2064 | if (*pSep == SEP) { | |
2065 | break; | |
2066 | } | |
2067 | pSep++; | |
2068 | } | |
2069 | if (*pSep == 0) { | |
2070 | /* last subtag */ | |
2071 | pNext = NULL; | |
2072 | } else { | |
2073 | pNext = pSep + 1; | |
2074 | } | |
2075 | subtagLen = (int32_t)(pSep - pSubtag); | |
2076 | ||
4388f060 A |
2077 | if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { |
2078 | *pSep = 0; | |
2079 | next = VART; | |
2080 | privateuseVar = TRUE; | |
2081 | break; | |
2082 | } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { | |
729e4ab9 A |
2083 | pLastGoodPosition = pSep; |
2084 | } else { | |
2085 | break; | |
2086 | } | |
2087 | } | |
4388f060 A |
2088 | |
2089 | if (next == VART) { | |
2090 | continue; | |
2091 | } | |
2092 | ||
729e4ab9 A |
2093 | if (pLastGoodPosition - pPrivuseVal > 0) { |
2094 | *pLastGoodPosition = 0; | |
2095 | t->privateuse = T_CString_toLowerCase(pPrivuseVal); | |
2096 | } | |
2097 | /* No more subtags, exiting the parse loop */ | |
2098 | break; | |
2099 | } | |
2100 | break; | |
2101 | } | |
4388f060 | 2102 | |
729e4ab9 A |
2103 | /* If we fell through here, it means this subtag is illegal - quit parsing */ |
2104 | break; | |
2105 | } | |
2106 | ||
2107 | if (pExtension != NULL) { | |
2108 | /* Process the last extension */ | |
2109 | if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { | |
2110 | /* the previous extension is incomplete */ | |
2111 | uprv_free(pExtension); | |
2112 | } else { | |
2113 | /* terminate the previous extension value */ | |
2114 | *pExtValueSubtagEnd = 0; | |
2115 | pExtension->value = T_CString_toLowerCase(pExtValueSubtag); | |
2116 | /* insert the extension to the list */ | |
2117 | if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { | |
2118 | pLastGoodPosition = pExtValueSubtagEnd; | |
2119 | } else { | |
2120 | uprv_free(pExtension); | |
2121 | } | |
2122 | } | |
2123 | } | |
2124 | ||
2125 | if (parsedLen != NULL) { | |
51004dcb | 2126 | *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf); |
729e4ab9 A |
2127 | } |
2128 | ||
2129 | return t; | |
2130 | ||
2131 | error: | |
2132 | uprv_free(t); | |
2133 | return NULL; | |
2134 | } | |
2135 | ||
2136 | static void | |
2137 | ultag_close(ULanguageTag* langtag) { | |
2138 | ||
2139 | if (langtag == NULL) { | |
2140 | return; | |
2141 | } | |
2142 | ||
2143 | uprv_free(langtag->buf); | |
2144 | ||
2145 | if (langtag->variants) { | |
2146 | VariantListEntry *curVar = langtag->variants; | |
2147 | while (curVar) { | |
2148 | VariantListEntry *nextVar = curVar->next; | |
2149 | uprv_free(curVar); | |
2150 | curVar = nextVar; | |
2151 | } | |
2152 | } | |
2153 | ||
2154 | if (langtag->extensions) { | |
2155 | ExtensionListEntry *curExt = langtag->extensions; | |
2156 | while (curExt) { | |
2157 | ExtensionListEntry *nextExt = curExt->next; | |
2158 | uprv_free(curExt); | |
2159 | curExt = nextExt; | |
2160 | } | |
2161 | } | |
2162 | ||
2163 | uprv_free(langtag); | |
2164 | } | |
2165 | ||
2166 | static const char* | |
2167 | ultag_getLanguage(const ULanguageTag* langtag) { | |
2168 | return langtag->language; | |
2169 | } | |
2170 | ||
#if 0
/*
 * Currently compiled out.
 *
 * Scans DEPRECATEDLANGS two entries at a time (stopping at a NULL entry
 * in an even slot). When the even-slot entry compares equal to the
 * tag's language, the entry right after it is returned; otherwise the
 * tag's own language is returned.
 * NOTE(review): the table presumably maps a current code to the legacy
 * code used by the JDK - confirm against the DEPRECATEDLANGS definition.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    const char *lang = langtag->language;
    int32_t pair;

    for (pair = 0; DEPRECATEDLANGS[pair] != NULL; pair += 2) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pair], lang) == 0) {
            return DEPRECATEDLANGS[pair + 1];
        }
    }
    return lang;
}
#endif
2183 | ||
2184 | static const char* | |
2185 | ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { | |
2186 | if (idx >= 0 && idx < MAXEXTLANG) { | |
2187 | return langtag->extlang[idx]; | |
2188 | } | |
2189 | return NULL; | |
2190 | } | |
2191 | ||
2192 | static int32_t | |
2193 | ultag_getExtlangSize(const ULanguageTag* langtag) { | |
2194 | int32_t size = 0; | |
2195 | int32_t i; | |
2196 | for (i = 0; i < MAXEXTLANG; i++) { | |
2197 | if (langtag->extlang[i]) { | |
2198 | size++; | |
2199 | } | |
2200 | } | |
2201 | return size; | |
2202 | } | |
2203 | ||
2204 | static const char* | |
2205 | ultag_getScript(const ULanguageTag* langtag) { | |
2206 | return langtag->script; | |
2207 | } | |
2208 | ||
2209 | static const char* | |
2210 | ultag_getRegion(const ULanguageTag* langtag) { | |
2211 | return langtag->region; | |
2212 | } | |
2213 | ||
2214 | static const char* | |
2215 | ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { | |
2216 | const char *var = NULL; | |
2217 | VariantListEntry *cur = langtag->variants; | |
2218 | int32_t i = 0; | |
2219 | while (cur) { | |
2220 | if (i == idx) { | |
2221 | var = cur->variant; | |
2222 | break; | |
2223 | } | |
2224 | cur = cur->next; | |
2225 | i++; | |
2226 | } | |
2227 | return var; | |
2228 | } | |
2229 | ||
2230 | static int32_t | |
2231 | ultag_getVariantsSize(const ULanguageTag* langtag) { | |
2232 | int32_t size = 0; | |
2233 | VariantListEntry *cur = langtag->variants; | |
2234 | while (TRUE) { | |
2235 | if (cur == NULL) { | |
2236 | break; | |
2237 | } | |
2238 | size++; | |
2239 | cur = cur->next; | |
2240 | } | |
2241 | return size; | |
2242 | } | |
2243 | ||
2244 | static const char* | |
2245 | ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { | |
2246 | const char *key = NULL; | |
2247 | ExtensionListEntry *cur = langtag->extensions; | |
2248 | int32_t i = 0; | |
2249 | while (cur) { | |
2250 | if (i == idx) { | |
2251 | key = cur->key; | |
2252 | break; | |
2253 | } | |
2254 | cur = cur->next; | |
2255 | i++; | |
2256 | } | |
2257 | return key; | |
2258 | } | |
2259 | ||
2260 | static const char* | |
2261 | ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { | |
2262 | const char *val = NULL; | |
2263 | ExtensionListEntry *cur = langtag->extensions; | |
2264 | int32_t i = 0; | |
2265 | while (cur) { | |
2266 | if (i == idx) { | |
2267 | val = cur->value; | |
2268 | break; | |
2269 | } | |
2270 | cur = cur->next; | |
2271 | i++; | |
2272 | } | |
2273 | return val; | |
2274 | } | |
2275 | ||
2276 | static int32_t | |
2277 | ultag_getExtensionsSize(const ULanguageTag* langtag) { | |
2278 | int32_t size = 0; | |
2279 | ExtensionListEntry *cur = langtag->extensions; | |
2280 | while (TRUE) { | |
2281 | if (cur == NULL) { | |
2282 | break; | |
2283 | } | |
2284 | size++; | |
2285 | cur = cur->next; | |
2286 | } | |
2287 | return size; | |
2288 | } | |
2289 | ||
2290 | static const char* | |
2291 | ultag_getPrivateUse(const ULanguageTag* langtag) { | |
2292 | return langtag->privateuse; | |
2293 | } | |
2294 | ||
#if 0
/*
 * Currently compiled out.
 * Accessor for the grandfathered tag stored in a parsed tag.
 * Returns langtag->grandfathered unchanged; the string is not copied.
 */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    return langtag->grandfathered;
}
#endif
2301 | ||
2302 | ||
2303 | /* | |
2304 | * ------------------------------------------------- | |
2305 | * | |
2306 | * Locale/BCP47 conversion APIs, exposed as uloc_* | |
2307 | * | |
2308 | * ------------------------------------------------- | |
2309 | */ | |
51004dcb | 2310 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
2311 | uloc_toLanguageTag(const char* localeID, |
2312 | char* langtag, | |
2313 | int32_t langtagCapacity, | |
2314 | UBool strict, | |
2315 | UErrorCode* status) { | |
2316 | /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ | |
2317 | char canonical[256]; | |
2318 | int32_t reslen = 0; | |
2319 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
2320 | UBool hadPosix = FALSE; | |
2321 | const char* pKeywordStart; | |
2322 | ||
2323 | /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ | |
2324 | canonical[0] = 0; | |
2325 | if (uprv_strlen(localeID) > 0) { | |
2326 | uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); | |
2327 | if (tmpStatus != U_ZERO_ERROR) { | |
2328 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
2329 | return 0; | |
2330 | } | |
2331 | } | |
2332 | ||
2333 | /* For handling special case - private use only tag */ | |
2334 | pKeywordStart = locale_getKeywordsStart(canonical); | |
2335 | if (pKeywordStart == canonical) { | |
2336 | UEnumeration *kwdEnum; | |
2337 | int kwdCnt = 0; | |
2338 | UBool done = FALSE; | |
2339 | ||
2340 | kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); | |
2341 | if (kwdEnum != NULL) { | |
2342 | kwdCnt = uenum_count(kwdEnum, &tmpStatus); | |
2343 | if (kwdCnt == 1) { | |
2344 | const char *key; | |
2345 | int32_t len = 0; | |
2346 | ||
2347 | key = uenum_next(kwdEnum, &len, &tmpStatus); | |
2348 | if (len == 1 && *key == PRIVATEUSE) { | |
2349 | char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
2350 | buf[0] = PRIVATEUSE; | |
2351 | buf[1] = SEP; | |
2352 | len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); | |
2353 | if (U_SUCCESS(tmpStatus)) { | |
2354 | if (_isPrivateuseValueSubtags(&buf[2], len)) { | |
2355 | /* return private use only tag */ | |
2356 | reslen = len + 2; | |
2357 | uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); | |
2358 | u_terminateChars(langtag, langtagCapacity, reslen, status); | |
2359 | done = TRUE; | |
2360 | } else if (strict) { | |
2361 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
2362 | done = TRUE; | |
2363 | } | |
2364 | /* if not strict mode, then "und" will be returned */ | |
2365 | } else { | |
2366 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
2367 | done = TRUE; | |
2368 | } | |
2369 | } | |
2370 | } | |
2371 | uenum_close(kwdEnum); | |
2372 | if (done) { | |
2373 | return reslen; | |
2374 | } | |
2375 | } | |
2376 | } | |
2377 | ||
2378 | reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); | |
2379 | reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); | |
2380 | reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); | |
2381 | reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); | |
2382 | reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); | |
4388f060 | 2383 | reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); |
729e4ab9 A |
2384 | |
2385 | return reslen; | |
2386 | } | |
2387 | ||
2388 | ||
51004dcb | 2389 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
2390 | uloc_forLanguageTag(const char* langtag, |
2391 | char* localeID, | |
2392 | int32_t localeIDCapacity, | |
2393 | int32_t* parsedLength, | |
2394 | UErrorCode* status) { | |
2395 | ULanguageTag *lt; | |
2396 | int32_t reslen = 0; | |
2397 | const char *subtag, *p; | |
2398 | int32_t len; | |
51004dcb | 2399 | int32_t i, n; |
729e4ab9 A |
2400 | UBool noRegion = TRUE; |
2401 | ||
2402 | lt = ultag_parse(langtag, -1, parsedLength, status); | |
2403 | if (U_FAILURE(*status)) { | |
2404 | return 0; | |
2405 | } | |
2406 | ||
2407 | /* language */ | |
2408 | subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); | |
2409 | if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { | |
2410 | len = (int32_t)uprv_strlen(subtag); | |
2411 | if (len > 0) { | |
2412 | if (reslen < localeIDCapacity) { | |
2413 | uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); | |
2414 | } | |
2415 | reslen += len; | |
2416 | } | |
2417 | } | |
2418 | ||
2419 | /* script */ | |
2420 | subtag = ultag_getScript(lt); | |
2421 | len = (int32_t)uprv_strlen(subtag); | |
2422 | if (len > 0) { | |
2423 | if (reslen < localeIDCapacity) { | |
2424 | *(localeID + reslen) = LOCALE_SEP; | |
2425 | } | |
2426 | reslen++; | |
2427 | ||
2428 | /* write out the script in title case */ | |
2429 | p = subtag; | |
2430 | while (*p) { | |
2431 | if (reslen < localeIDCapacity) { | |
2432 | if (p == subtag) { | |
2433 | *(localeID + reslen) = uprv_toupper(*p); | |
2434 | } else { | |
2435 | *(localeID + reslen) = *p; | |
2436 | } | |
2437 | } | |
2438 | reslen++; | |
2439 | p++; | |
2440 | } | |
2441 | } | |
2442 | ||
2443 | /* region */ | |
2444 | subtag = ultag_getRegion(lt); | |
2445 | len = (int32_t)uprv_strlen(subtag); | |
2446 | if (len > 0) { | |
2447 | if (reslen < localeIDCapacity) { | |
2448 | *(localeID + reslen) = LOCALE_SEP; | |
2449 | } | |
2450 | reslen++; | |
2451 | /* write out the retion in upper case */ | |
2452 | p = subtag; | |
2453 | while (*p) { | |
2454 | if (reslen < localeIDCapacity) { | |
2455 | *(localeID + reslen) = uprv_toupper(*p); | |
2456 | } | |
2457 | reslen++; | |
2458 | p++; | |
2459 | } | |
2460 | noRegion = FALSE; | |
2461 | } | |
2462 | ||
2463 | /* variants */ | |
2464 | n = ultag_getVariantsSize(lt); | |
2465 | if (n > 0) { | |
2466 | if (noRegion) { | |
2467 | if (reslen < localeIDCapacity) { | |
2468 | *(localeID + reslen) = LOCALE_SEP; | |
2469 | } | |
2470 | reslen++; | |
2471 | } | |
2472 | ||
2473 | for (i = 0; i < n; i++) { | |
2474 | subtag = ultag_getVariant(lt, i); | |
2475 | if (reslen < localeIDCapacity) { | |
2476 | *(localeID + reslen) = LOCALE_SEP; | |
2477 | } | |
2478 | reslen++; | |
2479 | /* write out the variant in upper case */ | |
2480 | p = subtag; | |
2481 | while (*p) { | |
2482 | if (reslen < localeIDCapacity) { | |
2483 | *(localeID + reslen) = uprv_toupper(*p); | |
2484 | } | |
2485 | reslen++; | |
2486 | p++; | |
2487 | } | |
2488 | } | |
2489 | } | |
2490 | ||
2491 | /* keywords */ | |
2492 | n = ultag_getExtensionsSize(lt); | |
2493 | subtag = ultag_getPrivateUse(lt); | |
51004dcb A |
2494 | if (n > 0 || uprv_strlen(subtag) > 0) { |
2495 | if (reslen == 0 && n > 0) { | |
729e4ab9 A |
2496 | /* need a language */ |
2497 | if (reslen < localeIDCapacity) { | |
2498 | uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); | |
2499 | } | |
2500 | reslen += LANG_UND_LEN; | |
2501 | } | |
2502 | len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); | |
2503 | reslen += len; | |
2504 | } | |
2505 | ||
2506 | ultag_close(lt); | |
2507 | return u_terminateChars(localeID, localeIDCapacity, reslen, status); | |
2508 | } | |
2509 | ||
2510 |