]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/loclikely.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / common / loclikely.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
729e4ab9
A
3/*
4*******************************************************************************
5*
2ca993e8 6* Copyright (C) 1997-2016, International Business Machines
729e4ab9
A
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: loclikely.cpp
f3c0d7a5 11* encoding: UTF-8
729e4ab9
A
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2010feb25
16* created by: Markus W. Scherer
17*
18* Code for likely and minimized locale subtags, separated out from other .cpp files
19* that then do not depend on resource bundle code and likely-subtags data.
20*/
21
3d1f044b 22#include "unicode/bytestream.h"
729e4ab9 23#include "unicode/utypes.h"
b331163b 24#include "unicode/locid.h"
729e4ab9 25#include "unicode/putil.h"
f3c0d7a5 26#include "unicode/uchar.h"
729e4ab9
A
27#include "unicode/uloc.h"
28#include "unicode/ures.h"
b331163b 29#include "unicode/uscript.h"
3d1f044b
A
30#include "bytesinkutil.h"
31#include "charstr.h"
729e4ab9
A
32#include "cmemory.h"
33#include "cstring.h"
34#include "ulocimp.h"
35#include "ustr_imp.h"
36
3d1f044b
A
37/**
38 * These are the canonical strings for unknown languages, scripts and regions.
39 **/
40static const char* const unknownLanguage = "und";
41static const char* const unknownScript = "Zzzz";
42static const char* const unknownRegion = "ZZ";
43
729e4ab9
A
44/**
45 * This function looks for the localeID in the likelySubtags resource.
46 *
47 * @param localeID The tag to find.
48 * @param buffer A buffer to hold the matching entry
49 * @param bufferLength The length of the output buffer
50 * @return A pointer to "buffer" if found, or a null pointer if not.
51 */
52static const char* U_CALLCONV
53findLikelySubtags(const char* localeID,
54 char* buffer,
55 int32_t bufferLength,
56 UErrorCode* err) {
57 const char* result = NULL;
58
59 if (!U_FAILURE(*err)) {
60 int32_t resLen = 0;
61 const UChar* s = NULL;
62 UErrorCode tmpErr = U_ZERO_ERROR;
3d1f044b 63 icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr));
729e4ab9 64 if (U_SUCCESS(tmpErr)) {
3d1f044b
A
65 icu::CharString und;
66 if (localeID != NULL) {
67 if (*localeID == '\0') {
68 localeID = unknownLanguage;
69 } else if (*localeID == '_') {
70 und.append(unknownLanguage, *err);
71 und.append(localeID, *err);
72 if (U_FAILURE(*err)) {
73 return NULL;
74 }
75 localeID = und.data();
76 }
77 }
78 s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
729e4ab9
A
79
80 if (U_FAILURE(tmpErr)) {
81 /*
82 * If a resource is missing, it's not really an error, it's
83 * just that we don't have any data for that particular locale ID.
84 */
85 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
86 *err = tmpErr;
87 }
88 }
89 else if (resLen >= bufferLength) {
90 /* The buffer should never overflow. */
91 *err = U_INTERNAL_PROGRAM_ERROR;
92 }
93 else {
94 u_UCharsToChars(s, buffer, resLen + 1);
95 result = buffer;
96 }
729e4ab9
A
97 } else {
98 *err = tmpErr;
99 }
100 }
101
102 return result;
103}
104
105/**
106 * Append a tag to a buffer, adding the separator if necessary. The buffer
107 * must be large enough to contain the resulting tag plus any separator
108 * necessary. The tag must not be a zero-length string.
109 *
110 * @param tag The tag to add.
111 * @param tagLength The length of the tag.
112 * @param buffer The output buffer.
113 * @param bufferLength The length of the output buffer. This is an input/ouput parameter.
114 **/
115static void U_CALLCONV
116appendTag(
117 const char* tag,
118 int32_t tagLength,
119 char* buffer,
120 int32_t* bufferLength) {
121
122 if (*bufferLength > 0) {
123 buffer[*bufferLength] = '_';
124 ++(*bufferLength);
125 }
126
127 uprv_memmove(
128 &buffer[*bufferLength],
129 tag,
130 tagLength);
131
132 *bufferLength += tagLength;
133}
134
729e4ab9
A
135/**
136 * Create a tag string from the supplied parameters. The lang, script and region
137 * parameters may be NULL pointers. If they are, their corresponding length parameters
138 * must be less than or equal to 0.
139 *
140 * If any of the language, script or region parameters are empty, and the alternateTags
141 * parameter is not NULL, it will be parsed for potential language, script and region tags
142 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
143 * it contains no language tag, the default tag for the unknown language is used.
144 *
145 * If the length of the new string exceeds the capacity of the output buffer,
146 * the function copies as many bytes to the output buffer as it can, and returns
147 * the error U_BUFFER_OVERFLOW_ERROR.
148 *
149 * If an illegal argument is provided, the function returns the error
150 * U_ILLEGAL_ARGUMENT_ERROR.
151 *
152 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
153 * the tag string fits in the output buffer, but the null terminator doesn't.
154 *
155 * @param lang The language tag to use.
156 * @param langLength The length of the language tag.
157 * @param script The script tag to use.
158 * @param scriptLength The length of the script tag.
159 * @param region The region tag to use.
160 * @param regionLength The length of the region tag.
161 * @param trailing Any trailing data to append to the new tag.
162 * @param trailingLength The length of the trailing data.
163 * @param alternateTags A string containing any alternate tags.
3d1f044b 164 * @param sink The output sink receiving the tag string.
729e4ab9 165 * @param err A pointer to a UErrorCode for error reporting.
729e4ab9 166 **/
3d1f044b 167static void U_CALLCONV
729e4ab9
A
168createTagStringWithAlternates(
169 const char* lang,
170 int32_t langLength,
171 const char* script,
172 int32_t scriptLength,
173 const char* region,
174 int32_t regionLength,
175 const char* trailing,
176 int32_t trailingLength,
177 const char* alternateTags,
3d1f044b 178 icu::ByteSink& sink,
729e4ab9
A
179 UErrorCode* err) {
180
181 if (U_FAILURE(*err)) {
182 goto error;
183 }
3d1f044b 184 else if (langLength >= ULOC_LANG_CAPACITY ||
729e4ab9
A
185 scriptLength >= ULOC_SCRIPT_CAPACITY ||
186 regionLength >= ULOC_COUNTRY_CAPACITY) {
187 goto error;
188 }
189 else {
190 /**
191 * ULOC_FULLNAME_CAPACITY will provide enough capacity
192 * that we can build a string that contains the language,
193 * script and region code without worrying about overrunning
194 * the user-supplied buffer.
195 **/
196 char tagBuffer[ULOC_FULLNAME_CAPACITY];
197 int32_t tagLength = 0;
729e4ab9
A
198 UBool regionAppended = FALSE;
199
200 if (langLength > 0) {
201 appendTag(
202 lang,
203 langLength,
204 tagBuffer,
205 &tagLength);
206 }
207 else if (alternateTags == NULL) {
208 /*
209 * Append the value for an unknown language, if
210 * we found no language.
211 */
212 appendTag(
213 unknownLanguage,
214 (int32_t)uprv_strlen(unknownLanguage),
215 tagBuffer,
216 &tagLength);
217 }
218 else {
219 /*
220 * Parse the alternateTags string for the language.
221 */
222 char alternateLang[ULOC_LANG_CAPACITY];
223 int32_t alternateLangLength = sizeof(alternateLang);
224
225 alternateLangLength =
226 uloc_getLanguage(
227 alternateTags,
228 alternateLang,
229 alternateLangLength,
230 err);
231 if(U_FAILURE(*err) ||
232 alternateLangLength >= ULOC_LANG_CAPACITY) {
233 goto error;
234 }
235 else if (alternateLangLength == 0) {
236 /*
237 * Append the value for an unknown language, if
238 * we found no language.
239 */
240 appendTag(
241 unknownLanguage,
242 (int32_t)uprv_strlen(unknownLanguage),
243 tagBuffer,
244 &tagLength);
245 }
246 else {
247 appendTag(
248 alternateLang,
249 alternateLangLength,
250 tagBuffer,
251 &tagLength);
252 }
253 }
254
255 if (scriptLength > 0) {
256 appendTag(
257 script,
258 scriptLength,
259 tagBuffer,
260 &tagLength);
261 }
262 else if (alternateTags != NULL) {
263 /*
264 * Parse the alternateTags string for the script.
265 */
266 char alternateScript[ULOC_SCRIPT_CAPACITY];
267
268 const int32_t alternateScriptLength =
269 uloc_getScript(
270 alternateTags,
271 alternateScript,
272 sizeof(alternateScript),
273 err);
274
275 if (U_FAILURE(*err) ||
276 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
277 goto error;
278 }
279 else if (alternateScriptLength > 0) {
280 appendTag(
281 alternateScript,
282 alternateScriptLength,
283 tagBuffer,
284 &tagLength);
285 }
286 }
287
288 if (regionLength > 0) {
289 appendTag(
290 region,
291 regionLength,
292 tagBuffer,
293 &tagLength);
294
295 regionAppended = TRUE;
296 }
297 else if (alternateTags != NULL) {
298 /*
299 * Parse the alternateTags string for the region.
300 */
301 char alternateRegion[ULOC_COUNTRY_CAPACITY];
302
303 const int32_t alternateRegionLength =
304 uloc_getCountry(
305 alternateTags,
306 alternateRegion,
307 sizeof(alternateRegion),
308 err);
309 if (U_FAILURE(*err) ||
310 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
311 goto error;
312 }
313 else if (alternateRegionLength > 0) {
314 appendTag(
315 alternateRegion,
316 alternateRegionLength,
317 tagBuffer,
318 &tagLength);
319
320 regionAppended = TRUE;
321 }
322 }
323
3d1f044b
A
324 /**
325 * Copy the partial tag from our internal buffer to the supplied
326 * target.
327 **/
328 sink.Append(tagBuffer, tagLength);
729e4ab9
A
329
330 if (trailingLength > 0) {
3d1f044b
A
331 if (*trailing != '@') {
332 sink.Append("_", 1);
333 if (!regionAppended) {
729e4ab9 334 /* extra separator is required */
3d1f044b 335 sink.Append("_", 1);
729e4ab9
A
336 }
337 }
338
3d1f044b
A
339 /*
340 * Copy the trailing data into the supplied buffer.
341 */
342 sink.Append(trailing, trailingLength);
729e4ab9
A
343 }
344
3d1f044b 345 return;
729e4ab9
A
346 }
347
348error:
349
350 /**
351 * An overflow indicates the locale ID passed in
352 * is ill-formed. If we got here, and there was
353 * no previous error, it's an implicit overflow.
354 **/
355 if (*err == U_BUFFER_OVERFLOW_ERROR ||
356 U_SUCCESS(*err)) {
357 *err = U_ILLEGAL_ARGUMENT_ERROR;
358 }
729e4ab9
A
359}
360
361/**
362 * Create a tag string from the supplied parameters. The lang, script and region
363 * parameters may be NULL pointers. If they are, their corresponding length parameters
364 * must be less than or equal to 0. If the lang parameter is an empty string, the
365 * default value for an unknown language is written to the output buffer.
366 *
367 * If the length of the new string exceeds the capacity of the output buffer,
368 * the function copies as many bytes to the output buffer as it can, and returns
369 * the error U_BUFFER_OVERFLOW_ERROR.
370 *
371 * If an illegal argument is provided, the function returns the error
372 * U_ILLEGAL_ARGUMENT_ERROR.
373 *
374 * @param lang The language tag to use.
375 * @param langLength The length of the language tag.
376 * @param script The script tag to use.
377 * @param scriptLength The length of the script tag.
378 * @param region The region tag to use.
379 * @param regionLength The length of the region tag.
380 * @param trailing Any trailing data to append to the new tag.
381 * @param trailingLength The length of the trailing data.
3d1f044b 382 * @param sink The output sink receiving the tag string.
729e4ab9 383 * @param err A pointer to a UErrorCode for error reporting.
729e4ab9 384 **/
3d1f044b 385static void U_CALLCONV
729e4ab9
A
386createTagString(
387 const char* lang,
388 int32_t langLength,
389 const char* script,
390 int32_t scriptLength,
391 const char* region,
392 int32_t regionLength,
393 const char* trailing,
394 int32_t trailingLength,
3d1f044b 395 icu::ByteSink& sink,
729e4ab9
A
396 UErrorCode* err)
397{
3d1f044b 398 createTagStringWithAlternates(
729e4ab9
A
399 lang,
400 langLength,
401 script,
402 scriptLength,
403 region,
404 regionLength,
405 trailing,
406 trailingLength,
407 NULL,
3d1f044b 408 sink,
729e4ab9
A
409 err);
410}
411
412/**
413 * Parse the language, script, and region subtags from a tag string, and copy the
414 * results into the corresponding output parameters. The buffers are null-terminated,
415 * unless overflow occurs.
416 *
417 * The langLength, scriptLength, and regionLength parameters are input/output
418 * parameters, and must contain the capacity of their corresponding buffers on
419 * input. On output, they will contain the actual length of the buffers, not
420 * including the null terminator.
421 *
422 * If the length of any of the output subtags exceeds the capacity of the corresponding
423 * buffer, the function copies as many bytes to the output buffer as it can, and returns
424 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
425 * occurs.
426 *
427 * If an illegal argument is provided, the function returns the error
428 * U_ILLEGAL_ARGUMENT_ERROR.
429 *
430 * @param localeID The locale ID to parse.
431 * @param lang The language tag buffer.
432 * @param langLength The length of the language tag.
433 * @param script The script tag buffer.
434 * @param scriptLength The length of the script tag.
435 * @param region The region tag buffer.
436 * @param regionLength The length of the region tag.
437 * @param err A pointer to a UErrorCode for error reporting.
438 * @return The number of chars of the localeID parameter consumed.
439 **/
440static int32_t U_CALLCONV
441parseTagString(
442 const char* localeID,
443 char* lang,
444 int32_t* langLength,
445 char* script,
446 int32_t* scriptLength,
447 char* region,
448 int32_t* regionLength,
449 UErrorCode* err)
450{
451 const char* position = localeID;
452 int32_t subtagLength = 0;
453
454 if(U_FAILURE(*err) ||
455 localeID == NULL ||
456 lang == NULL ||
457 langLength == NULL ||
458 script == NULL ||
459 scriptLength == NULL ||
460 region == NULL ||
461 regionLength == NULL) {
462 goto error;
463 }
464
465 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
466 u_terminateChars(lang, *langLength, subtagLength, err);
467
468 /*
469 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
470 * to be an error, because it indicates the user-supplied tag is
471 * not well-formed.
472 */
473 if(U_FAILURE(*err)) {
474 goto error;
475 }
476
477 *langLength = subtagLength;
478
479 /*
480 * If no language was present, use the value of unknownLanguage
481 * instead. Otherwise, move past any separator.
482 */
483 if (*langLength == 0) {
484 uprv_strcpy(
485 lang,
486 unknownLanguage);
487 *langLength = (int32_t)uprv_strlen(lang);
488 }
0f5d89e8 489 if (_isIDSeparator(*position)) {
729e4ab9
A
490 ++position;
491 }
492
493 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
494 u_terminateChars(script, *scriptLength, subtagLength, err);
495
496 if(U_FAILURE(*err)) {
497 goto error;
498 }
499
500 *scriptLength = subtagLength;
501
502 if (*scriptLength > 0) {
503 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
504 /**
505 * If the script part is the "unknown" script, then don't return it.
506 **/
507 *scriptLength = 0;
508 }
509
510 /*
511 * Move past any separator.
512 */
513 if (_isIDSeparator(*position)) {
514 ++position;
515 }
516 }
517
518 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
519 u_terminateChars(region, *regionLength, subtagLength, err);
520
521 if(U_FAILURE(*err)) {
522 goto error;
523 }
524
525 *regionLength = subtagLength;
526
527 if (*regionLength > 0) {
528 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
529 /**
530 * If the region part is the "unknown" region, then don't return it.
531 **/
532 *regionLength = 0;
533 }
534 } else if (*position != 0 && *position != '@') {
535 /* back up over consumed trailing separator */
536 --position;
537 }
538
539exit:
540
541 return (int32_t)(position - localeID);
542
543error:
544
545 /**
546 * If we get here, we have no explicit error, it's the result of an
547 * illegal argument.
548 **/
549 if (!U_FAILURE(*err)) {
550 *err = U_ILLEGAL_ARGUMENT_ERROR;
551 }
552
553 goto exit;
554}
555
3d1f044b 556static UBool U_CALLCONV
729e4ab9
A
557createLikelySubtagsString(
558 const char* lang,
559 int32_t langLength,
560 const char* script,
561 int32_t scriptLength,
562 const char* region,
563 int32_t regionLength,
564 const char* variants,
565 int32_t variantsLength,
3d1f044b
A
566 icu::ByteSink& sink,
567 UErrorCode* err) {
729e4ab9
A
568 /**
569 * ULOC_FULLNAME_CAPACITY will provide enough capacity
570 * that we can build a string that contains the language,
571 * script and region code without worrying about overrunning
572 * the user-supplied buffer.
573 **/
729e4ab9 574 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
729e4ab9
A
575
576 if(U_FAILURE(*err)) {
577 goto error;
578 }
579
580 /**
581 * Try the language with the script and region first.
582 **/
583 if (scriptLength > 0 && regionLength > 0) {
584
585 const char* likelySubtags = NULL;
586
3d1f044b
A
587 icu::CharString tagBuffer;
588 {
589 icu::CharStringByteSink sink(&tagBuffer);
590 createTagString(
591 lang,
592 langLength,
593 script,
594 scriptLength,
595 region,
596 regionLength,
597 NULL,
598 0,
599 sink,
600 err);
601 }
729e4ab9
A
602 if(U_FAILURE(*err)) {
603 goto error;
604 }
605
606 likelySubtags =
607 findLikelySubtags(
3d1f044b 608 tagBuffer.data(),
729e4ab9
A
609 likelySubtagsBuffer,
610 sizeof(likelySubtagsBuffer),
611 err);
612 if(U_FAILURE(*err)) {
613 goto error;
614 }
615
616 if (likelySubtags != NULL) {
617 /* Always use the language tag from the
618 maximal string, since it may be more
619 specific than the one provided. */
3d1f044b 620 createTagStringWithAlternates(
729e4ab9
A
621 NULL,
622 0,
623 NULL,
624 0,
625 NULL,
626 0,
627 variants,
628 variantsLength,
629 likelySubtags,
3d1f044b 630 sink,
729e4ab9 631 err);
3d1f044b 632 return TRUE;
729e4ab9
A
633 }
634 }
635
636 /**
637 * Try the language with just the script.
638 **/
639 if (scriptLength > 0) {
640
641 const char* likelySubtags = NULL;
642
3d1f044b
A
643 icu::CharString tagBuffer;
644 {
645 icu::CharStringByteSink sink(&tagBuffer);
646 createTagString(
647 lang,
648 langLength,
649 script,
650 scriptLength,
651 NULL,
652 0,
653 NULL,
654 0,
655 sink,
656 err);
657 }
729e4ab9
A
658 if(U_FAILURE(*err)) {
659 goto error;
660 }
661
662 likelySubtags =
663 findLikelySubtags(
3d1f044b 664 tagBuffer.data(),
729e4ab9
A
665 likelySubtagsBuffer,
666 sizeof(likelySubtagsBuffer),
667 err);
668 if(U_FAILURE(*err)) {
669 goto error;
670 }
671
672 if (likelySubtags != NULL) {
673 /* Always use the language tag from the
674 maximal string, since it may be more
675 specific than the one provided. */
3d1f044b 676 createTagStringWithAlternates(
729e4ab9
A
677 NULL,
678 0,
679 NULL,
680 0,
681 region,
682 regionLength,
683 variants,
684 variantsLength,
685 likelySubtags,
3d1f044b 686 sink,
729e4ab9 687 err);
3d1f044b 688 return TRUE;
729e4ab9
A
689 }
690 }
691
692 /**
693 * Try the language with just the region.
694 **/
695 if (regionLength > 0) {
696
697 const char* likelySubtags = NULL;
698
3d1f044b
A
699 icu::CharString tagBuffer;
700 {
701 icu::CharStringByteSink sink(&tagBuffer);
702 createTagString(
703 lang,
704 langLength,
705 NULL,
706 0,
707 region,
708 regionLength,
709 NULL,
710 0,
711 sink,
712 err);
713 }
729e4ab9
A
714 if(U_FAILURE(*err)) {
715 goto error;
716 }
717
718 likelySubtags =
719 findLikelySubtags(
3d1f044b 720 tagBuffer.data(),
729e4ab9
A
721 likelySubtagsBuffer,
722 sizeof(likelySubtagsBuffer),
723 err);
724 if(U_FAILURE(*err)) {
725 goto error;
726 }
727
728 if (likelySubtags != NULL) {
729 /* Always use the language tag from the
730 maximal string, since it may be more
731 specific than the one provided. */
3d1f044b 732 createTagStringWithAlternates(
729e4ab9
A
733 NULL,
734 0,
735 script,
736 scriptLength,
737 NULL,
738 0,
739 variants,
740 variantsLength,
741 likelySubtags,
3d1f044b 742 sink,
729e4ab9 743 err);
3d1f044b 744 return TRUE;
729e4ab9
A
745 }
746 }
747
748 /**
749 * Finally, try just the language.
750 **/
751 {
752 const char* likelySubtags = NULL;
753
3d1f044b
A
754 icu::CharString tagBuffer;
755 {
756 icu::CharStringByteSink sink(&tagBuffer);
757 createTagString(
758 lang,
759 langLength,
760 NULL,
761 0,
762 NULL,
763 0,
764 NULL,
765 0,
766 sink,
767 err);
768 }
729e4ab9
A
769 if(U_FAILURE(*err)) {
770 goto error;
771 }
772
773 likelySubtags =
774 findLikelySubtags(
3d1f044b 775 tagBuffer.data(),
729e4ab9
A
776 likelySubtagsBuffer,
777 sizeof(likelySubtagsBuffer),
778 err);
779 if(U_FAILURE(*err)) {
780 goto error;
781 }
782
783 if (likelySubtags != NULL) {
784 /* Always use the language tag from the
785 maximal string, since it may be more
786 specific than the one provided. */
3d1f044b 787 createTagStringWithAlternates(
729e4ab9
A
788 NULL,
789 0,
790 script,
791 scriptLength,
792 region,
793 regionLength,
794 variants,
795 variantsLength,
796 likelySubtags,
3d1f044b 797 sink,
729e4ab9 798 err);
3d1f044b 799 return TRUE;
729e4ab9
A
800 }
801 }
802
3d1f044b 803 return FALSE;
729e4ab9
A
804
805error:
806
807 if (!U_FAILURE(*err)) {
808 *err = U_ILLEGAL_ARGUMENT_ERROR;
809 }
810
3d1f044b 811 return FALSE;
729e4ab9
A
812}
813
/*
 * Scan the trailing part of a locale ID and jump to the enclosing function's
 * `error` label if any '-' or '_' separated segment is too long.
 *
 * Scanning stops at the first '@' (keywords are not checked). The length
 * test runs before the counter is incremented, so a segment of up to 9
 * characters is accepted and the 10th character triggers the jump.
 *
 * The expansion is a single statement (do/while(0)), so it can be followed
 * by a semicolon in any statement context. The enclosing function must
 * define a label named `error`.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    } while (0)
832
3d1f044b
A
833static void
834_uloc_addLikelySubtags(const char* localeID,
835 icu::ByteSink& sink,
836 UErrorCode* err) {
729e4ab9
A
837 char lang[ULOC_LANG_CAPACITY];
838 int32_t langLength = sizeof(lang);
839 char script[ULOC_SCRIPT_CAPACITY];
840 int32_t scriptLength = sizeof(script);
841 char region[ULOC_COUNTRY_CAPACITY];
842 int32_t regionLength = sizeof(region);
843 const char* trailing = "";
844 int32_t trailingLength = 0;
845 int32_t trailingIndex = 0;
3d1f044b 846 UBool success = FALSE;
729e4ab9
A
847
848 if(U_FAILURE(*err)) {
849 goto error;
850 }
3d1f044b 851 if (localeID == NULL) {
729e4ab9
A
852 goto error;
853 }
854
855 trailingIndex = parseTagString(
856 localeID,
857 lang,
858 &langLength,
859 script,
860 &scriptLength,
861 region,
862 &regionLength,
863 err);
864 if(U_FAILURE(*err)) {
865 /* Overflow indicates an illegal argument error */
866 if (*err == U_BUFFER_OVERFLOW_ERROR) {
867 *err = U_ILLEGAL_ARGUMENT_ERROR;
868 }
869
870 goto error;
871 }
872
873 /* Find the length of the trailing portion. */
874 while (_isIDSeparator(localeID[trailingIndex])) {
875 trailingIndex++;
876 }
877 trailing = &localeID[trailingIndex];
878 trailingLength = (int32_t)uprv_strlen(trailing);
879
880 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
881
3d1f044b 882 success =
729e4ab9
A
883 createLikelySubtagsString(
884 lang,
885 langLength,
886 script,
887 scriptLength,
888 region,
889 regionLength,
890 trailing,
891 trailingLength,
3d1f044b 892 sink,
729e4ab9
A
893 err);
894
3d1f044b 895 if (!success) {
729e4ab9
A
896 const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
897
898 /*
899 * If we get here, we need to return localeID.
900 */
3d1f044b 901 sink.Append(localeID, localIDLength);
729e4ab9
A
902 }
903
3d1f044b 904 return;
729e4ab9
A
905
906error:
907
908 if (!U_FAILURE(*err)) {
909 *err = U_ILLEGAL_ARGUMENT_ERROR;
910 }
729e4ab9
A
911}
912
3d1f044b
A
913static void
914_uloc_minimizeSubtags(const char* localeID,
915 icu::ByteSink& sink,
916 UErrorCode* err) {
917 icu::CharString maximizedTagBuffer;
729e4ab9
A
918
919 char lang[ULOC_LANG_CAPACITY];
920 int32_t langLength = sizeof(lang);
921 char script[ULOC_SCRIPT_CAPACITY];
922 int32_t scriptLength = sizeof(script);
923 char region[ULOC_COUNTRY_CAPACITY];
924 int32_t regionLength = sizeof(region);
925 const char* trailing = "";
926 int32_t trailingLength = 0;
927 int32_t trailingIndex = 0;
928
929 if(U_FAILURE(*err)) {
930 goto error;
931 }
3d1f044b 932 else if (localeID == NULL) {
729e4ab9
A
933 goto error;
934 }
935
936 trailingIndex =
937 parseTagString(
938 localeID,
939 lang,
940 &langLength,
941 script,
942 &scriptLength,
943 region,
944 &regionLength,
945 err);
946 if(U_FAILURE(*err)) {
947
948 /* Overflow indicates an illegal argument error */
949 if (*err == U_BUFFER_OVERFLOW_ERROR) {
950 *err = U_ILLEGAL_ARGUMENT_ERROR;
951 }
952
953 goto error;
954 }
955
956 /* Find the spot where the variants or the keywords begin, if any. */
957 while (_isIDSeparator(localeID[trailingIndex])) {
958 trailingIndex++;
959 }
960 trailing = &localeID[trailingIndex];
961 trailingLength = (int32_t)uprv_strlen(trailing);
962
963 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
964
3d1f044b
A
965 {
966 icu::CharString base;
967 {
968 icu::CharStringByteSink sink(&base);
969 createTagString(
970 lang,
971 langLength,
972 script,
973 scriptLength,
974 region,
975 regionLength,
976 NULL,
977 0,
978 sink,
979 err);
980 }
729e4ab9 981
3d1f044b
A
982 /**
983 * First, we need to first get the maximization
984 * from AddLikelySubtags.
985 **/
986 {
987 icu::CharStringByteSink sink(&maximizedTagBuffer);
988 ulocimp_addLikelySubtags(base.data(), sink, err);
989 }
990 }
729e4ab9
A
991
992 if(U_FAILURE(*err)) {
993 goto error;
994 }
995
996 /**
997 * Start first with just the language.
998 **/
999 {
3d1f044b
A
1000 icu::CharString tagBuffer;
1001 {
1002 icu::CharStringByteSink sink(&tagBuffer);
729e4ab9
A
1003 createLikelySubtagsString(
1004 lang,
1005 langLength,
1006 NULL,
1007 0,
1008 NULL,
1009 0,
1010 NULL,
1011 0,
3d1f044b 1012 sink,
729e4ab9 1013 err);
3d1f044b 1014 }
729e4ab9
A
1015
1016 if(U_FAILURE(*err)) {
1017 goto error;
1018 }
1019 else if (uprv_strnicmp(
3d1f044b
A
1020 maximizedTagBuffer.data(),
1021 tagBuffer.data(),
1022 tagBuffer.length()) == 0) {
729e4ab9 1023
3d1f044b 1024 createTagString(
729e4ab9
A
1025 lang,
1026 langLength,
1027 NULL,
1028 0,
1029 NULL,
1030 0,
1031 trailing,
1032 trailingLength,
3d1f044b 1033 sink,
729e4ab9 1034 err);
3d1f044b 1035 return;
729e4ab9
A
1036 }
1037 }
1038
1039 /**
1040 * Next, try the language and region.
1041 **/
1042 if (regionLength > 0) {
1043
3d1f044b
A
1044 icu::CharString tagBuffer;
1045 {
1046 icu::CharStringByteSink sink(&tagBuffer);
729e4ab9
A
1047 createLikelySubtagsString(
1048 lang,
1049 langLength,
1050 NULL,
1051 0,
1052 region,
1053 regionLength,
1054 NULL,
1055 0,
3d1f044b 1056 sink,
729e4ab9 1057 err);
3d1f044b 1058 }
729e4ab9
A
1059
1060 if(U_FAILURE(*err)) {
1061 goto error;
1062 }
1063 else if (uprv_strnicmp(
3d1f044b
A
1064 maximizedTagBuffer.data(),
1065 tagBuffer.data(),
1066 tagBuffer.length()) == 0) {
729e4ab9 1067
3d1f044b 1068 createTagString(
729e4ab9
A
1069 lang,
1070 langLength,
1071 NULL,
1072 0,
1073 region,
1074 regionLength,
1075 trailing,
1076 trailingLength,
3d1f044b 1077 sink,
729e4ab9 1078 err);
3d1f044b 1079 return;
729e4ab9
A
1080 }
1081 }
1082
1083 /**
1084 * Finally, try the language and script. This is our last chance,
1085 * since trying with all three subtags would only yield the
1086 * maximal version that we already have.
1087 **/
1088 if (scriptLength > 0 && regionLength > 0) {
3d1f044b
A
1089 icu::CharString tagBuffer;
1090 {
1091 icu::CharStringByteSink sink(&tagBuffer);
729e4ab9
A
1092 createLikelySubtagsString(
1093 lang,
1094 langLength,
1095 script,
1096 scriptLength,
1097 NULL,
1098 0,
1099 NULL,
1100 0,
3d1f044b 1101 sink,
729e4ab9 1102 err);
3d1f044b 1103 }
729e4ab9
A
1104
1105 if(U_FAILURE(*err)) {
1106 goto error;
1107 }
1108 else if (uprv_strnicmp(
3d1f044b
A
1109 maximizedTagBuffer.data(),
1110 tagBuffer.data(),
1111 tagBuffer.length()) == 0) {
729e4ab9 1112
3d1f044b 1113 createTagString(
729e4ab9
A
1114 lang,
1115 langLength,
1116 script,
1117 scriptLength,
1118 NULL,
1119 0,
1120 trailing,
1121 trailingLength,
3d1f044b 1122 sink,
729e4ab9 1123 err);
3d1f044b 1124 return;
729e4ab9
A
1125 }
1126 }
1127
1128 {
1129 /**
1130 * If we got here, return the locale ID parameter.
1131 **/
1132 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
3d1f044b
A
1133 sink.Append(localeID, localeIDLength);
1134 return;
729e4ab9
A
1135 }
1136
1137error:
1138
1139 if (!U_FAILURE(*err)) {
1140 *err = U_ILLEGAL_ARGUMENT_ERROR;
1141 }
729e4ab9
A
1142}
1143
1144static UBool
1145do_canonicalize(const char* localeID,
1146 char* buffer,
1147 int32_t bufferCapacity,
1148 UErrorCode* err)
1149{
1150 uloc_canonicalize(
1151 localeID,
1152 buffer,
1153 bufferCapacity,
1154 err);
1155
1156 if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1157 *err == U_BUFFER_OVERFLOW_ERROR) {
1158 *err = U_ILLEGAL_ARGUMENT_ERROR;
1159
1160 return FALSE;
1161 }
1162 else if (U_FAILURE(*err)) {
1163
1164 return FALSE;
1165 }
1166 else {
1167 return TRUE;
1168 }
1169}
1170
51004dcb 1171U_CAPI int32_t U_EXPORT2
3d1f044b
A
1172uloc_addLikelySubtags(const char* localeID,
1173 char* maximizedLocaleID,
1174 int32_t maximizedLocaleIDCapacity,
1175 UErrorCode* status) {
1176 if (U_FAILURE(*status)) {
1177 return 0;
1178 }
1179
1180 icu::CheckedArrayByteSink sink(
1181 maximizedLocaleID, maximizedLocaleIDCapacity);
1182
1183 ulocimp_addLikelySubtags(localeID, sink, status);
1184 int32_t reslen = sink.NumberOfBytesAppended();
1185
1186 if (U_FAILURE(*status)) {
1187 return sink.Overflowed() ? reslen : -1;
1188 }
1189
1190 if (sink.Overflowed()) {
1191 *status = U_BUFFER_OVERFLOW_ERROR;
1192 } else {
1193 u_terminateChars(
1194 maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
1195 }
1196
1197 return reslen;
1198}
1199
1200U_CAPI void U_EXPORT2
1201ulocimp_addLikelySubtags(const char* localeID,
1202 icu::ByteSink& sink,
1203 UErrorCode* status) {
729e4ab9
A
1204 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1205
3d1f044b
A
1206 if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1207 _uloc_addLikelySubtags(localeBuffer, sink, status);
729e4ab9 1208 }
729e4ab9
A
1209}
1210
51004dcb 1211U_CAPI int32_t U_EXPORT2
3d1f044b
A
1212uloc_minimizeSubtags(const char* localeID,
1213 char* minimizedLocaleID,
1214 int32_t minimizedLocaleIDCapacity,
1215 UErrorCode* status) {
1216 if (U_FAILURE(*status)) {
1217 return 0;
1218 }
1219
1220 icu::CheckedArrayByteSink sink(
1221 minimizedLocaleID, minimizedLocaleIDCapacity);
1222
1223 ulocimp_minimizeSubtags(localeID, sink, status);
1224 int32_t reslen = sink.NumberOfBytesAppended();
1225
1226 if (U_FAILURE(*status)) {
1227 return sink.Overflowed() ? reslen : -1;
1228 }
1229
1230 if (sink.Overflowed()) {
1231 *status = U_BUFFER_OVERFLOW_ERROR;
1232 } else {
1233 u_terminateChars(
1234 minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
1235 }
1236
1237 return reslen;
1238}
1239
1240U_CAPI void U_EXPORT2
1241ulocimp_minimizeSubtags(const char* localeID,
1242 icu::ByteSink& sink,
1243 UErrorCode* status) {
729e4ab9
A
1244 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1245
3d1f044b
A
1246 if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1247 _uloc_minimizeSubtags(localeBuffer, sink, status);
729e4ab9 1248 }
729e4ab9 1249}
b331163b
A
1250
1251// Pairs of (language subtag, + or -) for finding out fast if common languages
1252// are LTR (minus) or RTL (plus).
0f5d89e8 1253static const char LANG_DIR_STRING[] =
b331163b
A
1254 "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1255
3d1f044b 1256// Implemented here because this calls ulocimp_addLikelySubtags().
b331163b
A
1257U_CAPI UBool U_EXPORT2
1258uloc_isRightToLeft(const char *locale) {
1259 UErrorCode errorCode = U_ZERO_ERROR;
1260 char script[8];
1261 int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1262 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1263 scriptLength == 0) {
1264 // Fastpath: We know the likely scripts and their writing direction
1265 // for some common languages.
1266 errorCode = U_ZERO_ERROR;
1267 char lang[8];
1268 int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
3d1f044b 1269 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
b331163b
A
1270 return FALSE;
1271 }
3d1f044b
A
1272 if (langLength > 0) {
1273 const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1274 if (langPtr != NULL) {
1275 switch (langPtr[langLength]) {
1276 case '-': return FALSE;
1277 case '+': return TRUE;
1278 default: break; // partial match of a longer code
1279 }
b331163b
A
1280 }
1281 }
1282 // Otherwise, find the likely script.
1283 errorCode = U_ZERO_ERROR;
3d1f044b
A
1284 icu::CharString likely;
1285 {
1286 icu::CharStringByteSink sink(&likely);
1287 ulocimp_addLikelySubtags(locale, sink, &errorCode);
1288 }
b331163b
A
1289 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1290 return FALSE;
1291 }
3d1f044b 1292 scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
b331163b
A
1293 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1294 scriptLength == 0) {
1295 return FALSE;
1296 }
1297 }
3d1f044b
A
1298 if (uprv_strcmp(script,"Aran")==0) {
1299 uprv_strcpy(script,"Arab"); // The properties functions do not understand Aran
1300 }
b331163b
A
1301 UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1302 return uscript_isRightToLeft(scriptCode);
1303}
1304
1305U_NAMESPACE_BEGIN
1306
1307UBool
1308Locale::isRightToLeft() const {
1309 return uloc_isRightToLeft(getBaseName());
1310}
1311
f3c0d7a5
A
1312U_NAMESPACE_END
1313
2ca993e8
A
1314// The following must at least allow for rg key value (6) plus terminator (1).
1315#define ULOC_RG_BUFLEN 8
1316
1317U_CAPI int32_t U_EXPORT2
1318ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1319 char *region, int32_t regionCapacity, UErrorCode* status) {
1320 if (U_FAILURE(*status)) {
1321 return 0;
1322 }
1323 char rgBuf[ULOC_RG_BUFLEN];
1324 UErrorCode rgStatus = U_ZERO_ERROR;
1325
1326 // First check for rg keyword value
1327 int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1328 if (U_FAILURE(rgStatus) || rgLen != 6) {
1329 rgLen = 0;
1330 } else {
1331 // rgBuf guaranteed to be zero terminated here, with text len 6
1332 char *rgPtr = rgBuf;
1333 for (; *rgPtr!= 0; rgPtr++) {
1334 *rgPtr = uprv_toupper(*rgPtr);
1335 }
1336 rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
1337 }
1338
1339 if (rgLen == 0) {
1340 // No valid rg keyword value, try for unicode_region_subtag
1341 rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1342 if (U_FAILURE(*status)) {
1343 rgLen = 0;
1344 } else if (rgLen == 0 && inferRegion) {
1345 // no unicode_region_subtag but inferRegion TRUE, try likely subtags
2ca993e8 1346 rgStatus = U_ZERO_ERROR;
3d1f044b
A
1347 icu::CharString locBuf;
1348 {
1349 icu::CharStringByteSink sink(&locBuf);
1350 ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
1351 }
2ca993e8 1352 if (U_SUCCESS(rgStatus)) {
3d1f044b 1353 rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
2ca993e8
A
1354 if (U_FAILURE(*status)) {
1355 rgLen = 0;
1356 }
1357 }
1358 }
1359 }
1360
1361 rgBuf[rgLen] = 0;
1362 uprv_strncpy(region, rgBuf, regionCapacity);
1363 return u_terminateChars(region, regionCapacity, rgLen, status);
1364}
1365