]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/common/loclikely.cpp
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / common / loclikely.cpp
... / ...
CommitLineData
1/*
2*******************************************************************************
3*
4* Copyright (C) 1997-2016, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: loclikely.cpp
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2010feb25
14* created by: Markus W. Scherer
15*
16* Code for likely and minimized locale subtags, separated out from other .cpp files
17* that then do not depend on resource bundle code and likely-subtags data.
18*/
19
20#include "unicode/utypes.h"
21#include "unicode/locid.h"
22#include "unicode/putil.h"
23#include "unicode/uloc.h"
24#include "unicode/ures.h"
25#include "unicode/uscript.h"
26#include "cmemory.h"
27#include "cstring.h"
28#include "ulocimp.h"
29#include "ustr_imp.h"
30
31/**
32 * This function looks for the localeID in the likelySubtags resource.
33 *
34 * @param localeID The tag to find.
35 * @param buffer A buffer to hold the matching entry
36 * @param bufferLength The length of the output buffer
37 * @return A pointer to "buffer" if found, or a null pointer if not.
38 */
39static const char* U_CALLCONV
40findLikelySubtags(const char* localeID,
41 char* buffer,
42 int32_t bufferLength,
43 UErrorCode* err) {
44 const char* result = NULL;
45
46 if (!U_FAILURE(*err)) {
47 int32_t resLen = 0;
48 const UChar* s = NULL;
49 UErrorCode tmpErr = U_ZERO_ERROR;
50 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
51 if (U_SUCCESS(tmpErr)) {
52 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
53
54 if (U_FAILURE(tmpErr)) {
55 /*
56 * If a resource is missing, it's not really an error, it's
57 * just that we don't have any data for that particular locale ID.
58 */
59 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
60 *err = tmpErr;
61 }
62 }
63 else if (resLen >= bufferLength) {
64 /* The buffer should never overflow. */
65 *err = U_INTERNAL_PROGRAM_ERROR;
66 }
67 else {
68 u_UCharsToChars(s, buffer, resLen + 1);
69 result = buffer;
70 }
71
72 ures_close(subtags);
73 } else {
74 *err = tmpErr;
75 }
76 }
77
78 return result;
79}
80
81/**
82 * Append a tag to a buffer, adding the separator if necessary. The buffer
83 * must be large enough to contain the resulting tag plus any separator
84 * necessary. The tag must not be a zero-length string.
85 *
86 * @param tag The tag to add.
87 * @param tagLength The length of the tag.
88 * @param buffer The output buffer.
89 * @param bufferLength The length of the output buffer. This is an input/ouput parameter.
90 **/
91static void U_CALLCONV
92appendTag(
93 const char* tag,
94 int32_t tagLength,
95 char* buffer,
96 int32_t* bufferLength) {
97
98 if (*bufferLength > 0) {
99 buffer[*bufferLength] = '_';
100 ++(*bufferLength);
101 }
102
103 uprv_memmove(
104 &buffer[*bufferLength],
105 tag,
106 tagLength);
107
108 *bufferLength += tagLength;
109}
110
/**
 * Canonical placeholder subtags: "und" stands for an unknown language,
 * "Zzzz" for an unknown script and "ZZ" for an unknown region.
 **/
static const char *const unknownLanguage = "und";
static const char *const unknownScript   = "Zzzz";
static const char *const unknownRegion   = "ZZ";
117
118/**
119 * Create a tag string from the supplied parameters. The lang, script and region
120 * parameters may be NULL pointers. If they are, their corresponding length parameters
121 * must be less than or equal to 0.
122 *
123 * If any of the language, script or region parameters are empty, and the alternateTags
124 * parameter is not NULL, it will be parsed for potential language, script and region tags
125 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
126 * it contains no language tag, the default tag for the unknown language is used.
127 *
128 * If the length of the new string exceeds the capacity of the output buffer,
129 * the function copies as many bytes to the output buffer as it can, and returns
130 * the error U_BUFFER_OVERFLOW_ERROR.
131 *
132 * If an illegal argument is provided, the function returns the error
133 * U_ILLEGAL_ARGUMENT_ERROR.
134 *
135 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
136 * the tag string fits in the output buffer, but the null terminator doesn't.
137 *
138 * @param lang The language tag to use.
139 * @param langLength The length of the language tag.
140 * @param script The script tag to use.
141 * @param scriptLength The length of the script tag.
142 * @param region The region tag to use.
143 * @param regionLength The length of the region tag.
144 * @param trailing Any trailing data to append to the new tag.
145 * @param trailingLength The length of the trailing data.
146 * @param alternateTags A string containing any alternate tags.
147 * @param tag The output buffer.
148 * @param tagCapacity The capacity of the output buffer.
149 * @param err A pointer to a UErrorCode for error reporting.
150 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
151 **/
152static int32_t U_CALLCONV
153createTagStringWithAlternates(
154 const char* lang,
155 int32_t langLength,
156 const char* script,
157 int32_t scriptLength,
158 const char* region,
159 int32_t regionLength,
160 const char* trailing,
161 int32_t trailingLength,
162 const char* alternateTags,
163 char* tag,
164 int32_t tagCapacity,
165 UErrorCode* err) {
166
167 if (U_FAILURE(*err)) {
168 goto error;
169 }
170 else if (tag == NULL ||
171 tagCapacity <= 0 ||
172 langLength >= ULOC_LANG_CAPACITY ||
173 scriptLength >= ULOC_SCRIPT_CAPACITY ||
174 regionLength >= ULOC_COUNTRY_CAPACITY) {
175 goto error;
176 }
177 else {
178 /**
179 * ULOC_FULLNAME_CAPACITY will provide enough capacity
180 * that we can build a string that contains the language,
181 * script and region code without worrying about overrunning
182 * the user-supplied buffer.
183 **/
184 char tagBuffer[ULOC_FULLNAME_CAPACITY];
185 int32_t tagLength = 0;
186 int32_t capacityRemaining = tagCapacity;
187 UBool regionAppended = FALSE;
188
189 if (langLength > 0) {
190 appendTag(
191 lang,
192 langLength,
193 tagBuffer,
194 &tagLength);
195 }
196 else if (alternateTags == NULL) {
197 /*
198 * Append the value for an unknown language, if
199 * we found no language.
200 */
201 appendTag(
202 unknownLanguage,
203 (int32_t)uprv_strlen(unknownLanguage),
204 tagBuffer,
205 &tagLength);
206 }
207 else {
208 /*
209 * Parse the alternateTags string for the language.
210 */
211 char alternateLang[ULOC_LANG_CAPACITY];
212 int32_t alternateLangLength = sizeof(alternateLang);
213
214 alternateLangLength =
215 uloc_getLanguage(
216 alternateTags,
217 alternateLang,
218 alternateLangLength,
219 err);
220 if(U_FAILURE(*err) ||
221 alternateLangLength >= ULOC_LANG_CAPACITY) {
222 goto error;
223 }
224 else if (alternateLangLength == 0) {
225 /*
226 * Append the value for an unknown language, if
227 * we found no language.
228 */
229 appendTag(
230 unknownLanguage,
231 (int32_t)uprv_strlen(unknownLanguage),
232 tagBuffer,
233 &tagLength);
234 }
235 else {
236 appendTag(
237 alternateLang,
238 alternateLangLength,
239 tagBuffer,
240 &tagLength);
241 }
242 }
243
244 if (scriptLength > 0) {
245 appendTag(
246 script,
247 scriptLength,
248 tagBuffer,
249 &tagLength);
250 }
251 else if (alternateTags != NULL) {
252 /*
253 * Parse the alternateTags string for the script.
254 */
255 char alternateScript[ULOC_SCRIPT_CAPACITY];
256
257 const int32_t alternateScriptLength =
258 uloc_getScript(
259 alternateTags,
260 alternateScript,
261 sizeof(alternateScript),
262 err);
263
264 if (U_FAILURE(*err) ||
265 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
266 goto error;
267 }
268 else if (alternateScriptLength > 0) {
269 appendTag(
270 alternateScript,
271 alternateScriptLength,
272 tagBuffer,
273 &tagLength);
274 }
275 }
276
277 if (regionLength > 0) {
278 appendTag(
279 region,
280 regionLength,
281 tagBuffer,
282 &tagLength);
283
284 regionAppended = TRUE;
285 }
286 else if (alternateTags != NULL) {
287 /*
288 * Parse the alternateTags string for the region.
289 */
290 char alternateRegion[ULOC_COUNTRY_CAPACITY];
291
292 const int32_t alternateRegionLength =
293 uloc_getCountry(
294 alternateTags,
295 alternateRegion,
296 sizeof(alternateRegion),
297 err);
298 if (U_FAILURE(*err) ||
299 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
300 goto error;
301 }
302 else if (alternateRegionLength > 0) {
303 appendTag(
304 alternateRegion,
305 alternateRegionLength,
306 tagBuffer,
307 &tagLength);
308
309 regionAppended = TRUE;
310 }
311 }
312
313 {
314 const int32_t toCopy =
315 tagLength >= tagCapacity ? tagCapacity : tagLength;
316
317 /**
318 * Copy the partial tag from our internal buffer to the supplied
319 * target.
320 **/
321 uprv_memcpy(
322 tag,
323 tagBuffer,
324 toCopy);
325
326 capacityRemaining -= toCopy;
327 }
328
329 if (trailingLength > 0) {
330 if (*trailing != '@' && capacityRemaining > 0) {
331 tag[tagLength++] = '_';
332 --capacityRemaining;
333 if (capacityRemaining > 0 && !regionAppended) {
334 /* extra separator is required */
335 tag[tagLength++] = '_';
336 --capacityRemaining;
337 }
338 }
339
340 if (capacityRemaining > 0) {
341 /*
342 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we
343 * don't know if the user-supplied buffers overlap.
344 */
345 const int32_t toCopy =
346 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
347
348 uprv_memmove(
349 &tag[tagLength],
350 trailing,
351 toCopy);
352 }
353 }
354
355 tagLength += trailingLength;
356
357 return u_terminateChars(
358 tag,
359 tagCapacity,
360 tagLength,
361 err);
362 }
363
364error:
365
366 /**
367 * An overflow indicates the locale ID passed in
368 * is ill-formed. If we got here, and there was
369 * no previous error, it's an implicit overflow.
370 **/
371 if (*err == U_BUFFER_OVERFLOW_ERROR ||
372 U_SUCCESS(*err)) {
373 *err = U_ILLEGAL_ARGUMENT_ERROR;
374 }
375
376 return -1;
377}
378
379/**
380 * Create a tag string from the supplied parameters. The lang, script and region
381 * parameters may be NULL pointers. If they are, their corresponding length parameters
382 * must be less than or equal to 0. If the lang parameter is an empty string, the
383 * default value for an unknown language is written to the output buffer.
384 *
385 * If the length of the new string exceeds the capacity of the output buffer,
386 * the function copies as many bytes to the output buffer as it can, and returns
387 * the error U_BUFFER_OVERFLOW_ERROR.
388 *
389 * If an illegal argument is provided, the function returns the error
390 * U_ILLEGAL_ARGUMENT_ERROR.
391 *
392 * @param lang The language tag to use.
393 * @param langLength The length of the language tag.
394 * @param script The script tag to use.
395 * @param scriptLength The length of the script tag.
396 * @param region The region tag to use.
397 * @param regionLength The length of the region tag.
398 * @param trailing Any trailing data to append to the new tag.
399 * @param trailingLength The length of the trailing data.
400 * @param tag The output buffer.
401 * @param tagCapacity The capacity of the output buffer.
402 * @param err A pointer to a UErrorCode for error reporting.
403 * @return The length of the tag string, which may be greater than tagCapacity.
404 **/
405static int32_t U_CALLCONV
406createTagString(
407 const char* lang,
408 int32_t langLength,
409 const char* script,
410 int32_t scriptLength,
411 const char* region,
412 int32_t regionLength,
413 const char* trailing,
414 int32_t trailingLength,
415 char* tag,
416 int32_t tagCapacity,
417 UErrorCode* err)
418{
419 return createTagStringWithAlternates(
420 lang,
421 langLength,
422 script,
423 scriptLength,
424 region,
425 regionLength,
426 trailing,
427 trailingLength,
428 NULL,
429 tag,
430 tagCapacity,
431 err);
432}
433
434/**
435 * Parse the language, script, and region subtags from a tag string, and copy the
436 * results into the corresponding output parameters. The buffers are null-terminated,
437 * unless overflow occurs.
438 *
439 * The langLength, scriptLength, and regionLength parameters are input/output
440 * parameters, and must contain the capacity of their corresponding buffers on
441 * input. On output, they will contain the actual length of the buffers, not
442 * including the null terminator.
443 *
444 * If the length of any of the output subtags exceeds the capacity of the corresponding
445 * buffer, the function copies as many bytes to the output buffer as it can, and returns
446 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
447 * occurs.
448 *
449 * If an illegal argument is provided, the function returns the error
450 * U_ILLEGAL_ARGUMENT_ERROR.
451 *
452 * @param localeID The locale ID to parse.
453 * @param lang The language tag buffer.
454 * @param langLength The length of the language tag.
455 * @param script The script tag buffer.
456 * @param scriptLength The length of the script tag.
457 * @param region The region tag buffer.
458 * @param regionLength The length of the region tag.
459 * @param err A pointer to a UErrorCode for error reporting.
460 * @return The number of chars of the localeID parameter consumed.
461 **/
462static int32_t U_CALLCONV
463parseTagString(
464 const char* localeID,
465 char* lang,
466 int32_t* langLength,
467 char* script,
468 int32_t* scriptLength,
469 char* region,
470 int32_t* regionLength,
471 UErrorCode* err)
472{
473 const char* position = localeID;
474 int32_t subtagLength = 0;
475
476 if(U_FAILURE(*err) ||
477 localeID == NULL ||
478 lang == NULL ||
479 langLength == NULL ||
480 script == NULL ||
481 scriptLength == NULL ||
482 region == NULL ||
483 regionLength == NULL) {
484 goto error;
485 }
486
487 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
488 u_terminateChars(lang, *langLength, subtagLength, err);
489
490 /*
491 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
492 * to be an error, because it indicates the user-supplied tag is
493 * not well-formed.
494 */
495 if(U_FAILURE(*err)) {
496 goto error;
497 }
498
499 *langLength = subtagLength;
500
501 /*
502 * If no language was present, use the value of unknownLanguage
503 * instead. Otherwise, move past any separator.
504 */
505 if (*langLength == 0) {
506 uprv_strcpy(
507 lang,
508 unknownLanguage);
509 *langLength = (int32_t)uprv_strlen(lang);
510 }
511 else if (_isIDSeparator(*position)) {
512 ++position;
513 }
514
515 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
516 u_terminateChars(script, *scriptLength, subtagLength, err);
517
518 if(U_FAILURE(*err)) {
519 goto error;
520 }
521
522 *scriptLength = subtagLength;
523
524 if (*scriptLength > 0) {
525 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
526 /**
527 * If the script part is the "unknown" script, then don't return it.
528 **/
529 *scriptLength = 0;
530 }
531
532 /*
533 * Move past any separator.
534 */
535 if (_isIDSeparator(*position)) {
536 ++position;
537 }
538 }
539
540 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
541 u_terminateChars(region, *regionLength, subtagLength, err);
542
543 if(U_FAILURE(*err)) {
544 goto error;
545 }
546
547 *regionLength = subtagLength;
548
549 if (*regionLength > 0) {
550 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
551 /**
552 * If the region part is the "unknown" region, then don't return it.
553 **/
554 *regionLength = 0;
555 }
556 } else if (*position != 0 && *position != '@') {
557 /* back up over consumed trailing separator */
558 --position;
559 }
560
561exit:
562
563 return (int32_t)(position - localeID);
564
565error:
566
567 /**
568 * If we get here, we have no explicit error, it's the result of an
569 * illegal argument.
570 **/
571 if (!U_FAILURE(*err)) {
572 *err = U_ILLEGAL_ARGUMENT_ERROR;
573 }
574
575 goto exit;
576}
577
578static int32_t U_CALLCONV
579createLikelySubtagsString(
580 const char* lang,
581 int32_t langLength,
582 const char* script,
583 int32_t scriptLength,
584 const char* region,
585 int32_t regionLength,
586 const char* variants,
587 int32_t variantsLength,
588 char* tag,
589 int32_t tagCapacity,
590 UErrorCode* err)
591{
592 /**
593 * ULOC_FULLNAME_CAPACITY will provide enough capacity
594 * that we can build a string that contains the language,
595 * script and region code without worrying about overrunning
596 * the user-supplied buffer.
597 **/
598 char tagBuffer[ULOC_FULLNAME_CAPACITY];
599 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
600
601 if(U_FAILURE(*err)) {
602 goto error;
603 }
604
605 /**
606 * Try the language with the script and region first.
607 **/
608 if (scriptLength > 0 && regionLength > 0) {
609
610 const char* likelySubtags = NULL;
611
612 createTagString(
613 lang,
614 langLength,
615 script,
616 scriptLength,
617 region,
618 regionLength,
619 NULL,
620 0,
621 tagBuffer,
622 sizeof(tagBuffer),
623 err);
624 if(U_FAILURE(*err)) {
625 goto error;
626 }
627
628 likelySubtags =
629 findLikelySubtags(
630 tagBuffer,
631 likelySubtagsBuffer,
632 sizeof(likelySubtagsBuffer),
633 err);
634 if(U_FAILURE(*err)) {
635 goto error;
636 }
637
638 if (likelySubtags != NULL) {
639 /* Always use the language tag from the
640 maximal string, since it may be more
641 specific than the one provided. */
642 return createTagStringWithAlternates(
643 NULL,
644 0,
645 NULL,
646 0,
647 NULL,
648 0,
649 variants,
650 variantsLength,
651 likelySubtags,
652 tag,
653 tagCapacity,
654 err);
655 }
656 }
657
658 /**
659 * Try the language with just the script.
660 **/
661 if (scriptLength > 0) {
662
663 const char* likelySubtags = NULL;
664
665 createTagString(
666 lang,
667 langLength,
668 script,
669 scriptLength,
670 NULL,
671 0,
672 NULL,
673 0,
674 tagBuffer,
675 sizeof(tagBuffer),
676 err);
677 if(U_FAILURE(*err)) {
678 goto error;
679 }
680
681 likelySubtags =
682 findLikelySubtags(
683 tagBuffer,
684 likelySubtagsBuffer,
685 sizeof(likelySubtagsBuffer),
686 err);
687 if(U_FAILURE(*err)) {
688 goto error;
689 }
690
691 if (likelySubtags != NULL) {
692 /* Always use the language tag from the
693 maximal string, since it may be more
694 specific than the one provided. */
695 return createTagStringWithAlternates(
696 NULL,
697 0,
698 NULL,
699 0,
700 region,
701 regionLength,
702 variants,
703 variantsLength,
704 likelySubtags,
705 tag,
706 tagCapacity,
707 err);
708 }
709 }
710
711 /**
712 * Try the language with just the region.
713 **/
714 if (regionLength > 0) {
715
716 const char* likelySubtags = NULL;
717
718 createTagString(
719 lang,
720 langLength,
721 NULL,
722 0,
723 region,
724 regionLength,
725 NULL,
726 0,
727 tagBuffer,
728 sizeof(tagBuffer),
729 err);
730 if(U_FAILURE(*err)) {
731 goto error;
732 }
733
734 likelySubtags =
735 findLikelySubtags(
736 tagBuffer,
737 likelySubtagsBuffer,
738 sizeof(likelySubtagsBuffer),
739 err);
740 if(U_FAILURE(*err)) {
741 goto error;
742 }
743
744 if (likelySubtags != NULL) {
745 /* Always use the language tag from the
746 maximal string, since it may be more
747 specific than the one provided. */
748 return createTagStringWithAlternates(
749 NULL,
750 0,
751 script,
752 scriptLength,
753 NULL,
754 0,
755 variants,
756 variantsLength,
757 likelySubtags,
758 tag,
759 tagCapacity,
760 err);
761 }
762 }
763
764 /**
765 * Finally, try just the language.
766 **/
767 {
768 const char* likelySubtags = NULL;
769
770 createTagString(
771 lang,
772 langLength,
773 NULL,
774 0,
775 NULL,
776 0,
777 NULL,
778 0,
779 tagBuffer,
780 sizeof(tagBuffer),
781 err);
782 if(U_FAILURE(*err)) {
783 goto error;
784 }
785
786 likelySubtags =
787 findLikelySubtags(
788 tagBuffer,
789 likelySubtagsBuffer,
790 sizeof(likelySubtagsBuffer),
791 err);
792 if(U_FAILURE(*err)) {
793 goto error;
794 }
795
796 if (likelySubtags != NULL) {
797 /* Always use the language tag from the
798 maximal string, since it may be more
799 specific than the one provided. */
800 return createTagStringWithAlternates(
801 NULL,
802 0,
803 script,
804 scriptLength,
805 region,
806 regionLength,
807 variants,
808 variantsLength,
809 likelySubtags,
810 tag,
811 tagCapacity,
812 err);
813 }
814 }
815
816 return u_terminateChars(
817 tag,
818 tagCapacity,
819 0,
820 err);
821
822error:
823
824 if (!U_FAILURE(*err)) {
825 *err = U_ILLEGAL_ARGUMENT_ERROR;
826 }
827
828 return -1;
829}
830
/*
 * Scan the trailing part of a locale ID, up to the first '@', and jump to the
 * enclosing function's `error` label when a run of characters between
 * separators ('-' or '_') is too long. The counter is reset at each separator;
 * a run is rejected when a further character is seen while the counter is
 * already above 8, i.e. on its tenth character.
 * NOTE(review): this lets nine-character subtags through; confirm whether
 * the intended limit is eight.
 *
 * The expansion must be used inside a function that defines an `error` label.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    {   int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    }
849
850static int32_t
851_uloc_addLikelySubtags(const char* localeID,
852 char* maximizedLocaleID,
853 int32_t maximizedLocaleIDCapacity,
854 UErrorCode* err)
855{
856 char lang[ULOC_LANG_CAPACITY];
857 int32_t langLength = sizeof(lang);
858 char script[ULOC_SCRIPT_CAPACITY];
859 int32_t scriptLength = sizeof(script);
860 char region[ULOC_COUNTRY_CAPACITY];
861 int32_t regionLength = sizeof(region);
862 const char* trailing = "";
863 int32_t trailingLength = 0;
864 int32_t trailingIndex = 0;
865 int32_t resultLength = 0;
866
867 if(U_FAILURE(*err)) {
868 goto error;
869 }
870 else if (localeID == NULL ||
871 maximizedLocaleID == NULL ||
872 maximizedLocaleIDCapacity <= 0) {
873 goto error;
874 }
875
876 trailingIndex = parseTagString(
877 localeID,
878 lang,
879 &langLength,
880 script,
881 &scriptLength,
882 region,
883 &regionLength,
884 err);
885 if(U_FAILURE(*err)) {
886 /* Overflow indicates an illegal argument error */
887 if (*err == U_BUFFER_OVERFLOW_ERROR) {
888 *err = U_ILLEGAL_ARGUMENT_ERROR;
889 }
890
891 goto error;
892 }
893
894 /* Find the length of the trailing portion. */
895 while (_isIDSeparator(localeID[trailingIndex])) {
896 trailingIndex++;
897 }
898 trailing = &localeID[trailingIndex];
899 trailingLength = (int32_t)uprv_strlen(trailing);
900
901 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
902
903 resultLength =
904 createLikelySubtagsString(
905 lang,
906 langLength,
907 script,
908 scriptLength,
909 region,
910 regionLength,
911 trailing,
912 trailingLength,
913 maximizedLocaleID,
914 maximizedLocaleIDCapacity,
915 err);
916
917 if (resultLength == 0) {
918 const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
919
920 /*
921 * If we get here, we need to return localeID.
922 */
923 uprv_memcpy(
924 maximizedLocaleID,
925 localeID,
926 localIDLength <= maximizedLocaleIDCapacity ?
927 localIDLength : maximizedLocaleIDCapacity);
928
929 resultLength =
930 u_terminateChars(
931 maximizedLocaleID,
932 maximizedLocaleIDCapacity,
933 localIDLength,
934 err);
935 }
936
937 return resultLength;
938
939error:
940
941 if (!U_FAILURE(*err)) {
942 *err = U_ILLEGAL_ARGUMENT_ERROR;
943 }
944
945 return -1;
946}
947
948static int32_t
949_uloc_minimizeSubtags(const char* localeID,
950 char* minimizedLocaleID,
951 int32_t minimizedLocaleIDCapacity,
952 UErrorCode* err)
953{
954 /**
955 * ULOC_FULLNAME_CAPACITY will provide enough capacity
956 * that we can build a string that contains the language,
957 * script and region code without worrying about overrunning
958 * the user-supplied buffer.
959 **/
960 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
961 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
962
963 char lang[ULOC_LANG_CAPACITY];
964 int32_t langLength = sizeof(lang);
965 char script[ULOC_SCRIPT_CAPACITY];
966 int32_t scriptLength = sizeof(script);
967 char region[ULOC_COUNTRY_CAPACITY];
968 int32_t regionLength = sizeof(region);
969 const char* trailing = "";
970 int32_t trailingLength = 0;
971 int32_t trailingIndex = 0;
972
973 if(U_FAILURE(*err)) {
974 goto error;
975 }
976 else if (localeID == NULL ||
977 minimizedLocaleID == NULL ||
978 minimizedLocaleIDCapacity <= 0) {
979 goto error;
980 }
981
982 trailingIndex =
983 parseTagString(
984 localeID,
985 lang,
986 &langLength,
987 script,
988 &scriptLength,
989 region,
990 &regionLength,
991 err);
992 if(U_FAILURE(*err)) {
993
994 /* Overflow indicates an illegal argument error */
995 if (*err == U_BUFFER_OVERFLOW_ERROR) {
996 *err = U_ILLEGAL_ARGUMENT_ERROR;
997 }
998
999 goto error;
1000 }
1001
1002 /* Find the spot where the variants or the keywords begin, if any. */
1003 while (_isIDSeparator(localeID[trailingIndex])) {
1004 trailingIndex++;
1005 }
1006 trailing = &localeID[trailingIndex];
1007 trailingLength = (int32_t)uprv_strlen(trailing);
1008
1009 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1010
1011 createTagString(
1012 lang,
1013 langLength,
1014 script,
1015 scriptLength,
1016 region,
1017 regionLength,
1018 NULL,
1019 0,
1020 maximizedTagBuffer,
1021 maximizedTagBufferLength,
1022 err);
1023 if(U_FAILURE(*err)) {
1024 goto error;
1025 }
1026
1027 /**
1028 * First, we need to first get the maximization
1029 * from AddLikelySubtags.
1030 **/
1031 maximizedTagBufferLength =
1032 uloc_addLikelySubtags(
1033 maximizedTagBuffer,
1034 maximizedTagBuffer,
1035 maximizedTagBufferLength,
1036 err);
1037
1038 if(U_FAILURE(*err)) {
1039 goto error;
1040 }
1041
1042 /**
1043 * Start first with just the language.
1044 **/
1045 {
1046 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1047
1048 const int32_t tagBufferLength =
1049 createLikelySubtagsString(
1050 lang,
1051 langLength,
1052 NULL,
1053 0,
1054 NULL,
1055 0,
1056 NULL,
1057 0,
1058 tagBuffer,
1059 sizeof(tagBuffer),
1060 err);
1061
1062 if(U_FAILURE(*err)) {
1063 goto error;
1064 }
1065 else if (uprv_strnicmp(
1066 maximizedTagBuffer,
1067 tagBuffer,
1068 tagBufferLength) == 0) {
1069
1070 return createTagString(
1071 lang,
1072 langLength,
1073 NULL,
1074 0,
1075 NULL,
1076 0,
1077 trailing,
1078 trailingLength,
1079 minimizedLocaleID,
1080 minimizedLocaleIDCapacity,
1081 err);
1082 }
1083 }
1084
1085 /**
1086 * Next, try the language and region.
1087 **/
1088 if (regionLength > 0) {
1089
1090 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1091
1092 const int32_t tagBufferLength =
1093 createLikelySubtagsString(
1094 lang,
1095 langLength,
1096 NULL,
1097 0,
1098 region,
1099 regionLength,
1100 NULL,
1101 0,
1102 tagBuffer,
1103 sizeof(tagBuffer),
1104 err);
1105
1106 if(U_FAILURE(*err)) {
1107 goto error;
1108 }
1109 else if (uprv_strnicmp(
1110 maximizedTagBuffer,
1111 tagBuffer,
1112 tagBufferLength) == 0) {
1113
1114 return createTagString(
1115 lang,
1116 langLength,
1117 NULL,
1118 0,
1119 region,
1120 regionLength,
1121 trailing,
1122 trailingLength,
1123 minimizedLocaleID,
1124 minimizedLocaleIDCapacity,
1125 err);
1126 }
1127 }
1128
1129 /**
1130 * Finally, try the language and script. This is our last chance,
1131 * since trying with all three subtags would only yield the
1132 * maximal version that we already have.
1133 **/
1134 if (scriptLength > 0 && regionLength > 0) {
1135 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1136
1137 const int32_t tagBufferLength =
1138 createLikelySubtagsString(
1139 lang,
1140 langLength,
1141 script,
1142 scriptLength,
1143 NULL,
1144 0,
1145 NULL,
1146 0,
1147 tagBuffer,
1148 sizeof(tagBuffer),
1149 err);
1150
1151 if(U_FAILURE(*err)) {
1152 goto error;
1153 }
1154 else if (uprv_strnicmp(
1155 maximizedTagBuffer,
1156 tagBuffer,
1157 tagBufferLength) == 0) {
1158
1159 return createTagString(
1160 lang,
1161 langLength,
1162 script,
1163 scriptLength,
1164 NULL,
1165 0,
1166 trailing,
1167 trailingLength,
1168 minimizedLocaleID,
1169 minimizedLocaleIDCapacity,
1170 err);
1171 }
1172 }
1173
1174 {
1175 /**
1176 * If we got here, return the locale ID parameter.
1177 **/
1178 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1179
1180 uprv_memcpy(
1181 minimizedLocaleID,
1182 localeID,
1183 localeIDLength <= minimizedLocaleIDCapacity ?
1184 localeIDLength : minimizedLocaleIDCapacity);
1185
1186 return u_terminateChars(
1187 minimizedLocaleID,
1188 minimizedLocaleIDCapacity,
1189 localeIDLength,
1190 err);
1191 }
1192
1193error:
1194
1195 if (!U_FAILURE(*err)) {
1196 *err = U_ILLEGAL_ARGUMENT_ERROR;
1197 }
1198
1199 return -1;
1200
1201
1202}
1203
1204static UBool
1205do_canonicalize(const char* localeID,
1206 char* buffer,
1207 int32_t bufferCapacity,
1208 UErrorCode* err)
1209{
1210 uloc_canonicalize(
1211 localeID,
1212 buffer,
1213 bufferCapacity,
1214 err);
1215
1216 if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1217 *err == U_BUFFER_OVERFLOW_ERROR) {
1218 *err = U_ILLEGAL_ARGUMENT_ERROR;
1219
1220 return FALSE;
1221 }
1222 else if (U_FAILURE(*err)) {
1223
1224 return FALSE;
1225 }
1226 else {
1227 return TRUE;
1228 }
1229}
1230
1231U_CAPI int32_t U_EXPORT2
1232uloc_addLikelySubtags(const char* localeID,
1233 char* maximizedLocaleID,
1234 int32_t maximizedLocaleIDCapacity,
1235 UErrorCode* err)
1236{
1237 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1238
1239 if (!do_canonicalize(
1240 localeID,
1241 localeBuffer,
1242 sizeof(localeBuffer),
1243 err)) {
1244 return -1;
1245 }
1246 else {
1247 return _uloc_addLikelySubtags(
1248 localeBuffer,
1249 maximizedLocaleID,
1250 maximizedLocaleIDCapacity,
1251 err);
1252 }
1253}
1254
1255U_CAPI int32_t U_EXPORT2
1256uloc_minimizeSubtags(const char* localeID,
1257 char* minimizedLocaleID,
1258 int32_t minimizedLocaleIDCapacity,
1259 UErrorCode* err)
1260{
1261 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1262
1263 if (!do_canonicalize(
1264 localeID,
1265 localeBuffer,
1266 sizeof(localeBuffer),
1267 err)) {
1268 return -1;
1269 }
1270 else {
1271 return _uloc_minimizeSubtags(
1272 localeBuffer,
1273 minimizedLocaleID,
1274 minimizedLocaleIDCapacity,
1275 err);
1276 }
1277}
1278
// Pairs of (language subtag, + or -) for finding out fast if common languages
// are LTR (minus) or RTL (plus). Declared as a const array so that neither
// the text nor the symbol itself can be modified or re-pointed.
static const char LANG_DIR_STRING[] =
        "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1283
1284// Implemented here because this calls uloc_addLikelySubtags().
1285U_CAPI UBool U_EXPORT2
1286uloc_isRightToLeft(const char *locale) {
1287 UErrorCode errorCode = U_ZERO_ERROR;
1288 char script[8];
1289 int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1290 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1291 scriptLength == 0) {
1292 // Fastpath: We know the likely scripts and their writing direction
1293 // for some common languages.
1294 errorCode = U_ZERO_ERROR;
1295 char lang[8];
1296 int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1297 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1298 langLength == 0) {
1299 return FALSE;
1300 }
1301 const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1302 if (langPtr != NULL) {
1303 switch (langPtr[langLength]) {
1304 case '-': return FALSE;
1305 case '+': return TRUE;
1306 default: break; // partial match of a longer code
1307 }
1308 }
1309 // Otherwise, find the likely script.
1310 errorCode = U_ZERO_ERROR;
1311 char likely[ULOC_FULLNAME_CAPACITY];
1312 (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
1313 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1314 return FALSE;
1315 }
1316 scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
1317 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1318 scriptLength == 0) {
1319 return FALSE;
1320 }
1321 }
1322 UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1323 return uscript_isRightToLeft(scriptCode);
1324}
1325
1326U_NAMESPACE_BEGIN
1327
1328UBool
1329Locale::isRightToLeft() const {
1330 return uloc_isRightToLeft(getBaseName());
1331}
1332
1333// The following must at least allow for rg key value (6) plus terminator (1).
1334#define ULOC_RG_BUFLEN 8
1335
1336U_CAPI int32_t U_EXPORT2
1337ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1338 char *region, int32_t regionCapacity, UErrorCode* status) {
1339 if (U_FAILURE(*status)) {
1340 return 0;
1341 }
1342 char rgBuf[ULOC_RG_BUFLEN];
1343 UErrorCode rgStatus = U_ZERO_ERROR;
1344
1345 // First check for rg keyword value
1346 int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1347 if (U_FAILURE(rgStatus) || rgLen != 6) {
1348 rgLen = 0;
1349 } else {
1350 // rgBuf guaranteed to be zero terminated here, with text len 6
1351 char *rgPtr = rgBuf;
1352 for (; *rgPtr!= 0; rgPtr++) {
1353 *rgPtr = uprv_toupper(*rgPtr);
1354 }
1355 rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
1356 }
1357
1358 if (rgLen == 0) {
1359 // No valid rg keyword value, try for unicode_region_subtag
1360 rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1361 if (U_FAILURE(*status)) {
1362 rgLen = 0;
1363 } else if (rgLen == 0 && inferRegion) {
1364 // no unicode_region_subtag but inferRegion TRUE, try likely subtags
1365 char locBuf[ULOC_FULLNAME_CAPACITY];
1366 rgStatus = U_ZERO_ERROR;
1367 (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus);
1368 if (U_SUCCESS(rgStatus)) {
1369 rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status);
1370 if (U_FAILURE(*status)) {
1371 rgLen = 0;
1372 }
1373 }
1374 }
1375 }
1376
1377 rgBuf[rgLen] = 0;
1378 uprv_strncpy(region, rgBuf, regionCapacity);
1379 return u_terminateChars(region, regionCapacity, rgLen, status);
1380}
1381
1382U_NAMESPACE_END