]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/loclikely.cpp
ICU-62108.0.1.tar.gz
[apple/icu.git] / icuSources / common / loclikely.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
729e4ab9
A
3/*
4*******************************************************************************
5*
2ca993e8 6* Copyright (C) 1997-2016, International Business Machines
729e4ab9
A
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: loclikely.cpp
f3c0d7a5 11* encoding: UTF-8
729e4ab9
A
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2010feb25
16* created by: Markus W. Scherer
17*
18* Code for likely and minimized locale subtags, separated out from other .cpp files
19* that then do not depend on resource bundle code and likely-subtags data.
20*/
21
22#include "unicode/utypes.h"
b331163b 23#include "unicode/locid.h"
729e4ab9 24#include "unicode/putil.h"
f3c0d7a5 25#include "unicode/uchar.h"
729e4ab9
A
26#include "unicode/uloc.h"
27#include "unicode/ures.h"
b331163b 28#include "unicode/uscript.h"
729e4ab9
A
29#include "cmemory.h"
30#include "cstring.h"
31#include "ulocimp.h"
32#include "ustr_imp.h"
33
34/**
35 * This function looks for the localeID in the likelySubtags resource.
36 *
37 * @param localeID The tag to find.
38 * @param buffer A buffer to hold the matching entry
39 * @param bufferLength The length of the output buffer
40 * @return A pointer to "buffer" if found, or a null pointer if not.
41 */
42static const char* U_CALLCONV
43findLikelySubtags(const char* localeID,
44 char* buffer,
45 int32_t bufferLength,
46 UErrorCode* err) {
47 const char* result = NULL;
48
49 if (!U_FAILURE(*err)) {
50 int32_t resLen = 0;
51 const UChar* s = NULL;
52 UErrorCode tmpErr = U_ZERO_ERROR;
53 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
54 if (U_SUCCESS(tmpErr)) {
55 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
56
57 if (U_FAILURE(tmpErr)) {
58 /*
59 * If a resource is missing, it's not really an error, it's
60 * just that we don't have any data for that particular locale ID.
61 */
62 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
63 *err = tmpErr;
64 }
65 }
66 else if (resLen >= bufferLength) {
67 /* The buffer should never overflow. */
68 *err = U_INTERNAL_PROGRAM_ERROR;
69 }
70 else {
71 u_UCharsToChars(s, buffer, resLen + 1);
72 result = buffer;
73 }
74
75 ures_close(subtags);
76 } else {
77 *err = tmpErr;
78 }
79 }
80
81 return result;
82}
83
84/**
85 * Append a tag to a buffer, adding the separator if necessary. The buffer
86 * must be large enough to contain the resulting tag plus any separator
87 * necessary. The tag must not be a zero-length string.
88 *
89 * @param tag The tag to add.
90 * @param tagLength The length of the tag.
91 * @param buffer The output buffer.
92 * @param bufferLength The length of the output buffer. This is an input/ouput parameter.
93 **/
94static void U_CALLCONV
95appendTag(
96 const char* tag,
97 int32_t tagLength,
98 char* buffer,
99 int32_t* bufferLength) {
100
101 if (*bufferLength > 0) {
102 buffer[*bufferLength] = '_';
103 ++(*bufferLength);
104 }
105
106 uprv_memmove(
107 &buffer[*bufferLength],
108 tag,
109 tagLength);
110
111 *bufferLength += tagLength;
112}
113
/**
 * Canonical subtags standing for an unknown language, an unknown script and
 * an unknown region, respectively.
 **/
static char const* const unknownLanguage = "und";
static char const* const unknownScript   = "Zzzz";
static char const* const unknownRegion   = "ZZ";
120
121/**
122 * Create a tag string from the supplied parameters. The lang, script and region
123 * parameters may be NULL pointers. If they are, their corresponding length parameters
124 * must be less than or equal to 0.
125 *
126 * If any of the language, script or region parameters are empty, and the alternateTags
127 * parameter is not NULL, it will be parsed for potential language, script and region tags
128 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
129 * it contains no language tag, the default tag for the unknown language is used.
130 *
131 * If the length of the new string exceeds the capacity of the output buffer,
132 * the function copies as many bytes to the output buffer as it can, and returns
133 * the error U_BUFFER_OVERFLOW_ERROR.
134 *
135 * If an illegal argument is provided, the function returns the error
136 * U_ILLEGAL_ARGUMENT_ERROR.
137 *
138 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
139 * the tag string fits in the output buffer, but the null terminator doesn't.
140 *
141 * @param lang The language tag to use.
142 * @param langLength The length of the language tag.
143 * @param script The script tag to use.
144 * @param scriptLength The length of the script tag.
145 * @param region The region tag to use.
146 * @param regionLength The length of the region tag.
147 * @param trailing Any trailing data to append to the new tag.
148 * @param trailingLength The length of the trailing data.
149 * @param alternateTags A string containing any alternate tags.
150 * @param tag The output buffer.
151 * @param tagCapacity The capacity of the output buffer.
152 * @param err A pointer to a UErrorCode for error reporting.
153 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
154 **/
155static int32_t U_CALLCONV
156createTagStringWithAlternates(
157 const char* lang,
158 int32_t langLength,
159 const char* script,
160 int32_t scriptLength,
161 const char* region,
162 int32_t regionLength,
163 const char* trailing,
164 int32_t trailingLength,
165 const char* alternateTags,
166 char* tag,
167 int32_t tagCapacity,
168 UErrorCode* err) {
169
170 if (U_FAILURE(*err)) {
171 goto error;
172 }
173 else if (tag == NULL ||
174 tagCapacity <= 0 ||
175 langLength >= ULOC_LANG_CAPACITY ||
176 scriptLength >= ULOC_SCRIPT_CAPACITY ||
177 regionLength >= ULOC_COUNTRY_CAPACITY) {
178 goto error;
179 }
180 else {
181 /**
182 * ULOC_FULLNAME_CAPACITY will provide enough capacity
183 * that we can build a string that contains the language,
184 * script and region code without worrying about overrunning
185 * the user-supplied buffer.
186 **/
187 char tagBuffer[ULOC_FULLNAME_CAPACITY];
188 int32_t tagLength = 0;
189 int32_t capacityRemaining = tagCapacity;
190 UBool regionAppended = FALSE;
191
192 if (langLength > 0) {
193 appendTag(
194 lang,
195 langLength,
196 tagBuffer,
197 &tagLength);
198 }
199 else if (alternateTags == NULL) {
200 /*
201 * Append the value for an unknown language, if
202 * we found no language.
203 */
204 appendTag(
205 unknownLanguage,
206 (int32_t)uprv_strlen(unknownLanguage),
207 tagBuffer,
208 &tagLength);
209 }
210 else {
211 /*
212 * Parse the alternateTags string for the language.
213 */
214 char alternateLang[ULOC_LANG_CAPACITY];
215 int32_t alternateLangLength = sizeof(alternateLang);
216
217 alternateLangLength =
218 uloc_getLanguage(
219 alternateTags,
220 alternateLang,
221 alternateLangLength,
222 err);
223 if(U_FAILURE(*err) ||
224 alternateLangLength >= ULOC_LANG_CAPACITY) {
225 goto error;
226 }
227 else if (alternateLangLength == 0) {
228 /*
229 * Append the value for an unknown language, if
230 * we found no language.
231 */
232 appendTag(
233 unknownLanguage,
234 (int32_t)uprv_strlen(unknownLanguage),
235 tagBuffer,
236 &tagLength);
237 }
238 else {
239 appendTag(
240 alternateLang,
241 alternateLangLength,
242 tagBuffer,
243 &tagLength);
244 }
245 }
246
247 if (scriptLength > 0) {
248 appendTag(
249 script,
250 scriptLength,
251 tagBuffer,
252 &tagLength);
253 }
254 else if (alternateTags != NULL) {
255 /*
256 * Parse the alternateTags string for the script.
257 */
258 char alternateScript[ULOC_SCRIPT_CAPACITY];
259
260 const int32_t alternateScriptLength =
261 uloc_getScript(
262 alternateTags,
263 alternateScript,
264 sizeof(alternateScript),
265 err);
266
267 if (U_FAILURE(*err) ||
268 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
269 goto error;
270 }
271 else if (alternateScriptLength > 0) {
272 appendTag(
273 alternateScript,
274 alternateScriptLength,
275 tagBuffer,
276 &tagLength);
277 }
278 }
279
280 if (regionLength > 0) {
281 appendTag(
282 region,
283 regionLength,
284 tagBuffer,
285 &tagLength);
286
287 regionAppended = TRUE;
288 }
289 else if (alternateTags != NULL) {
290 /*
291 * Parse the alternateTags string for the region.
292 */
293 char alternateRegion[ULOC_COUNTRY_CAPACITY];
294
295 const int32_t alternateRegionLength =
296 uloc_getCountry(
297 alternateTags,
298 alternateRegion,
299 sizeof(alternateRegion),
300 err);
301 if (U_FAILURE(*err) ||
302 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
303 goto error;
304 }
305 else if (alternateRegionLength > 0) {
306 appendTag(
307 alternateRegion,
308 alternateRegionLength,
309 tagBuffer,
310 &tagLength);
311
312 regionAppended = TRUE;
313 }
314 }
315
316 {
317 const int32_t toCopy =
318 tagLength >= tagCapacity ? tagCapacity : tagLength;
319
320 /**
321 * Copy the partial tag from our internal buffer to the supplied
322 * target.
323 **/
324 uprv_memcpy(
325 tag,
326 tagBuffer,
327 toCopy);
328
329 capacityRemaining -= toCopy;
330 }
331
332 if (trailingLength > 0) {
333 if (*trailing != '@' && capacityRemaining > 0) {
334 tag[tagLength++] = '_';
335 --capacityRemaining;
336 if (capacityRemaining > 0 && !regionAppended) {
337 /* extra separator is required */
338 tag[tagLength++] = '_';
339 --capacityRemaining;
340 }
341 }
342
343 if (capacityRemaining > 0) {
344 /*
345 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we
346 * don't know if the user-supplied buffers overlap.
347 */
348 const int32_t toCopy =
349 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
350
351 uprv_memmove(
352 &tag[tagLength],
353 trailing,
354 toCopy);
355 }
356 }
357
358 tagLength += trailingLength;
359
360 return u_terminateChars(
361 tag,
362 tagCapacity,
363 tagLength,
364 err);
365 }
366
367error:
368
369 /**
370 * An overflow indicates the locale ID passed in
371 * is ill-formed. If we got here, and there was
372 * no previous error, it's an implicit overflow.
373 **/
374 if (*err == U_BUFFER_OVERFLOW_ERROR ||
375 U_SUCCESS(*err)) {
376 *err = U_ILLEGAL_ARGUMENT_ERROR;
377 }
378
379 return -1;
380}
381
382/**
383 * Create a tag string from the supplied parameters. The lang, script and region
384 * parameters may be NULL pointers. If they are, their corresponding length parameters
385 * must be less than or equal to 0. If the lang parameter is an empty string, the
386 * default value for an unknown language is written to the output buffer.
387 *
388 * If the length of the new string exceeds the capacity of the output buffer,
389 * the function copies as many bytes to the output buffer as it can, and returns
390 * the error U_BUFFER_OVERFLOW_ERROR.
391 *
392 * If an illegal argument is provided, the function returns the error
393 * U_ILLEGAL_ARGUMENT_ERROR.
394 *
395 * @param lang The language tag to use.
396 * @param langLength The length of the language tag.
397 * @param script The script tag to use.
398 * @param scriptLength The length of the script tag.
399 * @param region The region tag to use.
400 * @param regionLength The length of the region tag.
401 * @param trailing Any trailing data to append to the new tag.
402 * @param trailingLength The length of the trailing data.
403 * @param tag The output buffer.
404 * @param tagCapacity The capacity of the output buffer.
405 * @param err A pointer to a UErrorCode for error reporting.
406 * @return The length of the tag string, which may be greater than tagCapacity.
407 **/
408static int32_t U_CALLCONV
409createTagString(
410 const char* lang,
411 int32_t langLength,
412 const char* script,
413 int32_t scriptLength,
414 const char* region,
415 int32_t regionLength,
416 const char* trailing,
417 int32_t trailingLength,
418 char* tag,
419 int32_t tagCapacity,
420 UErrorCode* err)
421{
422 return createTagStringWithAlternates(
423 lang,
424 langLength,
425 script,
426 scriptLength,
427 region,
428 regionLength,
429 trailing,
430 trailingLength,
431 NULL,
432 tag,
433 tagCapacity,
434 err);
435}
436
437/**
438 * Parse the language, script, and region subtags from a tag string, and copy the
439 * results into the corresponding output parameters. The buffers are null-terminated,
440 * unless overflow occurs.
441 *
442 * The langLength, scriptLength, and regionLength parameters are input/output
443 * parameters, and must contain the capacity of their corresponding buffers on
444 * input. On output, they will contain the actual length of the buffers, not
445 * including the null terminator.
446 *
447 * If the length of any of the output subtags exceeds the capacity of the corresponding
448 * buffer, the function copies as many bytes to the output buffer as it can, and returns
449 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
450 * occurs.
451 *
452 * If an illegal argument is provided, the function returns the error
453 * U_ILLEGAL_ARGUMENT_ERROR.
454 *
455 * @param localeID The locale ID to parse.
456 * @param lang The language tag buffer.
457 * @param langLength The length of the language tag.
458 * @param script The script tag buffer.
459 * @param scriptLength The length of the script tag.
460 * @param region The region tag buffer.
461 * @param regionLength The length of the region tag.
462 * @param err A pointer to a UErrorCode for error reporting.
463 * @return The number of chars of the localeID parameter consumed.
464 **/
465static int32_t U_CALLCONV
466parseTagString(
467 const char* localeID,
468 char* lang,
469 int32_t* langLength,
470 char* script,
471 int32_t* scriptLength,
472 char* region,
473 int32_t* regionLength,
474 UErrorCode* err)
475{
476 const char* position = localeID;
477 int32_t subtagLength = 0;
478
479 if(U_FAILURE(*err) ||
480 localeID == NULL ||
481 lang == NULL ||
482 langLength == NULL ||
483 script == NULL ||
484 scriptLength == NULL ||
485 region == NULL ||
486 regionLength == NULL) {
487 goto error;
488 }
489
490 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
491 u_terminateChars(lang, *langLength, subtagLength, err);
492
493 /*
494 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
495 * to be an error, because it indicates the user-supplied tag is
496 * not well-formed.
497 */
498 if(U_FAILURE(*err)) {
499 goto error;
500 }
501
502 *langLength = subtagLength;
503
504 /*
505 * If no language was present, use the value of unknownLanguage
506 * instead. Otherwise, move past any separator.
507 */
508 if (*langLength == 0) {
509 uprv_strcpy(
510 lang,
511 unknownLanguage);
512 *langLength = (int32_t)uprv_strlen(lang);
513 }
0f5d89e8 514 if (_isIDSeparator(*position)) {
729e4ab9
A
515 ++position;
516 }
517
518 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
519 u_terminateChars(script, *scriptLength, subtagLength, err);
520
521 if(U_FAILURE(*err)) {
522 goto error;
523 }
524
525 *scriptLength = subtagLength;
526
527 if (*scriptLength > 0) {
528 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
529 /**
530 * If the script part is the "unknown" script, then don't return it.
531 **/
532 *scriptLength = 0;
533 }
534
535 /*
536 * Move past any separator.
537 */
538 if (_isIDSeparator(*position)) {
539 ++position;
540 }
541 }
542
543 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
544 u_terminateChars(region, *regionLength, subtagLength, err);
545
546 if(U_FAILURE(*err)) {
547 goto error;
548 }
549
550 *regionLength = subtagLength;
551
552 if (*regionLength > 0) {
553 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
554 /**
555 * If the region part is the "unknown" region, then don't return it.
556 **/
557 *regionLength = 0;
558 }
559 } else if (*position != 0 && *position != '@') {
560 /* back up over consumed trailing separator */
561 --position;
562 }
563
564exit:
565
566 return (int32_t)(position - localeID);
567
568error:
569
570 /**
571 * If we get here, we have no explicit error, it's the result of an
572 * illegal argument.
573 **/
574 if (!U_FAILURE(*err)) {
575 *err = U_ILLEGAL_ARGUMENT_ERROR;
576 }
577
578 goto exit;
579}
580
581static int32_t U_CALLCONV
582createLikelySubtagsString(
583 const char* lang,
584 int32_t langLength,
585 const char* script,
586 int32_t scriptLength,
587 const char* region,
588 int32_t regionLength,
589 const char* variants,
590 int32_t variantsLength,
591 char* tag,
592 int32_t tagCapacity,
593 UErrorCode* err)
594{
595 /**
596 * ULOC_FULLNAME_CAPACITY will provide enough capacity
597 * that we can build a string that contains the language,
598 * script and region code without worrying about overrunning
599 * the user-supplied buffer.
600 **/
601 char tagBuffer[ULOC_FULLNAME_CAPACITY];
602 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
729e4ab9
A
603
604 if(U_FAILURE(*err)) {
605 goto error;
606 }
607
608 /**
609 * Try the language with the script and region first.
610 **/
611 if (scriptLength > 0 && regionLength > 0) {
612
613 const char* likelySubtags = NULL;
614
4388f060 615 createTagString(
729e4ab9
A
616 lang,
617 langLength,
618 script,
619 scriptLength,
620 region,
621 regionLength,
622 NULL,
623 0,
624 tagBuffer,
625 sizeof(tagBuffer),
626 err);
627 if(U_FAILURE(*err)) {
628 goto error;
629 }
630
631 likelySubtags =
632 findLikelySubtags(
633 tagBuffer,
634 likelySubtagsBuffer,
635 sizeof(likelySubtagsBuffer),
636 err);
637 if(U_FAILURE(*err)) {
638 goto error;
639 }
640
641 if (likelySubtags != NULL) {
642 /* Always use the language tag from the
643 maximal string, since it may be more
644 specific than the one provided. */
645 return createTagStringWithAlternates(
646 NULL,
647 0,
648 NULL,
649 0,
650 NULL,
651 0,
652 variants,
653 variantsLength,
654 likelySubtags,
655 tag,
656 tagCapacity,
657 err);
658 }
659 }
660
661 /**
662 * Try the language with just the script.
663 **/
664 if (scriptLength > 0) {
665
666 const char* likelySubtags = NULL;
667
4388f060 668 createTagString(
729e4ab9
A
669 lang,
670 langLength,
671 script,
672 scriptLength,
673 NULL,
674 0,
675 NULL,
676 0,
677 tagBuffer,
678 sizeof(tagBuffer),
679 err);
680 if(U_FAILURE(*err)) {
681 goto error;
682 }
683
684 likelySubtags =
685 findLikelySubtags(
686 tagBuffer,
687 likelySubtagsBuffer,
688 sizeof(likelySubtagsBuffer),
689 err);
690 if(U_FAILURE(*err)) {
691 goto error;
692 }
693
694 if (likelySubtags != NULL) {
695 /* Always use the language tag from the
696 maximal string, since it may be more
697 specific than the one provided. */
698 return createTagStringWithAlternates(
699 NULL,
700 0,
701 NULL,
702 0,
703 region,
704 regionLength,
705 variants,
706 variantsLength,
707 likelySubtags,
708 tag,
709 tagCapacity,
710 err);
711 }
712 }
713
714 /**
715 * Try the language with just the region.
716 **/
717 if (regionLength > 0) {
718
719 const char* likelySubtags = NULL;
720
721 createTagString(
722 lang,
723 langLength,
724 NULL,
725 0,
726 region,
727 regionLength,
728 NULL,
729 0,
730 tagBuffer,
731 sizeof(tagBuffer),
732 err);
733 if(U_FAILURE(*err)) {
734 goto error;
735 }
736
737 likelySubtags =
738 findLikelySubtags(
739 tagBuffer,
740 likelySubtagsBuffer,
741 sizeof(likelySubtagsBuffer),
742 err);
743 if(U_FAILURE(*err)) {
744 goto error;
745 }
746
747 if (likelySubtags != NULL) {
748 /* Always use the language tag from the
749 maximal string, since it may be more
750 specific than the one provided. */
751 return createTagStringWithAlternates(
752 NULL,
753 0,
754 script,
755 scriptLength,
756 NULL,
757 0,
758 variants,
759 variantsLength,
760 likelySubtags,
761 tag,
762 tagCapacity,
763 err);
764 }
765 }
766
767 /**
768 * Finally, try just the language.
769 **/
770 {
771 const char* likelySubtags = NULL;
772
773 createTagString(
774 lang,
775 langLength,
776 NULL,
777 0,
778 NULL,
779 0,
780 NULL,
781 0,
782 tagBuffer,
783 sizeof(tagBuffer),
784 err);
785 if(U_FAILURE(*err)) {
786 goto error;
787 }
788
789 likelySubtags =
790 findLikelySubtags(
791 tagBuffer,
792 likelySubtagsBuffer,
793 sizeof(likelySubtagsBuffer),
794 err);
795 if(U_FAILURE(*err)) {
796 goto error;
797 }
798
799 if (likelySubtags != NULL) {
800 /* Always use the language tag from the
801 maximal string, since it may be more
802 specific than the one provided. */
803 return createTagStringWithAlternates(
804 NULL,
805 0,
806 script,
807 scriptLength,
808 region,
809 regionLength,
810 variants,
811 variantsLength,
812 likelySubtags,
813 tag,
814 tagCapacity,
815 err);
816 }
817 }
818
819 return u_terminateChars(
820 tag,
821 tagCapacity,
822 0,
823 err);
824
825error:
826
827 if (!U_FAILURE(*err)) {
828 *err = U_ILLEGAL_ARGUMENT_ERROR;
829 }
830
831 return -1;
832}
833
/**
 * Scans the trailing part of a locale ID and jumps to the `error` label of
 * the enclosing function when a run of characters between separators is too
 * long.
 *
 * '-' and '_' restart the run counter and '@' ends the scan. Any other
 * character extends the current run, unless the counter already exceeds 8,
 * in which case control transfers to `error`. In effect the jump happens on
 * the 10th consecutive non-separator character.
 * NOTE(review): a limit of 8 per subtag may have been intended, but runs of
 * 9 characters are accepted; this is kept as is for compatibility.
 *
 * The macro must be used as a statement inside a function that defines an
 * `error` label. Both arguments are evaluated more than once.
 *
 * @param trailing Pointer to the first char of the trailing data.
 * @param trailingLength Number of chars to examine.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    } while (0)
852
853static int32_t
854_uloc_addLikelySubtags(const char* localeID,
855 char* maximizedLocaleID,
856 int32_t maximizedLocaleIDCapacity,
857 UErrorCode* err)
858{
859 char lang[ULOC_LANG_CAPACITY];
860 int32_t langLength = sizeof(lang);
861 char script[ULOC_SCRIPT_CAPACITY];
862 int32_t scriptLength = sizeof(script);
863 char region[ULOC_COUNTRY_CAPACITY];
864 int32_t regionLength = sizeof(region);
865 const char* trailing = "";
866 int32_t trailingLength = 0;
867 int32_t trailingIndex = 0;
868 int32_t resultLength = 0;
869
870 if(U_FAILURE(*err)) {
871 goto error;
872 }
873 else if (localeID == NULL ||
874 maximizedLocaleID == NULL ||
875 maximizedLocaleIDCapacity <= 0) {
876 goto error;
877 }
878
879 trailingIndex = parseTagString(
880 localeID,
881 lang,
882 &langLength,
883 script,
884 &scriptLength,
885 region,
886 &regionLength,
887 err);
888 if(U_FAILURE(*err)) {
889 /* Overflow indicates an illegal argument error */
890 if (*err == U_BUFFER_OVERFLOW_ERROR) {
891 *err = U_ILLEGAL_ARGUMENT_ERROR;
892 }
893
894 goto error;
895 }
896
897 /* Find the length of the trailing portion. */
898 while (_isIDSeparator(localeID[trailingIndex])) {
899 trailingIndex++;
900 }
901 trailing = &localeID[trailingIndex];
902 trailingLength = (int32_t)uprv_strlen(trailing);
903
904 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
905
906 resultLength =
907 createLikelySubtagsString(
908 lang,
909 langLength,
910 script,
911 scriptLength,
912 region,
913 regionLength,
914 trailing,
915 trailingLength,
916 maximizedLocaleID,
917 maximizedLocaleIDCapacity,
918 err);
919
920 if (resultLength == 0) {
921 const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
922
923 /*
924 * If we get here, we need to return localeID.
925 */
926 uprv_memcpy(
927 maximizedLocaleID,
928 localeID,
929 localIDLength <= maximizedLocaleIDCapacity ?
930 localIDLength : maximizedLocaleIDCapacity);
931
932 resultLength =
933 u_terminateChars(
934 maximizedLocaleID,
935 maximizedLocaleIDCapacity,
936 localIDLength,
937 err);
938 }
939
940 return resultLength;
941
942error:
943
944 if (!U_FAILURE(*err)) {
945 *err = U_ILLEGAL_ARGUMENT_ERROR;
946 }
947
948 return -1;
949}
950
951static int32_t
952_uloc_minimizeSubtags(const char* localeID,
953 char* minimizedLocaleID,
954 int32_t minimizedLocaleIDCapacity,
955 UErrorCode* err)
956{
957 /**
958 * ULOC_FULLNAME_CAPACITY will provide enough capacity
959 * that we can build a string that contains the language,
960 * script and region code without worrying about overrunning
961 * the user-supplied buffer.
962 **/
963 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
964 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
965
966 char lang[ULOC_LANG_CAPACITY];
967 int32_t langLength = sizeof(lang);
968 char script[ULOC_SCRIPT_CAPACITY];
969 int32_t scriptLength = sizeof(script);
970 char region[ULOC_COUNTRY_CAPACITY];
971 int32_t regionLength = sizeof(region);
972 const char* trailing = "";
973 int32_t trailingLength = 0;
974 int32_t trailingIndex = 0;
975
976 if(U_FAILURE(*err)) {
977 goto error;
978 }
979 else if (localeID == NULL ||
980 minimizedLocaleID == NULL ||
981 minimizedLocaleIDCapacity <= 0) {
982 goto error;
983 }
984
985 trailingIndex =
986 parseTagString(
987 localeID,
988 lang,
989 &langLength,
990 script,
991 &scriptLength,
992 region,
993 &regionLength,
994 err);
995 if(U_FAILURE(*err)) {
996
997 /* Overflow indicates an illegal argument error */
998 if (*err == U_BUFFER_OVERFLOW_ERROR) {
999 *err = U_ILLEGAL_ARGUMENT_ERROR;
1000 }
1001
1002 goto error;
1003 }
1004
1005 /* Find the spot where the variants or the keywords begin, if any. */
1006 while (_isIDSeparator(localeID[trailingIndex])) {
1007 trailingIndex++;
1008 }
1009 trailing = &localeID[trailingIndex];
1010 trailingLength = (int32_t)uprv_strlen(trailing);
1011
1012 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1013
1014 createTagString(
1015 lang,
1016 langLength,
1017 script,
1018 scriptLength,
1019 region,
1020 regionLength,
1021 NULL,
1022 0,
1023 maximizedTagBuffer,
1024 maximizedTagBufferLength,
1025 err);
1026 if(U_FAILURE(*err)) {
1027 goto error;
1028 }
1029
1030 /**
1031 * First, we need to first get the maximization
1032 * from AddLikelySubtags.
1033 **/
1034 maximizedTagBufferLength =
1035 uloc_addLikelySubtags(
1036 maximizedTagBuffer,
1037 maximizedTagBuffer,
1038 maximizedTagBufferLength,
1039 err);
1040
1041 if(U_FAILURE(*err)) {
1042 goto error;
1043 }
1044
1045 /**
1046 * Start first with just the language.
1047 **/
1048 {
1049 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1050
1051 const int32_t tagBufferLength =
1052 createLikelySubtagsString(
1053 lang,
1054 langLength,
1055 NULL,
1056 0,
1057 NULL,
1058 0,
1059 NULL,
1060 0,
1061 tagBuffer,
1062 sizeof(tagBuffer),
1063 err);
1064
1065 if(U_FAILURE(*err)) {
1066 goto error;
1067 }
1068 else if (uprv_strnicmp(
1069 maximizedTagBuffer,
1070 tagBuffer,
1071 tagBufferLength) == 0) {
1072
1073 return createTagString(
1074 lang,
1075 langLength,
1076 NULL,
1077 0,
1078 NULL,
1079 0,
1080 trailing,
1081 trailingLength,
1082 minimizedLocaleID,
1083 minimizedLocaleIDCapacity,
1084 err);
1085 }
1086 }
1087
1088 /**
1089 * Next, try the language and region.
1090 **/
1091 if (regionLength > 0) {
1092
1093 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1094
1095 const int32_t tagBufferLength =
1096 createLikelySubtagsString(
1097 lang,
1098 langLength,
1099 NULL,
1100 0,
1101 region,
1102 regionLength,
1103 NULL,
1104 0,
1105 tagBuffer,
1106 sizeof(tagBuffer),
1107 err);
1108
1109 if(U_FAILURE(*err)) {
1110 goto error;
1111 }
1112 else if (uprv_strnicmp(
1113 maximizedTagBuffer,
1114 tagBuffer,
1115 tagBufferLength) == 0) {
1116
1117 return createTagString(
1118 lang,
1119 langLength,
1120 NULL,
1121 0,
1122 region,
1123 regionLength,
1124 trailing,
1125 trailingLength,
1126 minimizedLocaleID,
1127 minimizedLocaleIDCapacity,
1128 err);
1129 }
1130 }
1131
1132 /**
1133 * Finally, try the language and script. This is our last chance,
1134 * since trying with all three subtags would only yield the
1135 * maximal version that we already have.
1136 **/
1137 if (scriptLength > 0 && regionLength > 0) {
1138 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1139
1140 const int32_t tagBufferLength =
1141 createLikelySubtagsString(
1142 lang,
1143 langLength,
1144 script,
1145 scriptLength,
1146 NULL,
1147 0,
1148 NULL,
1149 0,
1150 tagBuffer,
1151 sizeof(tagBuffer),
1152 err);
1153
1154 if(U_FAILURE(*err)) {
1155 goto error;
1156 }
1157 else if (uprv_strnicmp(
1158 maximizedTagBuffer,
1159 tagBuffer,
1160 tagBufferLength) == 0) {
1161
1162 return createTagString(
1163 lang,
1164 langLength,
1165 script,
1166 scriptLength,
1167 NULL,
1168 0,
1169 trailing,
1170 trailingLength,
1171 minimizedLocaleID,
1172 minimizedLocaleIDCapacity,
1173 err);
1174 }
1175 }
1176
1177 {
1178 /**
1179 * If we got here, return the locale ID parameter.
1180 **/
1181 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1182
1183 uprv_memcpy(
1184 minimizedLocaleID,
1185 localeID,
1186 localeIDLength <= minimizedLocaleIDCapacity ?
1187 localeIDLength : minimizedLocaleIDCapacity);
1188
1189 return u_terminateChars(
1190 minimizedLocaleID,
1191 minimizedLocaleIDCapacity,
1192 localeIDLength,
1193 err);
1194 }
1195
1196error:
1197
1198 if (!U_FAILURE(*err)) {
1199 *err = U_ILLEGAL_ARGUMENT_ERROR;
1200 }
1201
1202 return -1;
1203
1204
1205}
1206
1207static UBool
1208do_canonicalize(const char* localeID,
1209 char* buffer,
1210 int32_t bufferCapacity,
1211 UErrorCode* err)
1212{
1213 uloc_canonicalize(
1214 localeID,
1215 buffer,
1216 bufferCapacity,
1217 err);
1218
1219 if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1220 *err == U_BUFFER_OVERFLOW_ERROR) {
1221 *err = U_ILLEGAL_ARGUMENT_ERROR;
1222
1223 return FALSE;
1224 }
1225 else if (U_FAILURE(*err)) {
1226
1227 return FALSE;
1228 }
1229 else {
1230 return TRUE;
1231 }
1232}
1233
51004dcb 1234U_CAPI int32_t U_EXPORT2
729e4ab9
A
1235uloc_addLikelySubtags(const char* localeID,
1236 char* maximizedLocaleID,
1237 int32_t maximizedLocaleIDCapacity,
1238 UErrorCode* err)
1239{
1240 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1241
1242 if (!do_canonicalize(
1243 localeID,
1244 localeBuffer,
1245 sizeof(localeBuffer),
1246 err)) {
1247 return -1;
1248 }
1249 else {
1250 return _uloc_addLikelySubtags(
1251 localeBuffer,
1252 maximizedLocaleID,
1253 maximizedLocaleIDCapacity,
1254 err);
1255 }
1256}
1257
51004dcb 1258U_CAPI int32_t U_EXPORT2
729e4ab9
A
1259uloc_minimizeSubtags(const char* localeID,
1260 char* minimizedLocaleID,
1261 int32_t minimizedLocaleIDCapacity,
1262 UErrorCode* err)
1263{
1264 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1265
1266 if (!do_canonicalize(
1267 localeID,
1268 localeBuffer,
1269 sizeof(localeBuffer),
1270 err)) {
1271 return -1;
1272 }
1273 else {
1274 return _uloc_minimizeSubtags(
1275 localeBuffer,
1276 minimizedLocaleID,
1277 minimizedLocaleIDCapacity,
1278 err);
1279 }
1280}
b331163b
A
1281
// Fast-path table for common languages: each language subtag is immediately
// followed by '-' if the language is written left-to-right, or by '+' if it
// is written right-to-left.
static const char LANG_DIR_STRING[] = "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1286
1287// Implemented here because this calls uloc_addLikelySubtags().
1288U_CAPI UBool U_EXPORT2
1289uloc_isRightToLeft(const char *locale) {
1290 UErrorCode errorCode = U_ZERO_ERROR;
1291 char script[8];
1292 int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1293 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1294 scriptLength == 0) {
1295 // Fastpath: We know the likely scripts and their writing direction
1296 // for some common languages.
1297 errorCode = U_ZERO_ERROR;
1298 char lang[8];
1299 int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1300 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1301 langLength == 0) {
1302 return FALSE;
1303 }
1304 const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1305 if (langPtr != NULL) {
1306 switch (langPtr[langLength]) {
1307 case '-': return FALSE;
1308 case '+': return TRUE;
1309 default: break; // partial match of a longer code
1310 }
1311 }
1312 // Otherwise, find the likely script.
1313 errorCode = U_ZERO_ERROR;
1314 char likely[ULOC_FULLNAME_CAPACITY];
1315 (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
1316 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1317 return FALSE;
1318 }
1319 scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
1320 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1321 scriptLength == 0) {
1322 return FALSE;
1323 }
1324 }
1325 UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1326 return uscript_isRightToLeft(scriptCode);
1327}
1328
1329U_NAMESPACE_BEGIN
1330
1331UBool
1332Locale::isRightToLeft() const {
1333 return uloc_isRightToLeft(getBaseName());
1334}
1335
f3c0d7a5
A
1336U_NAMESPACE_END
1337
2ca993e8
A
1338// The following must at least allow for rg key value (6) plus terminator (1).
1339#define ULOC_RG_BUFLEN 8
1340
1341U_CAPI int32_t U_EXPORT2
1342ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1343 char *region, int32_t regionCapacity, UErrorCode* status) {
1344 if (U_FAILURE(*status)) {
1345 return 0;
1346 }
1347 char rgBuf[ULOC_RG_BUFLEN];
1348 UErrorCode rgStatus = U_ZERO_ERROR;
1349
1350 // First check for rg keyword value
1351 int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1352 if (U_FAILURE(rgStatus) || rgLen != 6) {
1353 rgLen = 0;
1354 } else {
1355 // rgBuf guaranteed to be zero terminated here, with text len 6
1356 char *rgPtr = rgBuf;
1357 for (; *rgPtr!= 0; rgPtr++) {
1358 *rgPtr = uprv_toupper(*rgPtr);
1359 }
1360 rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
1361 }
1362
1363 if (rgLen == 0) {
1364 // No valid rg keyword value, try for unicode_region_subtag
1365 rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1366 if (U_FAILURE(*status)) {
1367 rgLen = 0;
1368 } else if (rgLen == 0 && inferRegion) {
1369 // no unicode_region_subtag but inferRegion TRUE, try likely subtags
1370 char locBuf[ULOC_FULLNAME_CAPACITY];
1371 rgStatus = U_ZERO_ERROR;
1372 (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus);
1373 if (U_SUCCESS(rgStatus)) {
1374 rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status);
1375 if (U_FAILURE(*status)) {
1376 rgLen = 0;
1377 }
1378 }
1379 }
1380 }
1381
1382 rgBuf[rgLen] = 0;
1383 uprv_strncpy(region, rgBuf, regionCapacity);
1384 return u_terminateChars(region, regionCapacity, rgLen, status);
1385}
1386