2 *******************************************************************************
3 * Copyright (C) 1996-2008, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucol_res.cpp
8 * tab size: 8 (not used)
12 * This file contains dependencies that the collation run-time doesn't normally
13 * need. This mainly contains resource bundle usage and collation meta information
15 * Modification history
17 * 1996-1999 various members of ICU team maintained C API for collation framework
18 * 02/16/2001 synwee Added internal method getPrevSpecialCE
19 * 03/01/2001 synwee Added maxexpansion functionality.
20 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
21 * 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp
24 #include "unicode/utypes.h"
26 #if !UCONFIG_NO_COLLATION
27 #include "unicode/uloc.h"
28 #include "unicode/coll.h"
29 #include "unicode/tblcoll.h"
30 #include "unicode/caniter.h"
31 #include "unicode/ustring.h"
49 // static UCA. There is only one. Collators don't use it.
50 // It is referenced only in ucol_initUCA and ucol_cleanup
51 static UCollator
* _staticUCA
= NULL
;
52 // static pointer to udata memory. Inited in ucol_initUCA
53 // used for cleanup in ucol_cleanup
54 static UDataMemory
* UCA_DATA_MEM
= NULL
;
// Cleanup callback for this file (handed to ucln_i18n_registerCleanup from
// ucol_initUCA): closes the UCA data memory and the static UCA collator.
57 static UBool U_CALLCONV
58 ucol_res_cleanup(void)
// release the udata memory kept in UCA_DATA_MEM
61 udata_close(UCA_DATA_MEM
);
// release the static UCA collator
65 ucol_close(_staticUCA
);
71 static UBool U_CALLCONV
72 isAcceptableUCA(void * /*context*/,
73 const char * /*type*/, const char * /*name*/,
74 const UDataInfo
*pInfo
){
75 /* context, type & name are intentionally not used */
76 if( pInfo
->size
>=20 &&
77 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
78 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
79 pInfo
->dataFormat
[0]==UCA_DATA_FORMAT_0
&& /* dataFormat="UCol" */
80 pInfo
->dataFormat
[1]==UCA_DATA_FORMAT_1
&&
81 pInfo
->dataFormat
[2]==UCA_DATA_FORMAT_2
&&
82 pInfo
->dataFormat
[3]==UCA_DATA_FORMAT_3
&&
83 pInfo
->formatVersion
[0]==UCA_FORMAT_VERSION_0
&&
84 pInfo
->formatVersion
[1]>=UCA_FORMAT_VERSION_1
// &&
85 //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 &&
86 //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh
87 //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh
89 UVersionInfo UCDVersion
;
90 u_getUnicodeVersion(UCDVersion
);
91 return (UBool
)(pInfo
->dataVersion
[0]==UCDVersion
[0]
92 && pInfo
->dataVersion
[1]==UCDVersion
[1]);
93 //&& pInfo->dataVersion[2]==ucaDataInfo.dataVersion[2]
94 //&& pInfo->dataVersion[3]==ucaDataInfo.dataVersion[3]);
101 /* do not close UCA returned by ucol_initUCA! */
// Sets up the static UCA: opens the UCA data item, builds a collator on top of
// it and keeps the data handle in UCA_DATA_MEM. Also registers
// ucol_res_cleanup and initializes the implicit-generation constants.
103 ucol_initUCA(UErrorCode
*status
) {
// incoming error status is checked before anything else
104 if(U_FAILURE(*status
)) {
// record in needsInit whether _staticUCA is still NULL
108 UMTX_CHECK(NULL
, (_staticUCA
== NULL
), needsInit
);
// open the UCA data item; isAcceptableUCA vets its header
111 UDataMemory
*result
= udata_openChoice(NULL
, UCA_DATA_TYPE
, UCA_DATA_NAME
, isAcceptableUCA
, NULL
, status
);
113 if(U_SUCCESS(*status
)){
// build a collator directly from the loaded data
114 UCollator
*newUCA
= ucol_initCollator((const UCATableHeader
*)udata_getMemory(result
), NULL
, NULL
, status
);
115 if(U_SUCCESS(*status
)){
// _staticUCA is checked again before the data handle is stored
117 if(_staticUCA
== NULL
) {
120 UCA_DATA_MEM
= result
;
// register this file's cleanup function
125 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES
, ucol_res_cleanup
);
130 // Initialize variables for implicit generation
131 uprv_uca_initImplicitConstants(status
);
144 U_CAPI
void U_EXPORT2
151 /****************************************************************************/
152 /* Following are the open/close functions */
154 /****************************************************************************/
// Opens a collator from rules: reads the string stored under the "Sequence"
// key of collElem and passes it to ucol_openRules with UCOL_DEFAULT for both
// the normalization mode and the strength, and no parse-error output.
156 tryOpeningFromRules(UResourceBundle
*collElem
, UErrorCode
*status
) {
157 int32_t rulesLen
= 0;
// rulesLen receives the length of the returned string
158 const UChar
*rules
= ures_getStringByKey(collElem
, "Sequence", &rulesLen
, status
);
159 return ucol_openRules(rules
, rulesLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, status
);
166 ucol_open_internal(const char *loc
,
169 UErrorCode intStatus
= U_ZERO_ERROR
;
170 const UCollator
* UCA
= ucol_initUCA(status
);
173 if(U_FAILURE(*status
)) return 0;
177 UCollator
*result
= NULL
;
178 UResourceBundle
*b
= ures_open(U_ICUDATA_COLL
, loc
, status
);
180 /* we try to find stuff from keyword */
181 UResourceBundle
*collations
= ures_getByKey(b
, "collations", NULL
, status
);
182 UResourceBundle
*collElem
= NULL
;
184 // if there is a keyword, we pick it up and try to get elements
185 if(!uloc_getKeywordValue(loc
, "collation", keyBuffer
, 256, status
)) {
186 // no keyword. we try to find the default setting, which will give us the keyword value
187 intStatus
= U_ZERO_ERROR
;
188 // finding default value does not affect collation fallback status
189 UResourceBundle
*defaultColl
= ures_getByKeyWithFallback(collations
, "default", NULL
, &intStatus
);
190 if(U_SUCCESS(intStatus
)) {
191 int32_t defaultKeyLen
= 0;
192 const UChar
*defaultKey
= ures_getString(defaultColl
, &defaultKeyLen
, &intStatus
);
193 u_UCharsToChars(defaultKey
, keyBuffer
, defaultKeyLen
);
194 keyBuffer
[defaultKeyLen
] = 0;
196 *status
= U_INTERNAL_PROGRAM_ERROR
;
199 ures_close(defaultColl
);
201 collElem
= ures_getByKeyWithFallback(collations
, keyBuffer
, collations
, status
);
202 collations
= NULL
; // We just reused the collations object as collElem.
204 UResourceBundle
*binary
= NULL
;
206 if(*status
== U_MISSING_RESOURCE_ERROR
) { /* We didn't find the tailoring data, we fallback to the UCA */
207 *status
= U_USING_DEFAULT_WARNING
;
208 result
= ucol_initCollator(UCA
->image
, result
, UCA
, status
);
209 if (U_FAILURE(*status
)) {
212 // if we use UCA, real locale is root
214 b
= ures_open(U_ICUDATA_COLL
, "", status
);
215 ures_close(collElem
);
216 collElem
= ures_open(U_ICUDATA_COLL
, "", status
);
217 if(U_FAILURE(*status
)) {
220 result
->hasRealData
= FALSE
;
221 } else if(U_SUCCESS(*status
)) {
222 intStatus
= U_ZERO_ERROR
;
224 binary
= ures_getByKey(collElem
, "%%CollationBin", NULL
, &intStatus
);
226 if(intStatus
== U_MISSING_RESOURCE_ERROR
) { /* we didn't find the binary image, we should use the rules */
228 result
= tryOpeningFromRules(collElem
, status
);
229 if(U_FAILURE(*status
)) {
232 } else if(U_SUCCESS(*status
)) { /* otherwise, we'll pick a collation data that exists */
234 const uint8_t *inData
= ures_getBinary(binary
, &len
, status
);
235 UCATableHeader
*colData
= (UCATableHeader
*)inData
;
236 if(uprv_memcmp(colData
->UCAVersion
, UCA
->image
->UCAVersion
, sizeof(UVersionInfo
)) != 0 ||
237 uprv_memcmp(colData
->UCDVersion
, UCA
->image
->UCDVersion
, sizeof(UVersionInfo
)) != 0 ||
238 colData
->version
[0] != UCOL_BUILDER_VERSION
)
240 *status
= U_DIFFERENT_UCA_VERSION
;
241 result
= tryOpeningFromRules(collElem
, status
);
243 if(U_FAILURE(*status
)){
246 if((uint32_t)len
> (paddedsize(sizeof(UCATableHeader
)) + paddedsize(sizeof(UColOptionSet
)))) {
247 result
= ucol_initCollator((const UCATableHeader
*)inData
, result
, UCA
, status
);
248 if(U_FAILURE(*status
)){
251 result
->hasRealData
= TRUE
;
253 result
= ucol_initCollator(UCA
->image
, result
, UCA
, status
);
254 ucol_setOptionsFromHeader(result
, (UColOptionSet
*)(inData
+((const UCATableHeader
*)inData
)->options
), status
);
255 if(U_FAILURE(*status
)){
258 result
->hasRealData
= FALSE
;
260 result
->freeImageOnClose
= FALSE
;
263 intStatus
= U_ZERO_ERROR
;
264 result
->rules
= ures_getStringByKey(collElem
, "Sequence", &result
->rulesLength
, &intStatus
);
265 result
->freeRulesOnClose
= FALSE
;
266 } else { /* There is another error, and we're just gonna clean up */
270 intStatus
= U_ZERO_ERROR
;
271 result
->ucaRules
= ures_getStringByKey(b
,"UCARules",NULL
,&intStatus
);
274 loc
= ures_getLocale(b
, status
);
276 result
->requestedLocale
= uprv_strdup(loc
);
278 if (result
->requestedLocale
== NULL
) {
279 *status
= U_MEMORY_ALLOCATION_ERROR
;
282 loc
= ures_getLocale(collElem
, status
);
283 result
->actualLocale
= uprv_strdup(loc
);
285 if (result
->actualLocale
== NULL
) {
286 *status
= U_MEMORY_ALLOCATION_ERROR
;
289 loc
= ures_getLocale(b
, status
);
290 result
->validLocale
= uprv_strdup(loc
);
292 if (result
->validLocale
== NULL
) {
293 *status
= U_MEMORY_ALLOCATION_ERROR
;
298 ures_close(collElem
);
304 ures_close(collElem
);
311 ucol_open(const char *loc
,
316 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN
);
317 UTRACE_DATA1(UTRACE_INFO
, "locale = \"%s\"", loc
);
318 UCollator
*result
= NULL
;
321 #if !UCONFIG_NO_SERVICE
322 result
= Collator::createUCollator(loc
, status
);
326 result
= ucol_open_internal(loc
, status
);
328 UTRACE_EXIT_PTR_STATUS(result
, *status
);
332 U_CAPI UCollator
* U_EXPORT2
333 ucol_openRules( const UChar
*rules
,
335 UColAttributeValue normalizationMode
,
336 UCollationStrength strength
,
337 UParseError
*parseError
,
341 UColAttributeValue norm
;
344 if(status
== NULL
|| U_FAILURE(*status
)){
349 if (U_FAILURE(*status
)) {
353 if(rules
== NULL
|| rulesLength
< -1) {
354 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
358 if(rulesLength
== -1) {
359 rulesLength
= u_strlen(rules
);
362 if(parseError
== NULL
){
366 switch(normalizationMode
) {
370 norm
= normalizationMode
;
373 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
377 UCollator
*result
= NULL
;
378 UCATableHeader
*table
= NULL
;
379 UCollator
*UCA
= ucol_initUCA(status
);
381 if(U_FAILURE(*status
)){
385 ucol_tok_initTokenList(&src
, rules
, rulesLength
, UCA
, status
);
386 ucol_tok_assembleTokenList(&src
,parseError
, status
);
388 if(U_FAILURE(*status
)) {
389 /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
390 /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
391 /* so something might be done here... or on lower level */
393 if(*status
== U_ILLEGAL_ARGUMENT_ERROR
) {
394 fprintf(stderr
, "bad option starting at offset %i\n", src
.current
-src
.source
);
396 fprintf(stderr
, "invalid rule just before offset %i\n", src
.current
-src
.source
);
402 if(src
.resultLen
> 0 || src
.removeSet
!= NULL
) { /* we have a set of rules, let's make something of it */
403 /* also, if we wanted to remove some contractions, we should make a tailoring */
404 table
= ucol_assembleTailoringTable(&src
, status
);
405 if(U_SUCCESS(*status
)) {
407 table
->version
[0] = UCOL_BUILDER_VERSION
;
408 // no tailoring information on this level
409 table
->version
[1] = table
->version
[2] = table
->version
[3] = 0;
411 u_getUnicodeVersion(table
->UCDVersion
);
413 uprv_memcpy(table
->UCAVersion
, UCA
->image
->UCAVersion
, sizeof(UVersionInfo
));
414 result
= ucol_initCollator(table
, 0, UCA
, status
);
415 if (U_FAILURE(*status
)) {
418 result
->hasRealData
= TRUE
;
419 result
->freeImageOnClose
= TRUE
;
421 } else { /* no rules, but no error either */
422 // must be only options
423 // We will init the collator from UCA
424 result
= ucol_initCollator(UCA
->image
, 0, UCA
, status
);
425 // Check for null result
426 if (U_FAILURE(*status
)) {
429 // And set only the options
430 UColOptionSet
*opts
= (UColOptionSet
*)uprv_malloc(sizeof(UColOptionSet
));
433 *status
= U_MEMORY_ALLOCATION_ERROR
;
436 uprv_memcpy(opts
, src
.opts
, sizeof(UColOptionSet
));
437 ucol_setOptionsFromHeader(result
, opts
, status
);
438 result
->freeOptionsOnClose
= TRUE
;
439 result
->hasRealData
= FALSE
;
440 result
->freeImageOnClose
= FALSE
;
443 if(U_SUCCESS(*status
)) {
445 result
->dataVersion
[0] = UCOL_BUILDER_VERSION
;
446 if(rulesLength
> 0) {
447 newRules
= (UChar
*)uprv_malloc((rulesLength
+1)*U_SIZEOF_UCHAR
);
449 if (newRules
== NULL
) {
450 *status
= U_MEMORY_ALLOCATION_ERROR
;
453 uprv_memcpy(newRules
, rules
, rulesLength
*U_SIZEOF_UCHAR
);
454 newRules
[rulesLength
]=0;
455 result
->rules
= newRules
;
456 result
->rulesLength
= rulesLength
;
457 result
->freeRulesOnClose
= TRUE
;
459 result
->ucaRules
= NULL
;
460 result
->actualLocale
= NULL
;
461 result
->validLocale
= NULL
;
462 result
->requestedLocale
= NULL
;
463 ucol_setAttribute(result
, UCOL_STRENGTH
, strength
, status
);
464 ucol_setAttribute(result
, UCOL_NORMALIZATION_MODE
, norm
, status
);
477 ucol_tok_closeTokenList(&src
);
// Copies the collator's rules into buffer. With delta == UCOL_FULL_RULES the
// UCA rules are placed first and the collator's own rules are appended.
// The result of u_terminateUChars() for the combined length (len + UCAlen) is
// returned.
482 U_CAPI
int32_t U_EXPORT2
483 ucol_getRulesEx(const UCollator
*coll
, UColRuleOption delta
, UChar
*buffer
, int32_t bufferLen
) {
484 UErrorCode status
= U_ZERO_ERROR
;
487 const UChar
* ucaRules
= 0;
// the collator's own rules and their length
488 const UChar
*rules
= ucol_getRules(coll
, &len
);
489 if(delta
== UCOL_FULL_RULES
) {
490 /* take the UCA rules and append real rules at the end */
491 /* UCA rules will be probably coming from the root RB */
492 ucaRules
= coll
->ucaRules
;
494 UCAlen
= u_strlen(ucaRules
);
// NOTE(review): the next four statements look up the UCA rules through the
// resource bundle instead; whether they are compiled is not visible here - confirm
497 ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
498 UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
499 UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
500 ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
505 if(U_FAILURE(status
)) {
// copying happens only when a non-empty destination buffer was supplied
508 if(buffer
!=0 && bufferLen
>0){
// UCA rules first, clipped to the buffer size
511 u_memcpy(buffer
, ucaRules
, uprv_min(UCAlen
, bufferLen
));
// then the collator's own rules, in whatever room remains after the UCA rules
513 if(len
> 0 && bufferLen
> UCAlen
) {
514 u_memcpy(buffer
+UCAlen
, rules
, uprv_min(len
, bufferLen
-UCAlen
));
517 return u_terminateUChars(buffer
, bufferLen
, len
+UCAlen
, &status
);
520 static const UChar _NUL
= 0;
522 U_CAPI
const UChar
* U_EXPORT2
523 ucol_getRules( const UCollator
*coll
,
526 if(coll
->rules
!= NULL
) {
527 *length
= coll
->rulesLength
;
// Compares two collators. The checks run from cheap to expensive: pointer
// identity, every attribute value, the rule strings, and finally the token
// lists built from both rule strings (matched reset by reset, token by token,
// including expansions).
536 U_CAPI UBool U_EXPORT2
537 ucol_equals(const UCollator
*source
, const UCollator
*target
) {
538 UErrorCode status
= U_ZERO_ERROR
;
539 // if pointers are equal, collators are equal
540 if(source
== target
) {
543 int32_t i
= 0, j
= 0;
544 // if any of attributes are different, collators are not equal
545 for(i
= 0; i
< UCOL_ATTRIBUTE_COUNT
; i
++) {
546 if(ucol_getAttribute(source
, (UColAttribute
)i
, &status
) != ucol_getAttribute(target
, (UColAttribute
)i
, &status
) || U_FAILURE(status
)) {
551 int32_t sourceRulesLen
= 0, targetRulesLen
= 0;
552 const UChar
*sourceRules
= ucol_getRules(source
, &sourceRulesLen
);
553 const UChar
*targetRules
= ucol_getRules(target
, &targetRulesLen
);
555 if(sourceRulesLen
== targetRulesLen
&& u_strncmp(sourceRules
, targetRules
, sourceRulesLen
) == 0) {
556 // all the attributes are equal and the rules are equal - collators are equal
559 // hard part, need to construct tree from rules and see if they yield the same tailoring
561 UParseError parseError
;
562 UColTokenParser sourceParser
, targetParser
;
563 int32_t sourceListLen
= 0, targetListLen
= 0;
// tokenize both rule strings, each against its own collator's UCA
564 ucol_tok_initTokenList(&sourceParser
, sourceRules
, sourceRulesLen
, source
->UCA
, &status
);
565 ucol_tok_initTokenList(&targetParser
, targetRules
, targetRulesLen
, target
->UCA
, &status
);
566 sourceListLen
= ucol_tok_assembleTokenList(&sourceParser
, &parseError
, &status
);
567 targetListLen
= ucol_tok_assembleTokenList(&targetParser
, &parseError
, &status
);
569 if(sourceListLen
!= targetListLen
) {
570 // different number of resets
573 UColToken
*sourceReset
= NULL
, *targetReset
= NULL
;
574 UChar
*sourceResetString
= NULL
, *targetResetString
= NULL
;
575 int32_t sourceStringLen
= 0, targetStringLen
= 0;
// for every reset in the source list, look for the same reset in the target list
576 for(i
= 0; i
< sourceListLen
; i
++) {
577 sourceReset
= sourceParser
.lh
[i
].reset
;
// a token's source field holds the string offset in its low 24 bits and the length above them
578 sourceResetString
= sourceParser
.source
+(sourceReset
->source
& 0xFFFFFF);
579 sourceStringLen
= sourceReset
->source
>> 24;
// sourceListLen is used as the bound for the target list as well; the length
// comparison above precedes this loop
580 for(j
= 0; j
< sourceListLen
; j
++) {
581 targetReset
= targetParser
.lh
[j
].reset
;
582 targetResetString
= targetParser
.source
+(targetReset
->source
& 0xFFFFFF);
583 targetStringLen
= targetReset
->source
>> 24;
584 if(sourceStringLen
== targetStringLen
&& (u_strncmp(sourceResetString
, targetResetString
, sourceStringLen
) == 0)) {
// same reset anchor: walk both token chains in parallel
585 sourceReset
= sourceParser
.lh
[i
].first
;
586 targetReset
= targetParser
.lh
[j
].first
;
587 while(sourceReset
!= NULL
&& targetReset
!= NULL
) {
588 sourceResetString
= sourceParser
.source
+(sourceReset
->source
& 0xFFFFFF);
589 sourceStringLen
= sourceReset
->source
>> 24;
590 targetResetString
= targetParser
.source
+(targetReset
->source
& 0xFFFFFF);
591 targetStringLen
= targetReset
->source
>> 24;
592 if(sourceStringLen
!= targetStringLen
|| (u_strncmp(sourceResetString
, targetResetString
, sourceStringLen
) != 0)) {
596 // probably also need to check the expansions
597 if(sourceReset
->expansion
) {
598 if(!targetReset
->expansion
) {
602 // compare expansions
603 sourceResetString
= sourceParser
.source
+(sourceReset
->expansion
& 0xFFFFFF);
604 sourceStringLen
= sourceReset
->expansion
>> 24;
605 targetResetString
= targetParser
.source
+(targetReset
->expansion
& 0xFFFFFF);
606 targetStringLen
= targetReset
->expansion
>> 24;
607 if(sourceStringLen
!= targetStringLen
|| (u_strncmp(sourceResetString
, targetResetString
, sourceStringLen
) != 0)) {
613 if(targetReset
->expansion
) {
// advance both chains together
618 sourceReset
= sourceReset
->next
;
619 targetReset
= targetReset
->next
;
621 if(sourceReset
!= targetReset
) { // at least one is not NULL
622 // there are more tailored elements in one list
631 // couldn't find the reset anchor, so the collators are not equal
632 if(j
== sourceListLen
) {
// both token lists are released here
640 ucol_tok_closeTokenList(&sourceParser
);
641 ucol_tok_closeTokenList(&targetParser
);
646 U_CAPI
int32_t U_EXPORT2
647 ucol_getDisplayName( const char *objLoc
,
650 int32_t resultLength
,
655 if(U_FAILURE(*status
)) return -1;
657 if(!(result
==NULL
&& resultLength
==0)) {
658 // NULL destination for pure preflighting: empty dummy string
659 // otherwise, alias the destination buffer
660 dst
.setTo(result
, 0, resultLength
);
662 Collator::getDisplayName(Locale(objLoc
), Locale(dispLoc
), dst
);
663 return dst
.extract(result
, resultLength
, *status
);
666 U_CAPI
const char* U_EXPORT2
667 ucol_getAvailable(int32_t index
)
670 const Locale
*loc
= Collator::getAvailableLocales(count
);
671 if (loc
!= NULL
&& index
< count
) {
672 return loc
[index
].getName();
677 U_CAPI
int32_t U_EXPORT2
678 ucol_countAvailable()
681 Collator::getAvailableLocales(count
);
685 #if !UCONFIG_NO_SERVICE
// Returns a UEnumeration built with uenum_openStringEnumeration() from the
// StringEnumeration that Collator::getAvailableLocales() provides.
686 U_CAPI UEnumeration
* U_EXPORT2
687 ucol_openAvailableLocales(UErrorCode
*status
) {
690 // This is a wrapper over Collator::getAvailableLocales()
691 if (U_FAILURE(*status
)) {
694 StringEnumeration
*s
= Collator::getAvailableLocales();
// an allocation failure is reported through status
696 *status
= U_MEMORY_ALLOCATION_ERROR
;
// wrap the C++ enumeration in the C API type
699 return uenum_openStringEnumeration(s
, status
);
// Note: KEYWORDS[0] != RESOURCE_NAME - alan
// (the resource is named "collations", the keyword is "collation")

static const char RESOURCE_NAME[] = "collations";

static const char* const KEYWORDS[] = { "collation" };

// number of entries in KEYWORDS
#define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
711 U_CAPI UEnumeration
* U_EXPORT2
712 ucol_getKeywords(UErrorCode
*status
) {
713 UEnumeration
*result
= NULL
;
714 if (U_SUCCESS(*status
)) {
715 return uenum_openCharStringsEnumeration(KEYWORDS
, KEYWORD_COUNT
, status
);
720 U_CAPI UEnumeration
* U_EXPORT2
721 ucol_getKeywordValues(const char *keyword
, UErrorCode
*status
) {
722 if (U_FAILURE(*status
)) {
725 // hard-coded to accept exactly one collation keyword
726 // modify if additional collation keyword is added later
727 if (keyword
==NULL
|| uprv_strcmp(keyword
, KEYWORDS
[0])!=0)
729 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
732 return ures_getKeywordValues(U_ICUDATA_COLL
, RESOURCE_NAME
, status
);
735 U_CAPI
int32_t U_EXPORT2
736 ucol_getFunctionalEquivalent(char* result
, int32_t resultCapacity
,
737 const char* keyword
, const char* locale
,
738 UBool
* isAvailable
, UErrorCode
* status
)
740 // N.B.: Resource name is "collations" but keyword is "collation"
741 return ures_getFunctionalEquivalent(result
, resultCapacity
, U_ICUDATA_COLL
,
742 "collations", keyword
, locale
,
743 isAvailable
, TRUE
, status
);
746 /* returns the locale name the collation data comes from */
747 U_CAPI
const char * U_EXPORT2
748 ucol_getLocale(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
749 return ucol_getLocaleByType(coll
, type
, status
);
// Picks one of the collator's stored locale strings according to type:
// actualLocale, validLocale or requestedLocale. U_ILLEGAL_ARGUMENT_ERROR is
// written to status on the remaining path. coll is dereferenced without a
// NULL check.
752 U_CAPI
const char * U_EXPORT2
753 ucol_getLocaleByType(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
754 const char *result
= NULL
;
// a NULL status pointer is treated like an incoming failure
755 if(status
== NULL
|| U_FAILURE(*status
)) {
758 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE
);
759 UTRACE_DATA1(UTRACE_INFO
, "coll=%p", coll
);
762 case ULOC_ACTUAL_LOCALE
:
763 result
= coll
->actualLocale
;
765 case ULOC_VALID_LOCALE
:
766 result
= coll
->validLocale
;
768 case ULOC_REQUESTED_LOCALE
:
769 result
= coll
->requestedLocale
;
772 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
774 UTRACE_DATA1(UTRACE_INFO
, "result = %s", result
);
775 UTRACE_EXIT_STATUS(*status
);
// Replaces the valid, requested and actual locale strings stored in coll.
// For each of the three, a previously stored non-NULL string is released with
// uprv_free() and the passed-in pointer is stored as is (adopted, not copied).
779 U_CFUNC
void U_EXPORT2
780 ucol_setReqValidLocales(UCollator
*coll
, char *requestedLocaleToAdopt
, char *validLocaleToAdopt
, char *actualLocaleToAdopt
)
// valid locale
783 if (coll
->validLocale
) {
784 uprv_free(coll
->validLocale
);
786 coll
->validLocale
= validLocaleToAdopt
;
// requested locale
787 if (coll
->requestedLocale
) { // should always have
788 uprv_free(coll
->requestedLocale
);
790 coll
->requestedLocale
= requestedLocaleToAdopt
;
// actual locale
791 if (coll
->actualLocale
) {
792 uprv_free(coll
->actualLocale
);
794 coll
->actualLocale
= actualLocaleToAdopt
;
// Builds the set of strings tailored by the collator's rules: the rule string
// is tokenized, and for every non-reset token the strings produced by a
// CanonicalIterator over the token's characters are added to a new UnicodeSet
// unless the FCD quick check answers UNORM_NO. The set is returned as a USet*.
798 U_CAPI USet
* U_EXPORT2
799 ucol_getTailoredSet(const UCollator
*coll
, UErrorCode
*status
)
803 if(status
== NULL
|| U_FAILURE(*status
)) {
// a collator without a UCA is rejected as an illegal argument
806 if(coll
== NULL
|| coll
->UCA
== NULL
) {
807 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
810 UParseError parseError
;
812 int32_t rulesLen
= 0;
813 const UChar
*rules
= ucol_getRules(coll
, &rulesLen
);
814 UBool startOfRules
= TRUE
;
815 // we internally use the C++ class, for the following reasons:
816 // 1. we need to utilize canonical iterator, which is a C++ only class
817 // 2. canonical iterator returns UnicodeStrings - USet cannot take them
818 // 3. USet is internally really UnicodeSet, C is just a wrapper
819 UnicodeSet
*tailored
= new UnicodeSet();
820 UnicodeString pattern
;
822 CanonicalIterator
it(empty
, *status
);
825 // The idea is to tokenize the rule set. For each non-reset token,
826 // we add all the canonically equivalent FCD sequences
827 ucol_tok_initTokenList(&src
, rules
, rulesLen
, coll
->UCA
, status
);
828 while (ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
, status
) != NULL
) {
829 startOfRules
= FALSE
;
830 if(src
.parsedToken
.strength
!= UCOL_TOK_RESET
) {
// the token's characters are located in the rule string by offset and length
831 const UChar
*stuff
= src
.source
+(src
.parsedToken
.charsOffset
);
832 it
.setSource(UnicodeString(stuff
, src
.parsedToken
.charsLen
), *status
);
// the loop ends once pattern is bogus
834 while(!pattern
.isBogus()) {
835 if(Normalizer::quickCheck(pattern
, UNORM_FCD
, *status
) != UNORM_NO
) {
836 tailored
->add(pattern
);
842 ucol_tok_closeTokenList(&src
);
843 return (USet
*)tailored
;
846 #endif /* #if !UCONFIG_NO_COLLATION */