2 *******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucol_res.cpp
8 * tab size: 8 (not used)
12 * This file contains dependencies that the collation run-time doesn't normally
13 * need. This mainly contains resource bundle usage and collation meta information
15 * Modification history
17 * 1996-1999 various members of ICU team maintained C API for collation framework
18 * 02/16/2001 synwee Added internal method getPrevSpecialCE
19 * 03/01/2001 synwee Added maxexpansion functionality.
20 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
21 * 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp
24 #include "unicode/utypes.h"
26 #if !UCONFIG_NO_COLLATION
27 #include "unicode/uloc.h"
28 #include "unicode/coll.h"
29 #include "unicode/tblcoll.h"
30 #include "unicode/caniter.h"
31 #include "unicode/uscript.h"
32 #include "unicode/ustring.h"
52 static void ucol_setReorderCodesFromParser(UCollator
*coll
, UColTokenParser
*parser
, UErrorCode
*status
);
54 // static UCA. There is only one. Collators don't use it.
55 // It is referenced only in ucol_initUCA and ucol_cleanup
56 static UCollator
* _staticUCA
= NULL
;
57 // static pointer to udata memory. Inited in ucol_initUCA
58 // used for cleanup in ucol_cleanup
59 static UDataMemory
* UCA_DATA_MEM
= NULL
;
62 static UBool U_CALLCONV
63 ucol_res_cleanup(void)
66 udata_close(UCA_DATA_MEM
);
70 ucol_close(_staticUCA
);
76 static UBool U_CALLCONV
77 isAcceptableUCA(void * /*context*/,
78 const char * /*type*/, const char * /*name*/,
79 const UDataInfo
*pInfo
){
80 /* context, type & name are intentionally not used */
81 if( pInfo
->size
>=20 &&
82 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
83 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
84 pInfo
->dataFormat
[0]==UCA_DATA_FORMAT_0
&& /* dataFormat="UCol" */
85 pInfo
->dataFormat
[1]==UCA_DATA_FORMAT_1
&&
86 pInfo
->dataFormat
[2]==UCA_DATA_FORMAT_2
&&
87 pInfo
->dataFormat
[3]==UCA_DATA_FORMAT_3
&&
88 pInfo
->formatVersion
[0]==UCA_FORMAT_VERSION_0
89 #if UCA_FORMAT_VERSION_1!=0
90 && pInfo
->formatVersion
[1]>=UCA_FORMAT_VERSION_1
92 //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 &&
93 //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh
94 //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh
96 UVersionInfo UCDVersion
;
97 u_getUnicodeVersion(UCDVersion
);
98 return (UBool
)(pInfo
->dataVersion
[0]==UCDVersion
[0]
99 && pInfo
->dataVersion
[1]==UCDVersion
[1]);
100 //&& pInfo->dataVersion[2]==ucaDataInfo.dataVersion[2]
101 //&& pInfo->dataVersion[3]==ucaDataInfo.dataVersion[3]);
108 /* do not close UCA returned by ucol_initUCA! */
110 ucol_initUCA(UErrorCode
*status
) {
111 if(U_FAILURE(*status
)) {
115 UMTX_CHECK(NULL
, (_staticUCA
== NULL
), needsInit
);
118 UDataMemory
*result
= udata_openChoice(U_ICUDATA_COLL
, UCA_DATA_TYPE
, UCA_DATA_NAME
, isAcceptableUCA
, NULL
, status
);
120 if(U_SUCCESS(*status
)){
121 UCollator
*newUCA
= ucol_initCollator((const UCATableHeader
*)udata_getMemory(result
), NULL
, NULL
, status
);
122 if(U_SUCCESS(*status
)){
123 // Initialize variables for implicit generation
124 uprv_uca_initImplicitConstants(status
);
127 if(_staticUCA
== NULL
) {
128 UCA_DATA_MEM
= result
;
135 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES
, ucol_res_cleanup
);
152 U_CAPI
void U_EXPORT2
159 /****************************************************************************/
160 /* Following are the open/close functions */
162 /****************************************************************************/
164 tryOpeningFromRules(UResourceBundle
*collElem
, UErrorCode
*status
) {
165 int32_t rulesLen
= 0;
166 const UChar
*rules
= ures_getStringByKey(collElem
, "Sequence", &rulesLen
, status
);
167 return ucol_openRules(rules
, rulesLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, status
);
174 ucol_open_internal(const char *loc
,
177 UErrorCode intStatus
= U_ZERO_ERROR
;
178 const UCollator
* UCA
= ucol_initUCA(status
);
181 if(U_FAILURE(*status
)) return 0;
185 UCollator
*result
= NULL
;
186 UResourceBundle
*b
= ures_open(U_ICUDATA_COLL
, loc
, status
);
188 /* we try to find stuff from keyword */
189 UResourceBundle
*collations
= ures_getByKey(b
, "collations", NULL
, status
);
190 UResourceBundle
*collElem
= NULL
;
192 // if there is a keyword, we pick it up and try to get elements
193 if(!uloc_getKeywordValue(loc
, "collation", keyBuffer
, 256, status
) ||
194 !uprv_strcmp(keyBuffer
,"default")) { /* Treat 'zz@collation=default' as 'zz'. */
195 // no keyword. we try to find the default setting, which will give us the keyword value
196 intStatus
= U_ZERO_ERROR
;
197 // finding default value does not affect collation fallback status
198 UResourceBundle
*defaultColl
= ures_getByKeyWithFallback(collations
, "default", NULL
, &intStatus
);
199 if(U_SUCCESS(intStatus
)) {
200 int32_t defaultKeyLen
= 0;
201 const UChar
*defaultKey
= ures_getString(defaultColl
, &defaultKeyLen
, &intStatus
);
202 u_UCharsToChars(defaultKey
, keyBuffer
, defaultKeyLen
);
203 keyBuffer
[defaultKeyLen
] = 0;
205 *status
= U_INTERNAL_PROGRAM_ERROR
;
208 ures_close(defaultColl
);
210 collElem
= ures_getByKeyWithFallback(collations
, keyBuffer
, collations
, status
);
211 collations
= NULL
; // We just reused the collations object as collElem.
213 UResourceBundle
*binary
= NULL
;
214 UResourceBundle
*reorderRes
= NULL
;
216 if(*status
== U_MISSING_RESOURCE_ERROR
) { /* We didn't find the tailoring data, we fallback to the UCA */
217 *status
= U_USING_DEFAULT_WARNING
;
218 result
= ucol_initCollator(UCA
->image
, result
, UCA
, status
);
219 if (U_FAILURE(*status
)) {
222 // if we use UCA, real locale is root
224 b
= ures_open(U_ICUDATA_COLL
, "", status
);
225 ures_close(collElem
);
226 collElem
= ures_open(U_ICUDATA_COLL
, "", status
);
227 if(U_FAILURE(*status
)) {
230 result
->hasRealData
= FALSE
;
231 } else if(U_SUCCESS(*status
)) {
232 intStatus
= U_ZERO_ERROR
;
234 binary
= ures_getByKey(collElem
, "%%CollationBin", NULL
, &intStatus
);
236 if(intStatus
== U_MISSING_RESOURCE_ERROR
) { /* we didn't find the binary image, we should use the rules */
238 result
= tryOpeningFromRules(collElem
, status
);
239 if(U_FAILURE(*status
)) {
242 } else if(U_SUCCESS(intStatus
)) { /* otherwise, we'll pick a collation data that exists */
244 const uint8_t *inData
= ures_getBinary(binary
, &len
, status
);
245 if(U_FAILURE(*status
)) {
248 UCATableHeader
*colData
= (UCATableHeader
*)inData
;
249 if(uprv_memcmp(colData
->UCAVersion
, UCA
->image
->UCAVersion
, sizeof(UVersionInfo
)) != 0 ||
250 uprv_memcmp(colData
->UCDVersion
, UCA
->image
->UCDVersion
, sizeof(UVersionInfo
)) != 0 ||
251 colData
->version
[0] != UCOL_BUILDER_VERSION
)
253 *status
= U_DIFFERENT_UCA_VERSION
;
254 result
= tryOpeningFromRules(collElem
, status
);
256 if(U_FAILURE(*status
)){
259 if((uint32_t)len
> (paddedsize(sizeof(UCATableHeader
)) + paddedsize(sizeof(UColOptionSet
)))) {
260 result
= ucol_initCollator((const UCATableHeader
*)inData
, result
, UCA
, status
);
261 if(U_FAILURE(*status
)){
264 result
->hasRealData
= TRUE
;
266 result
= ucol_initCollator(UCA
->image
, result
, UCA
, status
);
267 ucol_setOptionsFromHeader(result
, (UColOptionSet
*)(inData
+((const UCATableHeader
*)inData
)->options
), status
);
268 if(U_FAILURE(*status
)){
271 result
->hasRealData
= FALSE
;
273 result
->freeImageOnClose
= FALSE
;
275 reorderRes
= ures_getByKey(collElem
, "%%ReorderCodes", NULL
, &intStatus
);
276 if (U_SUCCESS(intStatus
)) {
277 int32_t reorderCodesLen
= 0;
278 const int32_t* reorderCodes
= ures_getIntVector(reorderRes
, &reorderCodesLen
, status
);
279 if (reorderCodesLen
> 0) {
280 ucol_setReorderCodes(result
, reorderCodes
, reorderCodesLen
, status
);
281 // copy the reorder codes into the default reorder codes
282 result
->defaultReorderCodesLength
= result
->reorderCodesLength
;
283 result
->defaultReorderCodes
= (int32_t*) uprv_malloc(result
->defaultReorderCodesLength
* sizeof(int32_t));
284 uprv_memcpy(result
->defaultReorderCodes
, result
->reorderCodes
, result
->defaultReorderCodesLength
* sizeof(int32_t));
285 result
->freeDefaultReorderCodesOnClose
= TRUE
;
287 if (U_FAILURE(*status
)) {
293 } else { // !U_SUCCESS(binaryStatus)
294 if(U_SUCCESS(*status
)) {
295 *status
= intStatus
; // propagate underlying error
299 intStatus
= U_ZERO_ERROR
;
300 result
->rules
= ures_getStringByKey(collElem
, "Sequence", &result
->rulesLength
, &intStatus
);
301 result
->freeRulesOnClose
= FALSE
;
302 } else { /* There is another error, and we're just gonna clean up */
306 intStatus
= U_ZERO_ERROR
;
307 result
->ucaRules
= ures_getStringByKey(b
,"UCARules",NULL
,&intStatus
);
310 loc
= ures_getLocaleByType(b
, ULOC_ACTUAL_LOCALE
, status
);
312 result
->requestedLocale
= uprv_strdup(loc
);
314 if (result
->requestedLocale
== NULL
) {
315 *status
= U_MEMORY_ALLOCATION_ERROR
;
318 loc
= ures_getLocaleByType(collElem
, ULOC_ACTUAL_LOCALE
, status
);
319 result
->actualLocale
= uprv_strdup(loc
);
321 if (result
->actualLocale
== NULL
) {
322 *status
= U_MEMORY_ALLOCATION_ERROR
;
325 loc
= ures_getLocaleByType(b
, ULOC_ACTUAL_LOCALE
, status
);
326 result
->validLocale
= uprv_strdup(loc
);
328 if (result
->validLocale
== NULL
) {
329 *status
= U_MEMORY_ALLOCATION_ERROR
;
334 ures_close(collElem
);
336 ures_close(reorderRes
);
341 ures_close(collElem
);
343 ures_close(reorderRes
);
349 ucol_open(const char *loc
,
354 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN
);
355 UTRACE_DATA1(UTRACE_INFO
, "locale = \"%s\"", loc
);
356 UCollator
*result
= NULL
;
358 #if !UCONFIG_NO_SERVICE
359 result
= Collator::createUCollator(loc
, status
);
363 result
= ucol_open_internal(loc
, status
);
365 UTRACE_EXIT_PTR_STATUS(result
, *status
);
371 ucol_openRulesForImport( const UChar
*rules
,
373 UColAttributeValue normalizationMode
,
374 UCollationStrength strength
,
375 UParseError
*parseError
,
376 GetCollationRulesFunction importFunc
,
381 UColAttributeValue norm
;
384 if(status
== NULL
|| U_FAILURE(*status
)){
388 if(rules
== NULL
|| rulesLength
< -1) {
389 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
393 if(rulesLength
== -1) {
394 rulesLength
= u_strlen(rules
);
397 if(parseError
== NULL
){
401 switch(normalizationMode
) {
405 norm
= normalizationMode
;
408 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
412 UCollator
*result
= NULL
;
413 UCATableHeader
*table
= NULL
;
414 UCollator
*UCA
= ucol_initUCA(status
);
416 if(U_FAILURE(*status
)){
420 ucol_tok_initTokenList(&src
, rules
, rulesLength
, UCA
, importFunc
, context
, status
);
421 ucol_tok_assembleTokenList(&src
,parseError
, status
);
423 if(U_FAILURE(*status
)) {
424 /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
425 /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
426 /* so something might be done here... or on lower level */
428 if(*status
== U_ILLEGAL_ARGUMENT_ERROR
) {
429 fprintf(stderr
, "bad option starting at offset %i\n", (int)(src
.current
-src
.source
));
431 fprintf(stderr
, "invalid rule just before offset %i\n", (int)(src
.current
-src
.source
));
437 /* if we have a set of rules, let's make something of it */
438 if(src
.resultLen
> 0 || src
.removeSet
!= NULL
) {
439 /* also, if we wanted to remove some contractions, we should make a tailoring */
440 table
= ucol_assembleTailoringTable(&src
, status
);
441 if(U_SUCCESS(*status
)) {
443 table
->version
[0] = UCOL_BUILDER_VERSION
;
444 // no tailoring information on this level
445 table
->version
[1] = table
->version
[2] = table
->version
[3] = 0;
447 u_getUnicodeVersion(table
->UCDVersion
);
449 uprv_memcpy(table
->UCAVersion
, UCA
->image
->UCAVersion
, sizeof(UVersionInfo
));
450 result
= ucol_initCollator(table
, 0, UCA
, status
);
451 if (U_FAILURE(*status
)) {
454 result
->hasRealData
= TRUE
;
455 result
->freeImageOnClose
= TRUE
;
457 } else { /* no rules, but no error either */
458 // must be only options
459 // We will init the collator from UCA
460 result
= ucol_initCollator(UCA
->image
, 0, UCA
, status
);
461 // Check for null result
462 if (U_FAILURE(*status
)) {
465 // And set only the options
466 UColOptionSet
*opts
= (UColOptionSet
*)uprv_malloc(sizeof(UColOptionSet
));
469 *status
= U_MEMORY_ALLOCATION_ERROR
;
472 uprv_memcpy(opts
, src
.opts
, sizeof(UColOptionSet
));
473 ucol_setOptionsFromHeader(result
, opts
, status
);
474 result
->freeOptionsOnClose
= TRUE
;
475 result
->hasRealData
= FALSE
;
476 result
->freeImageOnClose
= FALSE
;
479 ucol_setReorderCodesFromParser(result
, &src
, status
);
481 if(U_SUCCESS(*status
)) {
483 result
->dataVersion
[0] = UCOL_BUILDER_VERSION
;
484 if(rulesLength
> 0) {
485 newRules
= (UChar
*)uprv_malloc((rulesLength
+1)*U_SIZEOF_UCHAR
);
487 if (newRules
== NULL
) {
488 *status
= U_MEMORY_ALLOCATION_ERROR
;
491 uprv_memcpy(newRules
, rules
, rulesLength
*U_SIZEOF_UCHAR
);
492 newRules
[rulesLength
]=0;
493 result
->rules
= newRules
;
494 result
->rulesLength
= rulesLength
;
495 result
->freeRulesOnClose
= TRUE
;
497 result
->ucaRules
= NULL
;
498 result
->actualLocale
= NULL
;
499 result
->validLocale
= NULL
;
500 result
->requestedLocale
= NULL
;
501 ucol_buildPermutationTable(result
, status
);
502 ucol_setAttribute(result
, UCOL_STRENGTH
, strength
, status
);
503 ucol_setAttribute(result
, UCOL_NORMALIZATION_MODE
, norm
, status
);
516 ucol_tok_closeTokenList(&src
);
521 U_CAPI UCollator
* U_EXPORT2
522 ucol_openRules( const UChar
*rules
,
524 UColAttributeValue normalizationMode
,
525 UCollationStrength strength
,
526 UParseError
*parseError
,
529 return ucol_openRulesForImport(rules
,
534 ucol_tok_getRulesFromBundle
,
539 U_CAPI
int32_t U_EXPORT2
540 ucol_getRulesEx(const UCollator
*coll
, UColRuleOption delta
, UChar
*buffer
, int32_t bufferLen
) {
541 UErrorCode status
= U_ZERO_ERROR
;
544 const UChar
* ucaRules
= 0;
545 const UChar
*rules
= ucol_getRules(coll
, &len
);
546 if(delta
== UCOL_FULL_RULES
) {
547 /* take the UCA rules and append real rules at the end */
548 /* UCA rules will be probably coming from the root RB */
549 ucaRules
= coll
->ucaRules
;
551 UCAlen
= u_strlen(ucaRules
);
554 ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
555 UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
556 UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
557 ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
562 if(U_FAILURE(status
)) {
565 if(buffer
!=0 && bufferLen
>0){
568 u_memcpy(buffer
, ucaRules
, uprv_min(UCAlen
, bufferLen
));
570 if(len
> 0 && bufferLen
> UCAlen
) {
571 u_memcpy(buffer
+UCAlen
, rules
, uprv_min(len
, bufferLen
-UCAlen
));
574 return u_terminateUChars(buffer
, bufferLen
, len
+UCAlen
, &status
);
577 static const UChar _NUL
= 0;
579 U_CAPI
const UChar
* U_EXPORT2
580 ucol_getRules( const UCollator
*coll
,
583 if(coll
->rules
!= NULL
) {
584 *length
= coll
->rulesLength
;
593 U_CAPI UBool U_EXPORT2
594 ucol_equals(const UCollator
*source
, const UCollator
*target
) {
595 UErrorCode status
= U_ZERO_ERROR
;
596 // if pointers are equal, collators are equal
597 if(source
== target
) {
600 int32_t i
= 0, j
= 0;
601 // if any of attributes are different, collators are not equal
602 for(i
= 0; i
< UCOL_ATTRIBUTE_COUNT
; i
++) {
603 if(ucol_getAttribute(source
, (UColAttribute
)i
, &status
) != ucol_getAttribute(target
, (UColAttribute
)i
, &status
) || U_FAILURE(status
)) {
607 if (source
->reorderCodesLength
!= target
->reorderCodesLength
){
610 for (i
= 0; i
< source
->reorderCodesLength
; i
++) {
611 if(source
->reorderCodes
[i
] != target
->reorderCodes
[i
]) {
616 int32_t sourceRulesLen
= 0, targetRulesLen
= 0;
617 const UChar
*sourceRules
= ucol_getRules(source
, &sourceRulesLen
);
618 const UChar
*targetRules
= ucol_getRules(target
, &targetRulesLen
);
620 if(sourceRulesLen
== targetRulesLen
&& u_strncmp(sourceRules
, targetRules
, sourceRulesLen
) == 0) {
621 // all the attributes are equal and the rules are equal - collators are equal
624 // hard part, need to construct tree from rules and see if they yield the same tailoring
626 UParseError parseError
;
627 UColTokenParser sourceParser
, targetParser
;
628 int32_t sourceListLen
= 0, targetListLen
= 0;
629 ucol_tok_initTokenList(&sourceParser
, sourceRules
, sourceRulesLen
, source
->UCA
, ucol_tok_getRulesFromBundle
, NULL
, &status
);
630 ucol_tok_initTokenList(&targetParser
, targetRules
, targetRulesLen
, target
->UCA
, ucol_tok_getRulesFromBundle
, NULL
, &status
);
631 sourceListLen
= ucol_tok_assembleTokenList(&sourceParser
, &parseError
, &status
);
632 targetListLen
= ucol_tok_assembleTokenList(&targetParser
, &parseError
, &status
);
634 if(sourceListLen
!= targetListLen
) {
635 // different number of resets
638 UColToken
*sourceReset
= NULL
, *targetReset
= NULL
;
639 UChar
*sourceResetString
= NULL
, *targetResetString
= NULL
;
640 int32_t sourceStringLen
= 0, targetStringLen
= 0;
641 for(i
= 0; i
< sourceListLen
; i
++) {
642 sourceReset
= sourceParser
.lh
[i
].reset
;
643 sourceResetString
= sourceParser
.source
+(sourceReset
->source
& 0xFFFFFF);
644 sourceStringLen
= sourceReset
->source
>> 24;
645 for(j
= 0; j
< sourceListLen
; j
++) {
646 targetReset
= targetParser
.lh
[j
].reset
;
647 targetResetString
= targetParser
.source
+(targetReset
->source
& 0xFFFFFF);
648 targetStringLen
= targetReset
->source
>> 24;
649 if(sourceStringLen
== targetStringLen
&& (u_strncmp(sourceResetString
, targetResetString
, sourceStringLen
) == 0)) {
650 sourceReset
= sourceParser
.lh
[i
].first
;
651 targetReset
= targetParser
.lh
[j
].first
;
652 while(sourceReset
!= NULL
&& targetReset
!= NULL
) {
653 sourceResetString
= sourceParser
.source
+(sourceReset
->source
& 0xFFFFFF);
654 sourceStringLen
= sourceReset
->source
>> 24;
655 targetResetString
= targetParser
.source
+(targetReset
->source
& 0xFFFFFF);
656 targetStringLen
= targetReset
->source
>> 24;
657 if(sourceStringLen
!= targetStringLen
|| (u_strncmp(sourceResetString
, targetResetString
, sourceStringLen
) != 0)) {
661 // probably also need to check the expansions
662 if(sourceReset
->expansion
) {
663 if(!targetReset
->expansion
) {
667 // compare expansions
668 sourceResetString
= sourceParser
.source
+(sourceReset
->expansion
& 0xFFFFFF);
669 sourceStringLen
= sourceReset
->expansion
>> 24;
670 targetResetString
= targetParser
.source
+(targetReset
->expansion
& 0xFFFFFF);
671 targetStringLen
= targetReset
->expansion
>> 24;
672 if(sourceStringLen
!= targetStringLen
|| (u_strncmp(sourceResetString
, targetResetString
, sourceStringLen
) != 0)) {
678 if(targetReset
->expansion
) {
683 sourceReset
= sourceReset
->next
;
684 targetReset
= targetReset
->next
;
686 if(sourceReset
!= targetReset
) { // at least one is not NULL
687 // there are more tailored elements in one list
696 // couldn't find the reset anchor, so the collators are not equal
697 if(j
== sourceListLen
) {
705 ucol_tok_closeTokenList(&sourceParser
);
706 ucol_tok_closeTokenList(&targetParser
);
711 U_CAPI
int32_t U_EXPORT2
712 ucol_getDisplayName( const char *objLoc
,
715 int32_t resultLength
,
720 if(U_FAILURE(*status
)) return -1;
722 if(!(result
==NULL
&& resultLength
==0)) {
723 // NULL destination for pure preflighting: empty dummy string
724 // otherwise, alias the destination buffer
725 dst
.setTo(result
, 0, resultLength
);
727 Collator::getDisplayName(Locale(objLoc
), Locale(dispLoc
), dst
);
728 return dst
.extract(result
, resultLength
, *status
);
731 U_CAPI
const char* U_EXPORT2
732 ucol_getAvailable(int32_t index
)
735 const Locale
*loc
= Collator::getAvailableLocales(count
);
736 if (loc
!= NULL
&& index
< count
) {
737 return loc
[index
].getName();
742 U_CAPI
int32_t U_EXPORT2
743 ucol_countAvailable()
746 Collator::getAvailableLocales(count
);
750 #if !UCONFIG_NO_SERVICE
751 U_CAPI UEnumeration
* U_EXPORT2
752 ucol_openAvailableLocales(UErrorCode
*status
) {
755 // This is a wrapper over Collator::getAvailableLocales()
756 if (U_FAILURE(*status
)) {
759 StringEnumeration
*s
= icu::Collator::getAvailableLocales();
761 *status
= U_MEMORY_ALLOCATION_ERROR
;
764 return uenum_openFromStringEnumeration(s
, status
);
// Note: KEYWORDS[0] != RESOURCE_NAME - alan

// Resource table name passed to ures_getKeywordValues().
static const char RESOURCE_NAME[] = "collations";

// Keywords reported by ucol_getKeywords() and accepted by ucol_getKeywordValues().
static const char* const KEYWORDS[] = { "collation" };

// Number of entries in KEYWORDS.
#define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
776 U_CAPI UEnumeration
* U_EXPORT2
777 ucol_getKeywords(UErrorCode
*status
) {
778 UEnumeration
*result
= NULL
;
779 if (U_SUCCESS(*status
)) {
780 return uenum_openCharStringsEnumeration(KEYWORDS
, KEYWORD_COUNT
, status
);
785 U_CAPI UEnumeration
* U_EXPORT2
786 ucol_getKeywordValues(const char *keyword
, UErrorCode
*status
) {
787 if (U_FAILURE(*status
)) {
790 // hard-coded to accept exactly one collation keyword
791 // modify if additional collation keyword is added later
792 if (keyword
==NULL
|| uprv_strcmp(keyword
, KEYWORDS
[0])!=0)
794 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
797 return ures_getKeywordValues(U_ICUDATA_COLL
, RESOURCE_NAME
, status
);
800 static const UEnumeration defaultKeywordValues
= {
803 ulist_close_keyword_values_iterator
,
804 ulist_count_keyword_values
,
806 ulist_next_keyword_value
,
807 ulist_reset_keyword_values_iterator
812 U_CAPI UEnumeration
* U_EXPORT2
813 ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale
,
814 UBool
/*commonlyUsed*/, UErrorCode
* status
) {
815 /* Get the locale base name. */
816 char localeBuffer
[ULOC_FULLNAME_CAPACITY
] = "";
817 uloc_getBaseName(locale
, localeBuffer
, sizeof(localeBuffer
), status
);
819 /* Create the 2 lists
820 * -values is the temp location for the keyword values
821 * -results hold the actual list used by the UEnumeration object
823 UList
*values
= ulist_createEmptyList(status
);
824 UList
*results
= ulist_createEmptyList(status
);
825 UEnumeration
*en
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
826 if (U_FAILURE(*status
) || en
== NULL
) {
828 *status
= U_MEMORY_ALLOCATION_ERROR
;
832 ulist_deleteList(values
);
833 ulist_deleteList(results
);
837 memcpy(en
, &defaultKeywordValues
, sizeof(UEnumeration
));
838 en
->context
= results
;
840 /* Open the resource bundle for collation with the given locale. */
841 UResourceBundle bundle
, collations
, collres
, defres
;
842 ures_initStackObject(&bundle
);
843 ures_initStackObject(&collations
);
844 ures_initStackObject(&collres
);
845 ures_initStackObject(&defres
);
847 ures_openFillIn(&bundle
, U_ICUDATA_COLL
, localeBuffer
, status
);
849 while (U_SUCCESS(*status
)) {
850 ures_getByKey(&bundle
, RESOURCE_NAME
, &collations
, status
);
851 ures_resetIterator(&collations
);
852 while (U_SUCCESS(*status
) && ures_hasNext(&collations
)) {
853 ures_getNextResource(&collations
, &collres
, status
);
854 const char *key
= ures_getKey(&collres
);
855 /* If the key is default, get the string and store it in results list only
856 * if results list is empty.
858 if (uprv_strcmp(key
, "default") == 0) {
859 if (ulist_getListSize(results
) == 0) {
860 char *defcoll
= (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY
);
861 int32_t defcollLength
= ULOC_KEYWORDS_CAPACITY
;
863 ures_getNextResource(&collres
, &defres
, status
);
864 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
865 /* optimize - use the utf-8 string */
866 ures_getUTF8String(&defres
, defcoll
, &defcollLength
, TRUE
, status
);
869 const UChar
* defString
= ures_getString(&defres
, &defcollLength
, status
);
870 if(U_SUCCESS(*status
)) {
871 if(defcollLength
+1 > ULOC_KEYWORDS_CAPACITY
) {
872 *status
= U_BUFFER_OVERFLOW_ERROR
;
874 u_UCharsToChars(defString
, defcoll
, defcollLength
+1);
880 ulist_addItemBeginList(results
, defcoll
, TRUE
, status
);
883 ulist_addItemEndList(values
, key
, FALSE
, status
);
887 /* If the locale is "" this is root so exit. */
888 if (uprv_strlen(localeBuffer
) == 0) {
891 /* Get the parent locale and open a new resource bundle. */
892 uloc_getParent(localeBuffer
, localeBuffer
, sizeof(localeBuffer
), status
);
893 ures_openFillIn(&bundle
, U_ICUDATA_COLL
, localeBuffer
, status
);
897 ures_close(&collres
);
898 ures_close(&collations
);
901 if (U_SUCCESS(*status
)) {
903 ulist_resetList(values
);
904 while ((value
= (char *)ulist_getNext(values
)) != NULL
) {
905 if (!ulist_containsString(results
, value
, (int32_t)uprv_strlen(value
))) {
906 ulist_addItemEndList(results
, value
, FALSE
, status
);
907 if (U_FAILURE(*status
)) {
914 ulist_deleteList(values
);
916 if (U_FAILURE(*status
)){
920 ulist_resetList(results
);
926 U_CAPI
int32_t U_EXPORT2
927 ucol_getFunctionalEquivalent(char* result
, int32_t resultCapacity
,
928 const char* keyword
, const char* locale
,
929 UBool
* isAvailable
, UErrorCode
* status
)
931 // N.B.: Resource name is "collations" but keyword is "collation"
932 return ures_getFunctionalEquivalent(result
, resultCapacity
, U_ICUDATA_COLL
,
933 "collations", keyword
, locale
,
934 isAvailable
, TRUE
, status
);
937 /* returns the locale name the collation data comes from */
938 U_CAPI
const char * U_EXPORT2
939 ucol_getLocale(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
940 return ucol_getLocaleByType(coll
, type
, status
);
943 U_CAPI
const char * U_EXPORT2
944 ucol_getLocaleByType(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
945 const char *result
= NULL
;
946 if(status
== NULL
|| U_FAILURE(*status
)) {
949 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE
);
950 UTRACE_DATA1(UTRACE_INFO
, "coll=%p", coll
);
952 if(coll
->delegate
!=NULL
) {
953 return ((const Collator
*)coll
->delegate
)->getLocale(type
, *status
).getName();
956 case ULOC_ACTUAL_LOCALE
:
957 result
= coll
->actualLocale
;
959 case ULOC_VALID_LOCALE
:
960 result
= coll
->validLocale
;
962 case ULOC_REQUESTED_LOCALE
:
963 result
= coll
->requestedLocale
;
966 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
968 UTRACE_DATA1(UTRACE_INFO
, "result = %s", result
);
969 UTRACE_EXIT_STATUS(*status
);
973 U_CFUNC
void U_EXPORT2
974 ucol_setReqValidLocales(UCollator
*coll
, char *requestedLocaleToAdopt
, char *validLocaleToAdopt
, char *actualLocaleToAdopt
)
977 if (coll
->validLocale
) {
978 uprv_free(coll
->validLocale
);
980 coll
->validLocale
= validLocaleToAdopt
;
981 if (coll
->requestedLocale
) { // should always have
982 uprv_free(coll
->requestedLocale
);
984 coll
->requestedLocale
= requestedLocaleToAdopt
;
985 if (coll
->actualLocale
) {
986 uprv_free(coll
->actualLocale
);
988 coll
->actualLocale
= actualLocaleToAdopt
;
992 U_CAPI USet
* U_EXPORT2
993 ucol_getTailoredSet(const UCollator
*coll
, UErrorCode
*status
)
997 if(status
== NULL
|| U_FAILURE(*status
)) {
1000 if(coll
== NULL
|| coll
->UCA
== NULL
) {
1001 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1004 UParseError parseError
;
1005 UColTokenParser src
;
1006 int32_t rulesLen
= 0;
1007 const UChar
*rules
= ucol_getRules(coll
, &rulesLen
);
1008 UBool startOfRules
= TRUE
;
1009 // we internally use the C++ class, for the following reasons:
1010 // 1. we need to utilize canonical iterator, which is a C++ only class
1011 // 2. canonical iterator returns UnicodeStrings - USet cannot take them
1012 // 3. USet is internally really UnicodeSet, C is just a wrapper
1013 UnicodeSet
*tailored
= new UnicodeSet();
1014 UnicodeString pattern
;
1015 UnicodeString empty
;
1016 CanonicalIterator
it(empty
, *status
);
1019 // The idea is to tokenize the rule set. For each non-reset token,
1020 // we add all the canonicaly equivalent FCD sequences
1021 ucol_tok_initTokenList(&src
, rules
, rulesLen
, coll
->UCA
, ucol_tok_getRulesFromBundle
, NULL
, status
);
1022 while (ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
, status
) != NULL
) {
1023 startOfRules
= FALSE
;
1024 if(src
.parsedToken
.strength
!= UCOL_TOK_RESET
) {
1025 const UChar
*stuff
= src
.source
+(src
.parsedToken
.charsOffset
);
1026 it
.setSource(UnicodeString(stuff
, src
.parsedToken
.charsLen
), *status
);
1027 pattern
= it
.next();
1028 while(!pattern
.isBogus()) {
1029 if(Normalizer::quickCheck(pattern
, UNORM_FCD
, *status
) != UNORM_NO
) {
1030 tailored
->add(pattern
);
1032 pattern
= it
.next();
1036 ucol_tok_closeTokenList(&src
);
1037 return (USet
*)tailored
;
1041 * Collation Reordering
1044 void ucol_setReorderCodesFromParser(UCollator
*coll
, UColTokenParser
*parser
, UErrorCode
*status
) {
1045 if (U_FAILURE(*status
)) {
1049 if (parser
->reorderCodesLength
== 0 || parser
->reorderCodes
== NULL
) {
1053 coll
->reorderCodesLength
= 0;
1054 if (coll
->reorderCodes
!= NULL
&& coll
->freeReorderCodesOnClose
== TRUE
) {
1055 uprv_free(coll
->reorderCodes
);
1058 if (coll
->defaultReorderCodes
!= NULL
&& coll
->freeDefaultReorderCodesOnClose
== TRUE
) {
1059 uprv_free(coll
->defaultReorderCodes
);
1061 coll
->defaultReorderCodesLength
= parser
->reorderCodesLength
;
1062 coll
->defaultReorderCodes
= (int32_t*) uprv_malloc(coll
->defaultReorderCodesLength
* sizeof(int32_t));
1063 if (coll
->defaultReorderCodes
== NULL
) {
1064 *status
= U_MEMORY_ALLOCATION_ERROR
;
1067 uprv_memcpy(coll
->defaultReorderCodes
, parser
->reorderCodes
, coll
->defaultReorderCodesLength
* sizeof(int32_t));
1068 coll
->freeDefaultReorderCodesOnClose
= TRUE
;
1070 coll
->reorderCodesLength
= parser
->reorderCodesLength
;
1071 coll
->reorderCodes
= (int32_t*) uprv_malloc(coll
->reorderCodesLength
* sizeof(int32_t));
1072 if (coll
->reorderCodes
== NULL
) {
1073 *status
= U_MEMORY_ALLOCATION_ERROR
;
1076 uprv_memcpy(coll
->reorderCodes
, parser
->reorderCodes
, coll
->reorderCodesLength
* sizeof(int32_t));
1077 coll
->freeReorderCodesOnClose
= TRUE
;
1081 * Data is stored in the reorder code to lead byte table as:
1082 * index count - unsigned short (2 bytes) - number of index entries
1083 * data size - unsigned short (2 bytes) - number of unsigned short data elements
1084 * index[index count] - array of 2 unsigned shorts (4 bytes each entry)
1085 * - reorder code, offset
1086 * - index is sorted by reorder code
1087 * - if an offset has the high bit set then it is not an offset but a single data entry
1088 * once the high bit is stripped off
1089 * data[data size] - array of unsigned short (2 bytes each entry)
1090 * - the data is an unsigned short count followed by count number
1091 * of lead bytes stored in an unsigned short
1093 U_CFUNC
int U_EXPORT2
1094 ucol_getLeadBytesForReorderCode(const UCollator
*uca
, int reorderCode
, uint16_t* returnLeadBytes
, int returnCapacity
) {
1095 uint16_t reorderCodeIndexLength
= *((uint16_t*) ((uint8_t *)uca
->image
+ uca
->image
->scriptToLeadByte
));
1096 uint16_t* reorderCodeIndex
= (uint16_t*) ((uint8_t *)uca
->image
+ uca
->image
->scriptToLeadByte
+ 2 *sizeof(uint16_t));
1098 // reorder code index is 2 uint16_t's - reorder code + offset
1099 for (int i
= 0; i
< reorderCodeIndexLength
; i
++) {
1100 if (reorderCode
== reorderCodeIndex
[i
*2]) {
1101 uint16_t dataOffset
= reorderCodeIndex
[(i
*2) + 1];
1102 if ((dataOffset
& 0x8000) == 0x8000) {
1103 // offset isn't offset but instead is a single data element
1104 if (returnCapacity
>= 1) {
1105 returnLeadBytes
[0] = dataOffset
& ~0x8000;
1110 uint16_t* dataOffsetBase
= (uint16_t*) ((uint8_t *)reorderCodeIndex
+ reorderCodeIndexLength
* (2 * sizeof(uint16_t)));
1111 uint16_t leadByteCount
= *(dataOffsetBase
+ dataOffset
);
1112 leadByteCount
= leadByteCount
> returnCapacity
? returnCapacity
: leadByteCount
;
1113 uprv_memcpy(returnLeadBytes
, dataOffsetBase
+ dataOffset
+ 1, leadByteCount
* sizeof(uint16_t));
1114 return leadByteCount
;
1121 * Data is stored in the lead byte to reorder code table as:
1122 * index count - unsigned short (2 bytes) - number of index entries
1123 * data size - unsigned short (2 bytes) - number of unsigned short data elements
1124 * index[index count] - array of unsigned short (2 bytes each entry)
1125 * - index is sorted by lead byte
1126 * - if an index has the high bit set then it is not an index but a single data entry
1127 * once the high bit is stripped off
1128 * data[data size] - array of unsigned short (2 bytes each entry)
1129 * - the data is an unsigned short count followed by count number of reorder codes
1131 U_CFUNC
int U_EXPORT2
1132 ucol_getReorderCodesForLeadByte(const UCollator
*uca
, int leadByte
, int16_t* returnReorderCodes
, int returnCapacity
) {
1133 uint16_t* leadByteTable
= ((uint16_t*) ((uint8_t *)uca
->image
+ uca
->image
->leadByteToScript
));
1134 uint16_t leadByteIndexLength
= *leadByteTable
;
1135 if (leadByte
>= leadByteIndexLength
) {
1138 uint16_t leadByteIndex
= *(leadByteTable
+ (2 + leadByte
));
1140 if ((leadByteIndex
& 0x8000) == 0x8000) {
1141 // offset isn't offset but instead is a single data element
1142 if (returnCapacity
>= 1) {
1143 returnReorderCodes
[0] = leadByteIndex
& ~0x8000;
1148 //uint16_t* dataOffsetBase = leadByteTable + (2 + leadByteIndexLength);
1149 uint16_t* reorderCodeData
= leadByteTable
+ (2 + leadByteIndexLength
) + leadByteIndex
;
1150 uint16_t reorderCodeCount
= *reorderCodeData
> returnCapacity
? returnCapacity
: *reorderCodeData
;
1151 uprv_memcpy(returnReorderCodes
, reorderCodeData
+ 1, reorderCodeCount
* sizeof(uint16_t));
1152 return reorderCodeCount
;
1155 // used to mark ignorable reorder code slots
1156 static const int32_t UCOL_REORDER_CODE_IGNORE
= UCOL_REORDER_CODE_LIMIT
+ 1;
1158 U_CFUNC
void U_EXPORT2
1159 ucol_buildPermutationTable(UCollator
*coll
, UErrorCode
*status
) {
1160 uint16_t leadBytesSize
= 256;
1161 uint16_t leadBytes
[256];
1162 int32_t internalReorderCodesLength
= coll
->reorderCodesLength
+ (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
);
1163 int32_t* internalReorderCodes
;
1165 // The lowest byte that hasn't been assigned a mapping
1166 int toBottom
= 0x03;
1167 // The highest byte that hasn't been assigned a mapping - don't include the special or trailing
1170 // are we filling from the bottom?
1171 bool fromTheBottom
= true;
1172 int32_t reorderCodesIndex
= -1;
1174 // lead bytes that have alread been assigned to the permutation table
1175 bool newLeadByteUsed
[256];
1176 // permutation table slots that have already been filled
1177 bool permutationSlotFilled
[256];
1180 if(U_FAILURE(*status
) || coll
== NULL
) {
1184 // clear the reordering
1185 if (coll
->reorderCodes
== NULL
|| coll
->reorderCodesLength
== 0
1186 || (coll
->reorderCodesLength
== 1 && coll
->reorderCodes
[0] == UCOL_REORDER_CODE_NONE
)) {
1187 if (coll
->leadBytePermutationTable
!= NULL
) {
1188 if (coll
->freeLeadBytePermutationTableOnClose
) {
1189 uprv_free(coll
->leadBytePermutationTable
);
1191 coll
->leadBytePermutationTable
= NULL
;
1192 coll
->reorderCodesLength
= 0;
1197 // set reordering to the default reordering
1198 if (coll
->reorderCodes
[0] == UCOL_REORDER_CODE_DEFAULT
) {
1199 if (coll
->reorderCodesLength
!= 1) {
1200 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1203 if (coll
->freeReorderCodesOnClose
== TRUE
) {
1204 uprv_free(coll
->reorderCodes
);
1206 coll
->reorderCodes
= NULL
;
1208 if (coll
->leadBytePermutationTable
!= NULL
&& coll
->freeLeadBytePermutationTableOnClose
== TRUE
) {
1209 uprv_free(coll
->leadBytePermutationTable
);
1211 coll
->leadBytePermutationTable
= NULL
;
1213 if (coll
->defaultReorderCodesLength
== 0) {
1217 coll
->reorderCodes
= (int32_t*)uprv_malloc(coll
->defaultReorderCodesLength
* sizeof(int32_t));
1218 coll
->freeReorderCodesOnClose
= TRUE
;
1219 if (coll
->reorderCodes
== NULL
) {
1220 *status
= U_MEMORY_ALLOCATION_ERROR
;
1223 coll
->reorderCodesLength
= coll
->defaultReorderCodesLength
;
1224 uprv_memcpy(coll
->defaultReorderCodes
, coll
->reorderCodes
, coll
->reorderCodesLength
* sizeof(int32_t));
1227 if (coll
->leadBytePermutationTable
== NULL
) {
1228 coll
->leadBytePermutationTable
= (uint8_t*)uprv_malloc(256*sizeof(uint8_t));
1229 coll
->freeLeadBytePermutationTableOnClose
= TRUE
;
1230 if (coll
->leadBytePermutationTable
== NULL
) {
1231 *status
= U_MEMORY_ALLOCATION_ERROR
;
1236 // prefill the reordering codes with the leading entries
1237 internalReorderCodes
= (int32_t*)uprv_malloc(internalReorderCodesLength
* sizeof(int32_t));
1238 if (internalReorderCodes
== NULL
) {
1239 *status
= U_MEMORY_ALLOCATION_ERROR
;
1240 if (coll
->leadBytePermutationTable
!= NULL
&& coll
->freeLeadBytePermutationTableOnClose
== TRUE
) {
1241 uprv_free(coll
->leadBytePermutationTable
);
1243 coll
->leadBytePermutationTable
= NULL
;
1247 for (uint32_t codeIndex
= 0; codeIndex
< (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
); codeIndex
++) {
1248 internalReorderCodes
[codeIndex
] = UCOL_REORDER_CODE_FIRST
+ codeIndex
;
1250 for (int32_t codeIndex
= 0; codeIndex
< coll
->reorderCodesLength
; codeIndex
++) {
1251 uint32_t reorderCodesCode
= coll
->reorderCodes
[codeIndex
];
1252 internalReorderCodes
[codeIndex
+ (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
)] = reorderCodesCode
;
1253 if (reorderCodesCode
>= UCOL_REORDER_CODE_FIRST
&& reorderCodesCode
< UCOL_REORDER_CODE_LIMIT
) {
1254 internalReorderCodes
[reorderCodesCode
- UCOL_REORDER_CODE_FIRST
] = UCOL_REORDER_CODE_IGNORE
;
1258 for (int i
= 0; i
< 256; i
++) {
1259 if (i
< toBottom
|| i
> toTop
) {
1260 permutationSlotFilled
[i
] = true;
1261 newLeadByteUsed
[i
] = true;
1262 coll
->leadBytePermutationTable
[i
] = i
;
1264 permutationSlotFilled
[i
] = false;
1265 newLeadByteUsed
[i
] = false;
1266 coll
->leadBytePermutationTable
[i
] = 0;
1270 /* Start from the front of the list and place each script we encounter at the
1271 * earliest possible locatation in the permutation table. If we encounter
1272 * UNKNOWN, start processing from the back, and place each script in the last
1273 * possible location. At each step, we also need to make sure that any scripts
1274 * that need to not be moved are copied to their same location in the final table.
1276 for (int reorderCodesCount
= 0; reorderCodesCount
< internalReorderCodesLength
; reorderCodesCount
++) {
1277 reorderCodesIndex
+= fromTheBottom
? 1 : -1;
1278 int32_t next
= internalReorderCodes
[reorderCodesIndex
];
1279 if (next
== UCOL_REORDER_CODE_IGNORE
) {
1282 if (next
== USCRIPT_UNKNOWN
) {
1283 if (fromTheBottom
== false) {
1284 // double turnaround
1285 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1286 if (coll
->leadBytePermutationTable
!= NULL
&& coll
->freeLeadBytePermutationTableOnClose
== TRUE
) {
1287 uprv_free(coll
->leadBytePermutationTable
);
1289 coll
->leadBytePermutationTable
= NULL
;
1290 coll
->reorderCodesLength
= 0;
1291 if (internalReorderCodes
!= NULL
) {
1292 uprv_free(internalReorderCodes
);
1296 fromTheBottom
= false;
1297 reorderCodesIndex
= internalReorderCodesLength
;
1301 uint16_t leadByteCount
= ucol_getLeadBytesForReorderCode(coll
->UCA
, next
, leadBytes
, leadBytesSize
);
1302 if (fromTheBottom
) {
1303 for (int leadByteIndex
= 0; leadByteIndex
< leadByteCount
; leadByteIndex
++) {
1304 // don't place a lead byte twice in the permutation table
1305 if (permutationSlotFilled
[leadBytes
[leadByteIndex
]]) {
1306 // lead byte already used
1307 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1308 if (coll
->leadBytePermutationTable
!= NULL
&& coll
->freeLeadBytePermutationTableOnClose
== TRUE
) {
1309 uprv_free(coll
->leadBytePermutationTable
);
1311 coll
->leadBytePermutationTable
= NULL
;
1312 coll
->reorderCodesLength
= 0;
1313 if (internalReorderCodes
!= NULL
) {
1314 uprv_free(internalReorderCodes
);
1319 coll
->leadBytePermutationTable
[leadBytes
[leadByteIndex
]] = toBottom
;
1320 newLeadByteUsed
[toBottom
] = true;
1321 permutationSlotFilled
[leadBytes
[leadByteIndex
]] = true;
1325 for (int leadByteIndex
= leadByteCount
- 1; leadByteIndex
>= 0; leadByteIndex
--) {
1326 // don't place a lead byte twice in the permutation table
1327 if (permutationSlotFilled
[leadBytes
[leadByteIndex
]]) {
1328 // lead byte already used
1329 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1330 if (coll
->leadBytePermutationTable
!= NULL
&& coll
->freeLeadBytePermutationTableOnClose
== TRUE
) {
1331 uprv_free(coll
->leadBytePermutationTable
);
1333 coll
->leadBytePermutationTable
= NULL
;
1334 coll
->reorderCodesLength
= 0;
1335 if (internalReorderCodes
!= NULL
) {
1336 uprv_free(internalReorderCodes
);
1341 coll
->leadBytePermutationTable
[leadBytes
[leadByteIndex
]] = toTop
;
1342 newLeadByteUsed
[toTop
] = true;
1343 permutationSlotFilled
[leadBytes
[leadByteIndex
]] = true;
1349 #ifdef REORDER_DEBUG
1350 fprintf(stdout
, "\n@@@@ Partial Script Reordering Table\n");
1351 for (int i
= 0; i
< 256; i
++) {
1352 fprintf(stdout
, "\t%02x = %02x\n", i
, coll
->leadBytePermutationTable
[i
]);
1354 fprintf(stdout
, "\n@@@@ Lead Byte Used Table\n");
1355 for (int i
= 0; i
< 256; i
++) {
1356 fprintf(stdout
, "\t%02x = %02x\n", i
, newLeadByteUsed
[i
]);
1358 fprintf(stdout
, "\n@@@@ Permutation Slot Filled Table\n");
1359 for (int i
= 0; i
< 256; i
++) {
1360 fprintf(stdout
, "\t%02x = %02x\n", i
, permutationSlotFilled
[i
]);
1364 /* Copy everything that's left over */
1365 int reorderCode
= 0;
1366 for (int i
= 0; i
< 256; i
++) {
1367 if (!permutationSlotFilled
[i
]) {
1368 while (reorderCode
< 256 && newLeadByteUsed
[reorderCode
]) {
1371 coll
->leadBytePermutationTable
[i
] = reorderCode
;
1372 permutationSlotFilled
[i
] = true;
1373 newLeadByteUsed
[reorderCode
] = true;
1377 #ifdef REORDER_DEBUG
1378 fprintf(stdout
, "\n@@@@ Script Reordering Table\n");
1379 for (int i
= 0; i
< 256; i
++) {
1380 fprintf(stdout
, "\t%02x = %02x\n", i
, coll
->leadBytePermutationTable
[i
]);
1384 if (internalReorderCodes
!= NULL
) {
1385 uprv_free(internalReorderCodes
);
1388 // force a regen of the latin one table since it is affected by the script reordering
1389 coll
->latinOneRegenTable
= TRUE
;
1390 ucol_updateInternalState(coll
, status
);
1393 #endif /* #if !UCONFIG_NO_COLLATION */