2 *******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucol_res.cpp
8 * tab size: 8 (not used)
12 * This file contains dependencies that the collation run-time doesn't normally
13 * need. This mainly contains resource bundle usage and collation meta information
15 * Modification history
17 * 1996-1999 various members of ICU team maintained C API for collation framework
18 * 02/16/2001 synwee Added internal method getPrevSpecialCE
19 * 03/01/2001 synwee Added maxexpansion functionality.
20 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
21 * 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp
24 #include "unicode/utypes.h"
26 #if !UCONFIG_NO_COLLATION
27 #include "unicode/uloc.h"
28 #include "unicode/coll.h"
29 #include "unicode/tblcoll.h"
30 #include "unicode/caniter.h"
31 #include "unicode/uscript.h"
32 #include "unicode/ustring.h"
52 static void ucol_setReorderCodesFromParser(UCollator
*coll
, UColTokenParser
*parser
, UErrorCode
*status
);
54 // static UCA. There is only one. Collators don't use it.
55 // It is referenced only in ucol_initUCA and ucol_cleanup
56 static UCollator
* _staticUCA
= NULL
;
57 // static pointer to udata memory. Inited in ucol_initUCA
58 // used for cleanup in ucol_cleanup
59 static UDataMemory
* UCA_DATA_MEM
= NULL
;
62 static UBool U_CALLCONV
63 ucol_res_cleanup(void)
66 udata_close(UCA_DATA_MEM
);
70 ucol_close(_staticUCA
);
76 static UBool U_CALLCONV
77 isAcceptableUCA(void * /*context*/,
78 const char * /*type*/, const char * /*name*/,
79 const UDataInfo
*pInfo
){
80 /* context, type & name are intentionally not used */
81 if( pInfo
->size
>=20 &&
82 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
83 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
84 pInfo
->dataFormat
[0]==UCA_DATA_FORMAT_0
&& /* dataFormat="UCol" */
85 pInfo
->dataFormat
[1]==UCA_DATA_FORMAT_1
&&
86 pInfo
->dataFormat
[2]==UCA_DATA_FORMAT_2
&&
87 pInfo
->dataFormat
[3]==UCA_DATA_FORMAT_3
&&
88 pInfo
->formatVersion
[0]==UCA_FORMAT_VERSION_0
89 #if UCA_FORMAT_VERSION_1!=0
90 && pInfo
->formatVersion
[1]>=UCA_FORMAT_VERSION_1
92 //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 &&
93 //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh
94 //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh
96 UVersionInfo UCDVersion
;
97 u_getUnicodeVersion(UCDVersion
);
98 return (UBool
)(pInfo
->dataVersion
[0]==UCDVersion
[0]
99 && pInfo
->dataVersion
[1]==UCDVersion
[1]);
100 //&& pInfo->dataVersion[2]==ucaDataInfo.dataVersion[2]
101 //&& pInfo->dataVersion[3]==ucaDataInfo.dataVersion[3]);
108 /* do not close UCA returned by ucol_initUCA! */
110 ucol_initUCA(UErrorCode
*status
) {
111 if(U_FAILURE(*status
)) {
115 UMTX_CHECK(NULL
, (_staticUCA
== NULL
), needsInit
);
118 UDataMemory
*result
= udata_openChoice(U_ICUDATA_COLL
, UCA_DATA_TYPE
, UCA_DATA_NAME
, isAcceptableUCA
, NULL
, status
);
120 if(U_SUCCESS(*status
)){
121 UCollator
*newUCA
= ucol_initCollator((const UCATableHeader
*)udata_getMemory(result
), NULL
, NULL
, status
);
122 if(U_SUCCESS(*status
)){
123 // Initialize variables for implicit generation
124 uprv_uca_initImplicitConstants(status
);
127 if(_staticUCA
== NULL
) {
128 UCA_DATA_MEM
= result
;
135 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES
, ucol_res_cleanup
);
152 U_CAPI
void U_EXPORT2
159 /****************************************************************************/
160 /* Following are the open/close functions */
162 /****************************************************************************/
164 tryOpeningFromRules(UResourceBundle
*collElem
, UErrorCode
*status
) {
165 int32_t rulesLen
= 0;
166 const UChar
*rules
= ures_getStringByKey(collElem
, "Sequence", &rulesLen
, status
);
167 return ucol_openRules(rules
, rulesLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, status
);
174 ucol_open_internal(const char *loc
,
177 UErrorCode intStatus
= U_ZERO_ERROR
;
178 const UCollator
* UCA
= ucol_initUCA(status
);
181 if(U_FAILURE(*status
)) return 0;
185 UCollator
*result
= NULL
;
186 UResourceBundle
*b
= ures_open(U_ICUDATA_COLL
, loc
, status
);
188 /* we try to find stuff from keyword */
189 UResourceBundle
*collations
= ures_getByKey(b
, "collations", NULL
, status
);
190 UResourceBundle
*collElem
= NULL
;
192 // if there is a keyword, we pick it up and try to get elements
193 if(!uloc_getKeywordValue(loc
, "collation", keyBuffer
, 256, status
) ||
194 !uprv_strcmp(keyBuffer
,"default")) { /* Treat 'zz@collation=default' as 'zz'. */
195 // no keyword. we try to find the default setting, which will give us the keyword value
196 intStatus
= U_ZERO_ERROR
;
197 // finding default value does not affect collation fallback status
198 UResourceBundle
*defaultColl
= ures_getByKeyWithFallback(collations
, "default", NULL
, &intStatus
);
199 if(U_SUCCESS(intStatus
)) {
200 int32_t defaultKeyLen
= 0;
201 const UChar
*defaultKey
= ures_getString(defaultColl
, &defaultKeyLen
, &intStatus
);
202 u_UCharsToChars(defaultKey
, keyBuffer
, defaultKeyLen
);
203 keyBuffer
[defaultKeyLen
] = 0;
205 *status
= U_INTERNAL_PROGRAM_ERROR
;
208 ures_close(defaultColl
);
210 collElem
= ures_getByKeyWithFallback(collations
, keyBuffer
, collations
, status
);
211 collations
= NULL
; // We just reused the collations object as collElem.
213 UResourceBundle
*binary
= NULL
;
214 UResourceBundle
*reorderRes
= NULL
;
216 if(*status
== U_MISSING_RESOURCE_ERROR
) { /* We didn't find the tailoring data, we fallback to the UCA */
217 *status
= U_USING_DEFAULT_WARNING
;
218 result
= ucol_initCollator(UCA
->image
, result
, UCA
, status
);
219 if (U_FAILURE(*status
)) {
222 // if we use UCA, real locale is root
224 b
= ures_open(U_ICUDATA_COLL
, "", status
);
225 ures_close(collElem
);
226 collElem
= ures_open(U_ICUDATA_COLL
, "", status
);
227 if(U_FAILURE(*status
)) {
230 result
->hasRealData
= FALSE
;
231 } else if(U_SUCCESS(*status
)) {
232 intStatus
= U_ZERO_ERROR
;
234 binary
= ures_getByKey(collElem
, "%%CollationBin", NULL
, &intStatus
);
236 if(intStatus
== U_MISSING_RESOURCE_ERROR
) { /* we didn't find the binary image, we should use the rules */
238 result
= tryOpeningFromRules(collElem
, status
);
239 if(U_FAILURE(*status
)) {
242 } else if(U_SUCCESS(intStatus
)) { /* otherwise, we'll pick a collation data that exists */
244 const uint8_t *inData
= ures_getBinary(binary
, &len
, status
);
245 if(U_FAILURE(*status
)) {
248 UCATableHeader
*colData
= (UCATableHeader
*)inData
;
249 if(uprv_memcmp(colData
->UCAVersion
, UCA
->image
->UCAVersion
, sizeof(UVersionInfo
)) != 0 ||
250 uprv_memcmp(colData
->UCDVersion
, UCA
->image
->UCDVersion
, sizeof(UVersionInfo
)) != 0 ||
251 colData
->version
[0] != UCOL_BUILDER_VERSION
)
253 *status
= U_DIFFERENT_UCA_VERSION
;
254 result
= tryOpeningFromRules(collElem
, status
);
256 if(U_FAILURE(*status
)){
259 if((uint32_t)len
> (paddedsize(sizeof(UCATableHeader
)) + paddedsize(sizeof(UColOptionSet
)))) {
260 result
= ucol_initCollator((const UCATableHeader
*)inData
, result
, UCA
, status
);
261 if(U_FAILURE(*status
)){
264 result
->hasRealData
= TRUE
;
266 result
= ucol_initCollator(UCA
->image
, result
, UCA
, status
);
267 ucol_setOptionsFromHeader(result
, (UColOptionSet
*)(inData
+((const UCATableHeader
*)inData
)->options
), status
);
268 if(U_FAILURE(*status
)){
271 result
->hasRealData
= FALSE
;
273 result
->freeImageOnClose
= FALSE
;
275 reorderRes
= ures_getByKey(collElem
, "%%ReorderCodes", NULL
, &intStatus
);
276 if (U_SUCCESS(intStatus
)) {
277 int32_t reorderCodesLen
= 0;
278 const int32_t* reorderCodes
= ures_getIntVector(reorderRes
, &reorderCodesLen
, status
);
279 if (reorderCodesLen
> 0) {
280 ucol_setReorderCodes(result
, reorderCodes
, reorderCodesLen
, status
);
281 // copy the reorder codes into the default reorder codes
282 result
->defaultReorderCodesLength
= result
->reorderCodesLength
;
283 result
->defaultReorderCodes
= (int32_t*) uprv_malloc(result
->defaultReorderCodesLength
* sizeof(int32_t));
284 uprv_memcpy(result
->defaultReorderCodes
, result
->reorderCodes
, result
->defaultReorderCodesLength
* sizeof(int32_t));
285 result
->freeDefaultReorderCodesOnClose
= TRUE
;
287 if (U_FAILURE(*status
)) {
293 } else { // !U_SUCCESS(binaryStatus)
294 if(U_SUCCESS(*status
)) {
295 *status
= intStatus
; // propagate underlying error
299 intStatus
= U_ZERO_ERROR
;
300 result
->rules
= ures_getStringByKey(collElem
, "Sequence", &result
->rulesLength
, &intStatus
);
301 result
->freeRulesOnClose
= FALSE
;
302 } else { /* There is another error, and we're just gonna clean up */
306 intStatus
= U_ZERO_ERROR
;
307 result
->ucaRules
= ures_getStringByKey(b
,"UCARules",NULL
,&intStatus
);
310 loc
= ures_getLocaleByType(b
, ULOC_ACTUAL_LOCALE
, status
);
312 result
->requestedLocale
= uprv_strdup(loc
);
314 if (result
->requestedLocale
== NULL
) {
315 *status
= U_MEMORY_ALLOCATION_ERROR
;
318 loc
= ures_getLocaleByType(collElem
, ULOC_ACTUAL_LOCALE
, status
);
319 result
->actualLocale
= uprv_strdup(loc
);
321 if (result
->actualLocale
== NULL
) {
322 *status
= U_MEMORY_ALLOCATION_ERROR
;
325 loc
= ures_getLocaleByType(b
, ULOC_ACTUAL_LOCALE
, status
);
326 result
->validLocale
= uprv_strdup(loc
);
328 if (result
->validLocale
== NULL
) {
329 *status
= U_MEMORY_ALLOCATION_ERROR
;
334 ures_close(collElem
);
336 ures_close(reorderRes
);
341 ures_close(collElem
);
343 ures_close(reorderRes
);
349 ucol_open(const char *loc
,
354 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN
);
355 UTRACE_DATA1(UTRACE_INFO
, "locale = \"%s\"", loc
);
356 UCollator
*result
= NULL
;
358 #if !UCONFIG_NO_SERVICE
359 result
= Collator::createUCollator(loc
, status
);
363 result
= ucol_open_internal(loc
, status
);
365 UTRACE_EXIT_PTR_STATUS(result
, *status
);
371 ucol_openRulesForImport( const UChar
*rules
,
373 UColAttributeValue normalizationMode
,
374 UCollationStrength strength
,
375 UParseError
*parseError
,
376 GetCollationRulesFunction importFunc
,
381 UColAttributeValue norm
;
384 if(status
== NULL
|| U_FAILURE(*status
)){
388 if(rules
== NULL
|| rulesLength
< -1) {
389 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
393 if(rulesLength
== -1) {
394 rulesLength
= u_strlen(rules
);
397 if(parseError
== NULL
){
401 switch(normalizationMode
) {
405 norm
= normalizationMode
;
408 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
412 UCollator
*result
= NULL
;
413 UCATableHeader
*table
= NULL
;
414 UCollator
*UCA
= ucol_initUCA(status
);
416 if(U_FAILURE(*status
)){
420 ucol_tok_initTokenList(&src
, rules
, rulesLength
, UCA
, importFunc
, context
, status
);
421 ucol_tok_assembleTokenList(&src
,parseError
, status
);
423 if(U_FAILURE(*status
)) {
424 /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
425 /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
426 /* so something might be done here... or on lower level */
428 if(*status
== U_ILLEGAL_ARGUMENT_ERROR
) {
429 fprintf(stderr
, "bad option starting at offset %i\n", (int)(src
.current
-src
.source
));
431 fprintf(stderr
, "invalid rule just before offset %i\n", (int)(src
.current
-src
.source
));
437 /* if we have a set of rules, let's make something of it */
438 if(src
.resultLen
> 0 || src
.removeSet
!= NULL
) {
439 /* also, if we wanted to remove some contractions, we should make a tailoring */
440 table
= ucol_assembleTailoringTable(&src
, status
);
441 if(U_SUCCESS(*status
)) {
443 table
->version
[0] = UCOL_BUILDER_VERSION
;
444 // no tailoring information on this level
445 table
->version
[1] = table
->version
[2] = table
->version
[3] = 0;
447 u_getUnicodeVersion(table
->UCDVersion
);
449 uprv_memcpy(table
->UCAVersion
, UCA
->image
->UCAVersion
, sizeof(UVersionInfo
));
450 result
= ucol_initCollator(table
, 0, UCA
, status
);
451 if (U_FAILURE(*status
)) {
454 result
->hasRealData
= TRUE
;
455 result
->freeImageOnClose
= TRUE
;
459 } else { /* no rules, but no error either */
460 // must be only options
461 // We will init the collator from UCA
462 result
= ucol_initCollator(UCA
->image
, 0, UCA
, status
);
463 // Check for null result
464 if (U_FAILURE(*status
)) {
467 // And set only the options
468 UColOptionSet
*opts
= (UColOptionSet
*)uprv_malloc(sizeof(UColOptionSet
));
471 *status
= U_MEMORY_ALLOCATION_ERROR
;
474 uprv_memcpy(opts
, src
.opts
, sizeof(UColOptionSet
));
475 ucol_setOptionsFromHeader(result
, opts
, status
);
476 result
->freeOptionsOnClose
= TRUE
;
477 result
->hasRealData
= FALSE
;
478 result
->freeImageOnClose
= FALSE
;
481 ucol_setReorderCodesFromParser(result
, &src
, status
);
483 if(U_SUCCESS(*status
)) {
485 result
->dataVersion
[0] = UCOL_BUILDER_VERSION
;
486 if(rulesLength
> 0) {
487 newRules
= (UChar
*)uprv_malloc((rulesLength
+1)*U_SIZEOF_UCHAR
);
489 if (newRules
== NULL
) {
490 *status
= U_MEMORY_ALLOCATION_ERROR
;
493 uprv_memcpy(newRules
, rules
, rulesLength
*U_SIZEOF_UCHAR
);
494 newRules
[rulesLength
]=0;
495 result
->rules
= newRules
;
496 result
->rulesLength
= rulesLength
;
497 result
->freeRulesOnClose
= TRUE
;
499 result
->ucaRules
= NULL
;
500 result
->actualLocale
= NULL
;
501 result
->validLocale
= NULL
;
502 result
->requestedLocale
= NULL
;
503 ucol_buildPermutationTable(result
, status
);
504 ucol_setAttribute(result
, UCOL_STRENGTH
, strength
, status
);
505 ucol_setAttribute(result
, UCOL_NORMALIZATION_MODE
, norm
, status
);
518 ucol_tok_closeTokenList(&src
);
523 U_CAPI UCollator
* U_EXPORT2
524 ucol_openRules( const UChar
*rules
,
526 UColAttributeValue normalizationMode
,
527 UCollationStrength strength
,
528 UParseError
*parseError
,
531 return ucol_openRulesForImport(rules
,
536 ucol_tok_getRulesFromBundle
,
541 U_CAPI
int32_t U_EXPORT2
542 ucol_getRulesEx(const UCollator
*coll
, UColRuleOption delta
, UChar
*buffer
, int32_t bufferLen
) {
543 UErrorCode status
= U_ZERO_ERROR
;
546 const UChar
* ucaRules
= 0;
547 const UChar
*rules
= ucol_getRules(coll
, &len
);
548 if(delta
== UCOL_FULL_RULES
) {
549 /* take the UCA rules and append real rules at the end */
550 /* UCA rules will be probably coming from the root RB */
551 ucaRules
= coll
->ucaRules
;
553 UCAlen
= u_strlen(ucaRules
);
556 ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
557 UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
558 UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
559 ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
564 if(U_FAILURE(status
)) {
567 if(buffer
!=0 && bufferLen
>0){
570 u_memcpy(buffer
, ucaRules
, uprv_min(UCAlen
, bufferLen
));
572 if(len
> 0 && bufferLen
> UCAlen
) {
573 u_memcpy(buffer
+UCAlen
, rules
, uprv_min(len
, bufferLen
-UCAlen
));
576 return u_terminateUChars(buffer
, bufferLen
, len
+UCAlen
, &status
);
579 static const UChar _NUL
= 0;
581 U_CAPI
const UChar
* U_EXPORT2
582 ucol_getRules( const UCollator
*coll
,
585 if(coll
->rules
!= NULL
) {
586 *length
= coll
->rulesLength
;
595 U_CAPI UBool U_EXPORT2
596 ucol_equals(const UCollator
*source
, const UCollator
*target
) {
597 UErrorCode status
= U_ZERO_ERROR
;
598 // if pointers are equal, collators are equal
599 if(source
== target
) {
602 int32_t i
= 0, j
= 0;
603 // if any of attributes are different, collators are not equal
604 for(i
= 0; i
< UCOL_ATTRIBUTE_COUNT
; i
++) {
605 if(ucol_getAttribute(source
, (UColAttribute
)i
, &status
) != ucol_getAttribute(target
, (UColAttribute
)i
, &status
) || U_FAILURE(status
)) {
609 if (source
->reorderCodesLength
!= target
->reorderCodesLength
){
612 for (i
= 0; i
< source
->reorderCodesLength
; i
++) {
613 if(source
->reorderCodes
[i
] != target
->reorderCodes
[i
]) {
618 int32_t sourceRulesLen
= 0, targetRulesLen
= 0;
619 const UChar
*sourceRules
= ucol_getRules(source
, &sourceRulesLen
);
620 const UChar
*targetRules
= ucol_getRules(target
, &targetRulesLen
);
622 if(sourceRulesLen
== targetRulesLen
&& u_strncmp(sourceRules
, targetRules
, sourceRulesLen
) == 0) {
623 // all the attributes are equal and the rules are equal - collators are equal
626 // hard part, need to construct tree from rules and see if they yield the same tailoring
628 UParseError parseError
;
629 UColTokenParser sourceParser
, targetParser
;
630 int32_t sourceListLen
= 0, targetListLen
= 0;
631 ucol_tok_initTokenList(&sourceParser
, sourceRules
, sourceRulesLen
, source
->UCA
, ucol_tok_getRulesFromBundle
, NULL
, &status
);
632 ucol_tok_initTokenList(&targetParser
, targetRules
, targetRulesLen
, target
->UCA
, ucol_tok_getRulesFromBundle
, NULL
, &status
);
633 sourceListLen
= ucol_tok_assembleTokenList(&sourceParser
, &parseError
, &status
);
634 targetListLen
= ucol_tok_assembleTokenList(&targetParser
, &parseError
, &status
);
636 if(sourceListLen
!= targetListLen
) {
637 // different number of resets
640 UColToken
*sourceReset
= NULL
, *targetReset
= NULL
;
641 UChar
*sourceResetString
= NULL
, *targetResetString
= NULL
;
642 int32_t sourceStringLen
= 0, targetStringLen
= 0;
643 for(i
= 0; i
< sourceListLen
; i
++) {
644 sourceReset
= sourceParser
.lh
[i
].reset
;
645 sourceResetString
= sourceParser
.source
+(sourceReset
->source
& 0xFFFFFF);
646 sourceStringLen
= sourceReset
->source
>> 24;
647 for(j
= 0; j
< sourceListLen
; j
++) {
648 targetReset
= targetParser
.lh
[j
].reset
;
649 targetResetString
= targetParser
.source
+(targetReset
->source
& 0xFFFFFF);
650 targetStringLen
= targetReset
->source
>> 24;
651 if(sourceStringLen
== targetStringLen
&& (u_strncmp(sourceResetString
, targetResetString
, sourceStringLen
) == 0)) {
652 sourceReset
= sourceParser
.lh
[i
].first
;
653 targetReset
= targetParser
.lh
[j
].first
;
654 while(sourceReset
!= NULL
&& targetReset
!= NULL
) {
655 sourceResetString
= sourceParser
.source
+(sourceReset
->source
& 0xFFFFFF);
656 sourceStringLen
= sourceReset
->source
>> 24;
657 targetResetString
= targetParser
.source
+(targetReset
->source
& 0xFFFFFF);
658 targetStringLen
= targetReset
->source
>> 24;
659 if(sourceStringLen
!= targetStringLen
|| (u_strncmp(sourceResetString
, targetResetString
, sourceStringLen
) != 0)) {
663 // probably also need to check the expansions
664 if(sourceReset
->expansion
) {
665 if(!targetReset
->expansion
) {
669 // compare expansions
670 sourceResetString
= sourceParser
.source
+(sourceReset
->expansion
& 0xFFFFFF);
671 sourceStringLen
= sourceReset
->expansion
>> 24;
672 targetResetString
= targetParser
.source
+(targetReset
->expansion
& 0xFFFFFF);
673 targetStringLen
= targetReset
->expansion
>> 24;
674 if(sourceStringLen
!= targetStringLen
|| (u_strncmp(sourceResetString
, targetResetString
, sourceStringLen
) != 0)) {
680 if(targetReset
->expansion
) {
685 sourceReset
= sourceReset
->next
;
686 targetReset
= targetReset
->next
;
688 if(sourceReset
!= targetReset
) { // at least one is not NULL
689 // there are more tailored elements in one list
698 // couldn't find the reset anchor, so the collators are not equal
699 if(j
== sourceListLen
) {
707 ucol_tok_closeTokenList(&sourceParser
);
708 ucol_tok_closeTokenList(&targetParser
);
713 U_CAPI
int32_t U_EXPORT2
714 ucol_getDisplayName( const char *objLoc
,
717 int32_t resultLength
,
722 if(U_FAILURE(*status
)) return -1;
724 if(!(result
==NULL
&& resultLength
==0)) {
725 // NULL destination for pure preflighting: empty dummy string
726 // otherwise, alias the destination buffer
727 dst
.setTo(result
, 0, resultLength
);
729 Collator::getDisplayName(Locale(objLoc
), Locale(dispLoc
), dst
);
730 return dst
.extract(result
, resultLength
, *status
);
733 U_CAPI
const char* U_EXPORT2
734 ucol_getAvailable(int32_t index
)
737 const Locale
*loc
= Collator::getAvailableLocales(count
);
738 if (loc
!= NULL
&& index
< count
) {
739 return loc
[index
].getName();
744 U_CAPI
int32_t U_EXPORT2
745 ucol_countAvailable()
748 Collator::getAvailableLocales(count
);
752 #if !UCONFIG_NO_SERVICE
753 U_CAPI UEnumeration
* U_EXPORT2
754 ucol_openAvailableLocales(UErrorCode
*status
) {
757 // This is a wrapper over Collator::getAvailableLocales()
758 if (U_FAILURE(*status
)) {
761 StringEnumeration
*s
= icu::Collator::getAvailableLocales();
763 *status
= U_MEMORY_ALLOCATION_ERROR
;
766 return uenum_openFromStringEnumeration(s
, status
);
770 // Note: KEYWORDS[0] != RESOURCE_NAME - alan
772 static const char RESOURCE_NAME
[] = "collations";
774 static const char* const KEYWORDS
[] = { "collation" };
776 #define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
778 U_CAPI UEnumeration
* U_EXPORT2
779 ucol_getKeywords(UErrorCode
*status
) {
780 UEnumeration
*result
= NULL
;
781 if (U_SUCCESS(*status
)) {
782 return uenum_openCharStringsEnumeration(KEYWORDS
, KEYWORD_COUNT
, status
);
787 U_CAPI UEnumeration
* U_EXPORT2
788 ucol_getKeywordValues(const char *keyword
, UErrorCode
*status
) {
789 if (U_FAILURE(*status
)) {
792 // hard-coded to accept exactly one collation keyword
793 // modify if additional collation keyword is added later
794 if (keyword
==NULL
|| uprv_strcmp(keyword
, KEYWORDS
[0])!=0)
796 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
799 return ures_getKeywordValues(U_ICUDATA_COLL
, RESOURCE_NAME
, status
);
802 static const UEnumeration defaultKeywordValues
= {
805 ulist_close_keyword_values_iterator
,
806 ulist_count_keyword_values
,
808 ulist_next_keyword_value
,
809 ulist_reset_keyword_values_iterator
814 U_CAPI UEnumeration
* U_EXPORT2
815 ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale
,
816 UBool
/*commonlyUsed*/, UErrorCode
* status
) {
817 /* Get the locale base name. */
818 char localeBuffer
[ULOC_FULLNAME_CAPACITY
] = "";
819 uloc_getBaseName(locale
, localeBuffer
, sizeof(localeBuffer
), status
);
821 /* Create the 2 lists
822 * -values is the temp location for the keyword values
823 * -results hold the actual list used by the UEnumeration object
825 UList
*values
= ulist_createEmptyList(status
);
826 UList
*results
= ulist_createEmptyList(status
);
827 UEnumeration
*en
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
828 if (U_FAILURE(*status
) || en
== NULL
) {
830 *status
= U_MEMORY_ALLOCATION_ERROR
;
834 ulist_deleteList(values
);
835 ulist_deleteList(results
);
839 memcpy(en
, &defaultKeywordValues
, sizeof(UEnumeration
));
840 en
->context
= results
;
842 /* Open the resource bundle for collation with the given locale. */
843 UResourceBundle bundle
, collations
, collres
, defres
;
844 ures_initStackObject(&bundle
);
845 ures_initStackObject(&collations
);
846 ures_initStackObject(&collres
);
847 ures_initStackObject(&defres
);
849 ures_openFillIn(&bundle
, U_ICUDATA_COLL
, localeBuffer
, status
);
851 while (U_SUCCESS(*status
)) {
852 ures_getByKey(&bundle
, RESOURCE_NAME
, &collations
, status
);
853 ures_resetIterator(&collations
);
854 while (U_SUCCESS(*status
) && ures_hasNext(&collations
)) {
855 ures_getNextResource(&collations
, &collres
, status
);
856 const char *key
= ures_getKey(&collres
);
857 /* If the key is default, get the string and store it in results list only
858 * if results list is empty.
860 if (uprv_strcmp(key
, "default") == 0) {
861 if (ulist_getListSize(results
) == 0) {
862 char *defcoll
= (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY
);
863 int32_t defcollLength
= ULOC_KEYWORDS_CAPACITY
;
865 ures_getNextResource(&collres
, &defres
, status
);
866 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
867 /* optimize - use the utf-8 string */
868 ures_getUTF8String(&defres
, defcoll
, &defcollLength
, TRUE
, status
);
871 const UChar
* defString
= ures_getString(&defres
, &defcollLength
, status
);
872 if(U_SUCCESS(*status
)) {
873 if(defcollLength
+1 > ULOC_KEYWORDS_CAPACITY
) {
874 *status
= U_BUFFER_OVERFLOW_ERROR
;
876 u_UCharsToChars(defString
, defcoll
, defcollLength
+1);
882 ulist_addItemBeginList(results
, defcoll
, TRUE
, status
);
885 ulist_addItemEndList(values
, key
, FALSE
, status
);
889 /* If the locale is "" this is root so exit. */
890 if (uprv_strlen(localeBuffer
) == 0) {
893 /* Get the parent locale and open a new resource bundle. */
894 uloc_getParent(localeBuffer
, localeBuffer
, sizeof(localeBuffer
), status
);
895 ures_openFillIn(&bundle
, U_ICUDATA_COLL
, localeBuffer
, status
);
899 ures_close(&collres
);
900 ures_close(&collations
);
903 if (U_SUCCESS(*status
)) {
905 ulist_resetList(values
);
906 while ((value
= (char *)ulist_getNext(values
)) != NULL
) {
907 if (!ulist_containsString(results
, value
, (int32_t)uprv_strlen(value
))) {
908 ulist_addItemEndList(results
, value
, FALSE
, status
);
909 if (U_FAILURE(*status
)) {
916 ulist_deleteList(values
);
918 if (U_FAILURE(*status
)){
922 ulist_resetList(results
);
928 U_CAPI
int32_t U_EXPORT2
929 ucol_getFunctionalEquivalent(char* result
, int32_t resultCapacity
,
930 const char* keyword
, const char* locale
,
931 UBool
* isAvailable
, UErrorCode
* status
)
933 // N.B.: Resource name is "collations" but keyword is "collation"
934 return ures_getFunctionalEquivalent(result
, resultCapacity
, U_ICUDATA_COLL
,
935 "collations", keyword
, locale
,
936 isAvailable
, TRUE
, status
);
939 /* returns the locale name the collation data comes from */
940 U_CAPI
const char * U_EXPORT2
941 ucol_getLocale(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
942 return ucol_getLocaleByType(coll
, type
, status
);
945 U_CAPI
const char * U_EXPORT2
946 ucol_getLocaleByType(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
947 const char *result
= NULL
;
948 if(status
== NULL
|| U_FAILURE(*status
)) {
951 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE
);
952 UTRACE_DATA1(UTRACE_INFO
, "coll=%p", coll
);
954 if(coll
->delegate
!=NULL
) {
955 return ((const Collator
*)coll
->delegate
)->getLocale(type
, *status
).getName();
958 case ULOC_ACTUAL_LOCALE
:
959 result
= coll
->actualLocale
;
961 case ULOC_VALID_LOCALE
:
962 result
= coll
->validLocale
;
964 case ULOC_REQUESTED_LOCALE
:
965 result
= coll
->requestedLocale
;
968 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
970 UTRACE_DATA1(UTRACE_INFO
, "result = %s", result
);
971 UTRACE_EXIT_STATUS(*status
);
975 U_CFUNC
void U_EXPORT2
976 ucol_setReqValidLocales(UCollator
*coll
, char *requestedLocaleToAdopt
, char *validLocaleToAdopt
, char *actualLocaleToAdopt
)
979 if (coll
->validLocale
) {
980 uprv_free(coll
->validLocale
);
982 coll
->validLocale
= validLocaleToAdopt
;
983 if (coll
->requestedLocale
) { // should always have
984 uprv_free(coll
->requestedLocale
);
986 coll
->requestedLocale
= requestedLocaleToAdopt
;
987 if (coll
->actualLocale
) {
988 uprv_free(coll
->actualLocale
);
990 coll
->actualLocale
= actualLocaleToAdopt
;
994 U_CAPI USet
* U_EXPORT2
995 ucol_getTailoredSet(const UCollator
*coll
, UErrorCode
*status
)
999 if(status
== NULL
|| U_FAILURE(*status
)) {
1002 if(coll
== NULL
|| coll
->UCA
== NULL
) {
1003 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1006 UParseError parseError
;
1007 UColTokenParser src
;
1008 int32_t rulesLen
= 0;
1009 const UChar
*rules
= ucol_getRules(coll
, &rulesLen
);
1010 UBool startOfRules
= TRUE
;
1011 // we internally use the C++ class, for the following reasons:
1012 // 1. we need to utilize canonical iterator, which is a C++ only class
1013 // 2. canonical iterator returns UnicodeStrings - USet cannot take them
1014 // 3. USet is internally really UnicodeSet, C is just a wrapper
1015 UnicodeSet
*tailored
= new UnicodeSet();
1016 UnicodeString pattern
;
1017 UnicodeString empty
;
1018 CanonicalIterator
it(empty
, *status
);
1021 // The idea is to tokenize the rule set. For each non-reset token,
1022 // we add all the canonicaly equivalent FCD sequences
1023 ucol_tok_initTokenList(&src
, rules
, rulesLen
, coll
->UCA
, ucol_tok_getRulesFromBundle
, NULL
, status
);
1024 while (ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
, status
) != NULL
) {
1025 startOfRules
= FALSE
;
1026 if(src
.parsedToken
.strength
!= UCOL_TOK_RESET
) {
1027 const UChar
*stuff
= src
.source
+(src
.parsedToken
.charsOffset
);
1028 it
.setSource(UnicodeString(stuff
, src
.parsedToken
.charsLen
), *status
);
1029 pattern
= it
.next();
1030 while(!pattern
.isBogus()) {
1031 if(Normalizer::quickCheck(pattern
, UNORM_FCD
, *status
) != UNORM_NO
) {
1032 tailored
->add(pattern
);
1034 pattern
= it
.next();
1038 ucol_tok_closeTokenList(&src
);
1039 return (USet
*)tailored
;
1043 * Collation Reordering
1046 void ucol_setReorderCodesFromParser(UCollator
*coll
, UColTokenParser
*parser
, UErrorCode
*status
) {
1047 if (U_FAILURE(*status
)) {
1051 if (parser
->reorderCodesLength
== 0 || parser
->reorderCodes
== NULL
) {
1055 coll
->reorderCodesLength
= 0;
1056 if (coll
->reorderCodes
!= NULL
&& coll
->freeReorderCodesOnClose
== TRUE
) {
1057 uprv_free(coll
->reorderCodes
);
1060 if (coll
->defaultReorderCodes
!= NULL
&& coll
->freeDefaultReorderCodesOnClose
== TRUE
) {
1061 uprv_free(coll
->defaultReorderCodes
);
1063 coll
->defaultReorderCodesLength
= parser
->reorderCodesLength
;
1064 coll
->defaultReorderCodes
= (int32_t*) uprv_malloc(coll
->defaultReorderCodesLength
* sizeof(int32_t));
1065 if (coll
->defaultReorderCodes
== NULL
) {
1066 *status
= U_MEMORY_ALLOCATION_ERROR
;
1069 uprv_memcpy(coll
->defaultReorderCodes
, parser
->reorderCodes
, coll
->defaultReorderCodesLength
* sizeof(int32_t));
1070 coll
->freeDefaultReorderCodesOnClose
= TRUE
;
1072 coll
->reorderCodesLength
= parser
->reorderCodesLength
;
1073 coll
->reorderCodes
= (int32_t*) uprv_malloc(coll
->reorderCodesLength
* sizeof(int32_t));
1074 if (coll
->reorderCodes
== NULL
) {
1075 *status
= U_MEMORY_ALLOCATION_ERROR
;
1078 uprv_memcpy(coll
->reorderCodes
, parser
->reorderCodes
, coll
->reorderCodesLength
* sizeof(int32_t));
1079 coll
->freeReorderCodesOnClose
= TRUE
;
1083 * Data is stored in the reorder code to lead byte table as:
1084 * index count - unsigned short (2 bytes) - number of index entries
1085 * data size - unsigned short (2 bytes) - number of unsigned short data elements
1086 * index[index count] - array of 2 unsigned shorts (4 bytes each entry)
1087 * - reorder code, offset
1088 * - index is sorted by reorder code
1089 * - if an offset has the high bit set then it is not an offset but a single data entry
1090 * once the high bit is stripped off
1091 * data[data size] - array of unsigned short (2 bytes each entry)
1092 * - the data is an unsigned short count followed by count number
1093 * of lead bytes stored in an unsigned short
1095 U_CFUNC
int U_EXPORT2
1096 ucol_getLeadBytesForReorderCode(const UCollator
*uca
, int reorderCode
, uint16_t* returnLeadBytes
, int returnCapacity
) {
1097 uint16_t reorderCodeIndexLength
= *((uint16_t*) ((uint8_t *)uca
->image
+ uca
->image
->scriptToLeadByte
));
1098 uint16_t* reorderCodeIndex
= (uint16_t*) ((uint8_t *)uca
->image
+ uca
->image
->scriptToLeadByte
+ 2 *sizeof(uint16_t));
1100 // reorder code index is 2 uint16_t's - reorder code + offset
1101 for (int i
= 0; i
< reorderCodeIndexLength
; i
++) {
1102 if (reorderCode
== reorderCodeIndex
[i
*2]) {
1103 uint16_t dataOffset
= reorderCodeIndex
[(i
*2) + 1];
1104 if ((dataOffset
& 0x8000) == 0x8000) {
1105 // offset isn't offset but instead is a single data element
1106 if (returnCapacity
>= 1) {
1107 returnLeadBytes
[0] = dataOffset
& ~0x8000;
1112 uint16_t* dataOffsetBase
= (uint16_t*) ((uint8_t *)reorderCodeIndex
+ reorderCodeIndexLength
* (2 * sizeof(uint16_t)));
1113 uint16_t leadByteCount
= *(dataOffsetBase
+ dataOffset
);
1114 leadByteCount
= leadByteCount
> returnCapacity
? returnCapacity
: leadByteCount
;
1115 uprv_memcpy(returnLeadBytes
, dataOffsetBase
+ dataOffset
+ 1, leadByteCount
* sizeof(uint16_t));
1116 return leadByteCount
;
1123 * Data is stored in the lead byte to reorder code table as:
1124 * index count - unsigned short (2 bytes) - number of index entries
1125 * data size - unsigned short (2 bytes) - number of unsigned short data elements
1126 * index[index count] - array of unsigned short (2 bytes each entry)
1127 * - index is sorted by lead byte
1128 * - if an index has the high bit set then it is not an index but a single data entry
1129 * once the high bit is stripped off
1130 * data[data size] - array of unsigned short (2 bytes each entry)
1131 * - the data is an unsigned short count followed by count number of reorder codes
1133 U_CFUNC
int U_EXPORT2
1134 ucol_getReorderCodesForLeadByte(const UCollator
*uca
, int leadByte
, int16_t* returnReorderCodes
, int returnCapacity
) {
1135 uint16_t* leadByteTable
= ((uint16_t*) ((uint8_t *)uca
->image
+ uca
->image
->leadByteToScript
));
1136 uint16_t leadByteIndexLength
= *leadByteTable
;
1137 if (leadByte
>= leadByteIndexLength
) {
1140 uint16_t leadByteIndex
= *(leadByteTable
+ (2 + leadByte
));
1142 if ((leadByteIndex
& 0x8000) == 0x8000) {
1143 // offset isn't offset but instead is a single data element
1144 if (returnCapacity
>= 1) {
1145 returnReorderCodes
[0] = leadByteIndex
& ~0x8000;
1150 //uint16_t* dataOffsetBase = leadByteTable + (2 + leadByteIndexLength);
1151 uint16_t* reorderCodeData
= leadByteTable
+ (2 + leadByteIndexLength
) + leadByteIndex
;
1152 uint16_t reorderCodeCount
= *reorderCodeData
> returnCapacity
? returnCapacity
: *reorderCodeData
;
1153 uprv_memcpy(returnReorderCodes
, reorderCodeData
+ 1, reorderCodeCount
* sizeof(uint16_t));
1154 return reorderCodeCount
;
1157 // used to mark ignorable reorder code slots
1158 static const int32_t UCOL_REORDER_CODE_IGNORE
= UCOL_REORDER_CODE_LIMIT
+ 1;
1160 U_CFUNC
void U_EXPORT2
1161 ucol_buildPermutationTable(UCollator
*coll
, UErrorCode
*status
) {
1162 uint16_t leadBytesSize
= 256;
1163 uint16_t leadBytes
[256];
1164 int32_t internalReorderCodesLength
= coll
->reorderCodesLength
+ (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
);
1165 int32_t* internalReorderCodes
;
1167 // The lowest byte that hasn't been assigned a mapping
1168 int toBottom
= 0x03;
1169 // The highest byte that hasn't been assigned a mapping - don't include the special or trailing
1172 // are we filling from the bottom?
1173 bool fromTheBottom
= true;
1174 int32_t reorderCodesIndex
= -1;
1176 // lead bytes that have alread been assigned to the permutation table
1177 bool newLeadByteUsed
[256];
1178 // permutation table slots that have already been filled
1179 bool permutationSlotFilled
[256];
1182 if(U_FAILURE(*status
) || coll
== NULL
) {
1186 // clear the reordering
1187 if (coll
->reorderCodes
== NULL
|| coll
->reorderCodesLength
== 0
1188 || (coll
->reorderCodesLength
== 1 && coll
->reorderCodes
[0] == UCOL_REORDER_CODE_NONE
)) {
1189 if (coll
->leadBytePermutationTable
!= NULL
) {
1190 if (coll
->freeLeadBytePermutationTableOnClose
) {
1191 uprv_free(coll
->leadBytePermutationTable
);
1193 coll
->leadBytePermutationTable
= NULL
;
1194 coll
->reorderCodesLength
= 0;
1199 // set reordering to the default reordering
1200 if (coll
->reorderCodes
[0] == UCOL_REORDER_CODE_DEFAULT
) {
1201 if (coll
->reorderCodesLength
!= 1) {
1202 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1205 if (coll
->freeReorderCodesOnClose
== TRUE
) {
1206 uprv_free(coll
->reorderCodes
);
1208 coll
->reorderCodes
= NULL
;
1210 if (coll
->leadBytePermutationTable
!= NULL
&& coll
->freeLeadBytePermutationTableOnClose
== TRUE
) {
1211 uprv_free(coll
->leadBytePermutationTable
);
1213 coll
->leadBytePermutationTable
= NULL
;
1215 if (coll
->defaultReorderCodesLength
== 0) {
1219 coll
->reorderCodes
= (int32_t*)uprv_malloc(coll
->defaultReorderCodesLength
* sizeof(int32_t));
1220 coll
->freeReorderCodesOnClose
= TRUE
;
1221 if (coll
->reorderCodes
== NULL
) {
1222 *status
= U_MEMORY_ALLOCATION_ERROR
;
1225 coll
->reorderCodesLength
= coll
->defaultReorderCodesLength
;
1226 uprv_memcpy(coll
->defaultReorderCodes
, coll
->reorderCodes
, coll
->reorderCodesLength
* sizeof(int32_t));
1229 if (coll
->leadBytePermutationTable
== NULL
) {
1230 coll
->leadBytePermutationTable
= (uint8_t*)uprv_malloc(256*sizeof(uint8_t));
1231 coll
->freeLeadBytePermutationTableOnClose
= TRUE
;
1232 if (coll
->leadBytePermutationTable
== NULL
) {
1233 *status
= U_MEMORY_ALLOCATION_ERROR
;
1238 // prefill the reordering codes with the leading entries
1239 internalReorderCodes
= (int32_t*)uprv_malloc(internalReorderCodesLength
* sizeof(int32_t));
1240 if (internalReorderCodes
== NULL
) {
1241 *status
= U_MEMORY_ALLOCATION_ERROR
;
1242 if (coll
->leadBytePermutationTable
!= NULL
&& coll
->freeLeadBytePermutationTableOnClose
== TRUE
) {
1243 uprv_free(coll
->leadBytePermutationTable
);
1245 coll
->leadBytePermutationTable
= NULL
;
1249 for (uint32_t codeIndex
= 0; codeIndex
< (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
); codeIndex
++) {
1250 internalReorderCodes
[codeIndex
] = UCOL_REORDER_CODE_FIRST
+ codeIndex
;
1252 for (int32_t codeIndex
= 0; codeIndex
< coll
->reorderCodesLength
; codeIndex
++) {
1253 uint32_t reorderCodesCode
= coll
->reorderCodes
[codeIndex
];
1254 internalReorderCodes
[codeIndex
+ (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
)] = reorderCodesCode
;
1255 if (reorderCodesCode
>= UCOL_REORDER_CODE_FIRST
&& reorderCodesCode
< UCOL_REORDER_CODE_LIMIT
) {
1256 internalReorderCodes
[reorderCodesCode
- UCOL_REORDER_CODE_FIRST
] = UCOL_REORDER_CODE_IGNORE
;
1260 for (int i
= 0; i
< 256; i
++) {
1261 if (i
< toBottom
|| i
> toTop
) {
1262 permutationSlotFilled
[i
] = true;
1263 newLeadByteUsed
[i
] = true;
1264 coll
->leadBytePermutationTable
[i
] = i
;
1266 permutationSlotFilled
[i
] = false;
1267 newLeadByteUsed
[i
] = false;
1268 coll
->leadBytePermutationTable
[i
] = 0;
1272 /* Start from the front of the list and place each script we encounter at the
1273 * earliest possible locatation in the permutation table. If we encounter
1274 * UNKNOWN, start processing from the back, and place each script in the last
1275 * possible location. At each step, we also need to make sure that any scripts
1276 * that need to not be moved are copied to their same location in the final table.
1278 for (int reorderCodesCount
= 0; reorderCodesCount
< internalReorderCodesLength
; reorderCodesCount
++) {
1279 reorderCodesIndex
+= fromTheBottom
? 1 : -1;
1280 int32_t next
= internalReorderCodes
[reorderCodesIndex
];
1281 if (next
== UCOL_REORDER_CODE_IGNORE
) {
1284 if (next
== USCRIPT_UNKNOWN
) {
1285 if (fromTheBottom
== false) {
1286 // double turnaround
1287 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1288 if (coll
->leadBytePermutationTable
!= NULL
&& coll
->freeLeadBytePermutationTableOnClose
== TRUE
) {
1289 uprv_free(coll
->leadBytePermutationTable
);
1291 coll
->leadBytePermutationTable
= NULL
;
1292 coll
->reorderCodesLength
= 0;
1293 if (internalReorderCodes
!= NULL
) {
1294 uprv_free(internalReorderCodes
);
1298 fromTheBottom
= false;
1299 reorderCodesIndex
= internalReorderCodesLength
;
1303 uint16_t leadByteCount
= ucol_getLeadBytesForReorderCode(coll
->UCA
, next
, leadBytes
, leadBytesSize
);
1304 if (fromTheBottom
) {
1305 for (int leadByteIndex
= 0; leadByteIndex
< leadByteCount
; leadByteIndex
++) {
1306 // don't place a lead byte twice in the permutation table
1307 if (permutationSlotFilled
[leadBytes
[leadByteIndex
]]) {
1308 // lead byte already used
1309 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1310 if (coll
->leadBytePermutationTable
!= NULL
&& coll
->freeLeadBytePermutationTableOnClose
== TRUE
) {
1311 uprv_free(coll
->leadBytePermutationTable
);
1313 coll
->leadBytePermutationTable
= NULL
;
1314 coll
->reorderCodesLength
= 0;
1315 if (internalReorderCodes
!= NULL
) {
1316 uprv_free(internalReorderCodes
);
1321 coll
->leadBytePermutationTable
[leadBytes
[leadByteIndex
]] = toBottom
;
1322 newLeadByteUsed
[toBottom
] = true;
1323 permutationSlotFilled
[leadBytes
[leadByteIndex
]] = true;
1327 for (int leadByteIndex
= leadByteCount
- 1; leadByteIndex
>= 0; leadByteIndex
--) {
1328 // don't place a lead byte twice in the permutation table
1329 if (permutationSlotFilled
[leadBytes
[leadByteIndex
]]) {
1330 // lead byte already used
1331 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1332 if (coll
->leadBytePermutationTable
!= NULL
&& coll
->freeLeadBytePermutationTableOnClose
== TRUE
) {
1333 uprv_free(coll
->leadBytePermutationTable
);
1335 coll
->leadBytePermutationTable
= NULL
;
1336 coll
->reorderCodesLength
= 0;
1337 if (internalReorderCodes
!= NULL
) {
1338 uprv_free(internalReorderCodes
);
1343 coll
->leadBytePermutationTable
[leadBytes
[leadByteIndex
]] = toTop
;
1344 newLeadByteUsed
[toTop
] = true;
1345 permutationSlotFilled
[leadBytes
[leadByteIndex
]] = true;
1351 #ifdef REORDER_DEBUG
1352 fprintf(stdout
, "\n@@@@ Partial Script Reordering Table\n");
1353 for (int i
= 0; i
< 256; i
++) {
1354 fprintf(stdout
, "\t%02x = %02x\n", i
, coll
->leadBytePermutationTable
[i
]);
1356 fprintf(stdout
, "\n@@@@ Lead Byte Used Table\n");
1357 for (int i
= 0; i
< 256; i
++) {
1358 fprintf(stdout
, "\t%02x = %02x\n", i
, newLeadByteUsed
[i
]);
1360 fprintf(stdout
, "\n@@@@ Permutation Slot Filled Table\n");
1361 for (int i
= 0; i
< 256; i
++) {
1362 fprintf(stdout
, "\t%02x = %02x\n", i
, permutationSlotFilled
[i
]);
1366 /* Copy everything that's left over */
1367 int reorderCode
= 0;
1368 for (int i
= 0; i
< 256; i
++) {
1369 if (!permutationSlotFilled
[i
]) {
1370 while (reorderCode
< 256 && newLeadByteUsed
[reorderCode
]) {
1373 coll
->leadBytePermutationTable
[i
] = reorderCode
;
1374 permutationSlotFilled
[i
] = true;
1375 newLeadByteUsed
[reorderCode
] = true;
1379 #ifdef REORDER_DEBUG
1380 fprintf(stdout
, "\n@@@@ Script Reordering Table\n");
1381 for (int i
= 0; i
< 256; i
++) {
1382 fprintf(stdout
, "\t%02x = %02x\n", i
, coll
->leadBytePermutationTable
[i
]);
1386 if (internalReorderCodes
!= NULL
) {
1387 uprv_free(internalReorderCodes
);
1390 // force a regen of the latin one table since it is affected by the script reordering
1391 coll
->latinOneRegenTable
= TRUE
;
1392 ucol_updateInternalState(coll
, status
);
1395 #endif /* #if !UCONFIG_NO_COLLATION */