]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/ucol_res.cpp
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_res.cpp
CommitLineData
73c04bcf
A
1/*
2*******************************************************************************
729e4ab9 3* Copyright (C) 1996-2011, International Business Machines
73c04bcf
A
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
6* file name: ucol_res.cpp
7* encoding: US-ASCII
8* tab size: 8 (not used)
9* indentation:4
10*
11* Description:
12* This file contains dependencies that the collation run-time doesn't normally
13* need. This mainly contains resource bundle usage and collation meta information
14*
15* Modification history
16* Date Name Comments
17* 1996-1999 various members of ICU team maintained C API for collation framework
18* 02/16/2001 synwee Added internal method getPrevSpecialCE
19* 03/01/2001 synwee Added maxexpansion functionality.
20* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
21* 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp
22*/
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_COLLATION
27#include "unicode/uloc.h"
28#include "unicode/coll.h"
29#include "unicode/tblcoll.h"
30#include "unicode/caniter.h"
729e4ab9 31#include "unicode/uscript.h"
73c04bcf
A
32#include "unicode/ustring.h"
33
34#include "ucol_bld.h"
35#include "ucol_imp.h"
36#include "ucol_tok.h"
37#include "ucol_elm.h"
38#include "uresimp.h"
39#include "ustr_imp.h"
40#include "cstring.h"
41#include "umutex.h"
46f4442e 42#include "ucln_in.h"
73c04bcf
A
43#include "ustrenum.h"
44#include "putilimp.h"
45#include "utracimp.h"
46#include "cmemory.h"
729e4ab9
A
47#include "uenumimp.h"
48#include "ulist.h"
73c04bcf 49
46f4442e
A
50U_NAMESPACE_USE
51
729e4ab9
A
52static void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status);
53
46f4442e
A
54// static UCA. There is only one. Collators don't use it.
55// It is referenced only in ucol_initUCA and ucol_cleanup
56static UCollator* _staticUCA = NULL;
57// static pointer to udata memory. Inited in ucol_initUCA
58// used for cleanup in ucol_cleanup
59static UDataMemory* UCA_DATA_MEM = NULL;
60
73c04bcf 61U_CDECL_BEGIN
46f4442e
A
62static UBool U_CALLCONV
63ucol_res_cleanup(void)
64{
65 if (UCA_DATA_MEM) {
66 udata_close(UCA_DATA_MEM);
67 UCA_DATA_MEM = NULL;
68 }
69 if (_staticUCA) {
70 ucol_close(_staticUCA);
71 _staticUCA = NULL;
72 }
73 return TRUE;
74}
75
76static UBool U_CALLCONV
77isAcceptableUCA(void * /*context*/,
78 const char * /*type*/, const char * /*name*/,
79 const UDataInfo *pInfo){
80 /* context, type & name are intentionally not used */
81 if( pInfo->size>=20 &&
82 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
83 pInfo->charsetFamily==U_CHARSET_FAMILY &&
84 pInfo->dataFormat[0]==UCA_DATA_FORMAT_0 && /* dataFormat="UCol" */
85 pInfo->dataFormat[1]==UCA_DATA_FORMAT_1 &&
86 pInfo->dataFormat[2]==UCA_DATA_FORMAT_2 &&
87 pInfo->dataFormat[3]==UCA_DATA_FORMAT_3 &&
88 pInfo->formatVersion[0]==UCA_FORMAT_VERSION_0 &&
89 pInfo->formatVersion[1]>=UCA_FORMAT_VERSION_1// &&
90 //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 &&
91 //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh
92 //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh
93 ) {
94 UVersionInfo UCDVersion;
95 u_getUnicodeVersion(UCDVersion);
96 return (UBool)(pInfo->dataVersion[0]==UCDVersion[0]
97 && pInfo->dataVersion[1]==UCDVersion[1]);
98 //&& pInfo->dataVersion[2]==ucaDataInfo.dataVersion[2]
99 //&& pInfo->dataVersion[3]==ucaDataInfo.dataVersion[3]);
100 } else {
101 return FALSE;
102 }
73c04bcf
A
103}
104U_CDECL_END
105
46f4442e
A
106/* do not close UCA returned by ucol_initUCA! */
107UCollator *
108ucol_initUCA(UErrorCode *status) {
109 if(U_FAILURE(*status)) {
110 return NULL;
111 }
112 UBool needsInit;
113 UMTX_CHECK(NULL, (_staticUCA == NULL), needsInit);
114
115 if(needsInit) {
729e4ab9 116 UDataMemory *result = udata_openChoice(U_ICUDATA_COLL, UCA_DATA_TYPE, UCA_DATA_NAME, isAcceptableUCA, NULL, status);
46f4442e
A
117
118 if(U_SUCCESS(*status)){
119 UCollator *newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), NULL, NULL, status);
120 if(U_SUCCESS(*status)){
729e4ab9
A
121 // Initalize variables for implicit generation
122 uprv_uca_initImplicitConstants(status);
123
46f4442e
A
124 umtx_lock(NULL);
125 if(_staticUCA == NULL) {
729e4ab9 126 UCA_DATA_MEM = result;
46f4442e
A
127 _staticUCA = newUCA;
128 newUCA = NULL;
46f4442e
A
129 result = NULL;
130 }
131 umtx_unlock(NULL);
132
133 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup);
134 if(newUCA != NULL) {
135 ucol_close(newUCA);
136 udata_close(result);
137 }
46f4442e
A
138 }else{
139 ucol_close(newUCA);
140 udata_close(result);
141 }
142 }
143 else {
144 udata_close(result);
145 }
146 }
147 return _staticUCA;
148}
149
150U_CAPI void U_EXPORT2
151ucol_forgetUCA(void)
152{
153 _staticUCA = NULL;
154 UCA_DATA_MEM = NULL;
155}
156
73c04bcf
A
157/****************************************************************************/
158/* Following are the open/close functions */
159/* */
160/****************************************************************************/
161static UCollator*
162tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
46f4442e
A
163 int32_t rulesLen = 0;
164 const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status);
165 return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
73c04bcf
A
166}
167
168
169// API in ucol_imp.h
170
171U_CFUNC UCollator*
172ucol_open_internal(const char *loc,
173 UErrorCode *status)
174{
46f4442e 175 UErrorCode intStatus = U_ZERO_ERROR;
73c04bcf
A
176 const UCollator* UCA = ucol_initUCA(status);
177
178 /* New version */
179 if(U_FAILURE(*status)) return 0;
180
181
182
183 UCollator *result = NULL;
184 UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
185
186 /* we try to find stuff from keyword */
187 UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
188 UResourceBundle *collElem = NULL;
189 char keyBuffer[256];
190 // if there is a keyword, we pick it up and try to get elements
729e4ab9
A
191 if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status) ||
192 !uprv_strcmp(keyBuffer,"default")) { /* Treat 'zz@collation=default' as 'zz'. */
73c04bcf 193 // no keyword. we try to find the default setting, which will give us the keyword value
46f4442e 194 intStatus = U_ZERO_ERROR;
73c04bcf
A
195 // finding default value does not affect collation fallback status
196 UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
197 if(U_SUCCESS(intStatus)) {
198 int32_t defaultKeyLen = 0;
199 const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
200 u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
201 keyBuffer[defaultKeyLen] = 0;
202 } else {
203 *status = U_INTERNAL_PROGRAM_ERROR;
204 return NULL;
205 }
206 ures_close(defaultColl);
207 }
46f4442e
A
208 collElem = ures_getByKeyWithFallback(collations, keyBuffer, collations, status);
209 collations = NULL; // We just reused the collations object as collElem.
73c04bcf
A
210
211 UResourceBundle *binary = NULL;
729e4ab9
A
212 UResourceBundle *reorderRes = NULL;
213
73c04bcf
A
214 if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
215 *status = U_USING_DEFAULT_WARNING;
216 result = ucol_initCollator(UCA->image, result, UCA, status);
46f4442e
A
217 if (U_FAILURE(*status)) {
218 goto clean;
219 }
73c04bcf 220 // if we use UCA, real locale is root
46f4442e
A
221 ures_close(b);
222 b = ures_open(U_ICUDATA_COLL, "", status);
223 ures_close(collElem);
224 collElem = ures_open(U_ICUDATA_COLL, "", status);
73c04bcf
A
225 if(U_FAILURE(*status)) {
226 goto clean;
227 }
73c04bcf
A
228 result->hasRealData = FALSE;
229 } else if(U_SUCCESS(*status)) {
46f4442e 230 intStatus = U_ZERO_ERROR;
73c04bcf 231
46f4442e 232 binary = ures_getByKey(collElem, "%%CollationBin", NULL, &intStatus);
73c04bcf 233
46f4442e 234 if(intStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
73c04bcf
A
235 binary = NULL;
236 result = tryOpeningFromRules(collElem, status);
237 if(U_FAILURE(*status)) {
238 goto clean;
239 }
729e4ab9 240 } else if(U_SUCCESS(intStatus)) { /* otherwise, we'll pick a collation data that exists */
46f4442e 241 int32_t len = 0;
73c04bcf 242 const uint8_t *inData = ures_getBinary(binary, &len, status);
729e4ab9
A
243 if(U_FAILURE(*status)) {
244 goto clean;
245 }
73c04bcf
A
246 UCATableHeader *colData = (UCATableHeader *)inData;
247 if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
248 uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
249 colData->version[0] != UCOL_BUILDER_VERSION)
250 {
251 *status = U_DIFFERENT_UCA_VERSION;
252 result = tryOpeningFromRules(collElem, status);
253 } else {
254 if(U_FAILURE(*status)){
255 goto clean;
256 }
257 if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
258 result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
259 if(U_FAILURE(*status)){
260 goto clean;
261 }
262 result->hasRealData = TRUE;
263 } else {
264 result = ucol_initCollator(UCA->image, result, UCA, status);
265 ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
266 if(U_FAILURE(*status)){
267 goto clean;
268 }
269 result->hasRealData = FALSE;
270 }
271 result->freeImageOnClose = FALSE;
729e4ab9
A
272
273 reorderRes = ures_getByKey(collElem, "%%ReorderCodes", NULL, &intStatus);
274 if (U_SUCCESS(intStatus)) {
275 int32_t reorderCodesLen = 0;
276 const int32_t* reorderCodes = ures_getIntVector(reorderRes, &reorderCodesLen, status);
277 ucol_setReorderCodes(result, reorderCodes, reorderCodesLen, status);
278 if (U_FAILURE(*status)) {
279 goto clean;
280 }
281 }
73c04bcf 282 }
729e4ab9
A
283
284 } else { // !U_SUCCESS(binaryStatus)
285 if(U_SUCCESS(*status)) {
286 *status = intStatus; // propagate underlying error
287 }
288 goto clean;
73c04bcf 289 }
46f4442e
A
290 intStatus = U_ZERO_ERROR;
291 result->rules = ures_getStringByKey(collElem, "Sequence", &result->rulesLength, &intStatus);
73c04bcf
A
292 result->freeRulesOnClose = FALSE;
293 } else { /* There is another error, and we're just gonna clean up */
294 goto clean;
295 }
296
46f4442e
A
297 intStatus = U_ZERO_ERROR;
298 result->ucaRules = ures_getStringByKey(b,"UCARules",NULL,&intStatus);
73c04bcf
A
299
300 if(loc == NULL) {
729e4ab9 301 loc = ures_getLocaleByType(b, ULOC_ACTUAL_LOCALE, status);
73c04bcf 302 }
46f4442e 303 result->requestedLocale = uprv_strdup(loc);
73c04bcf
A
304 /* test for NULL */
305 if (result->requestedLocale == NULL) {
306 *status = U_MEMORY_ALLOCATION_ERROR;
307 goto clean;
308 }
729e4ab9 309 loc = ures_getLocaleByType(collElem, ULOC_ACTUAL_LOCALE, status);
46f4442e
A
310 result->actualLocale = uprv_strdup(loc);
311 /* test for NULL */
312 if (result->actualLocale == NULL) {
313 *status = U_MEMORY_ALLOCATION_ERROR;
314 goto clean;
315 }
729e4ab9 316 loc = ures_getLocaleByType(b, ULOC_ACTUAL_LOCALE, status);
46f4442e
A
317 result->validLocale = uprv_strdup(loc);
318 /* test for NULL */
319 if (result->validLocale == NULL) {
320 *status = U_MEMORY_ALLOCATION_ERROR;
321 goto clean;
322 }
73c04bcf 323
46f4442e
A
324 ures_close(b);
325 ures_close(collElem);
73c04bcf 326 ures_close(binary);
729e4ab9 327 ures_close(reorderRes);
73c04bcf
A
328 return result;
329
330clean:
331 ures_close(b);
332 ures_close(collElem);
73c04bcf 333 ures_close(binary);
729e4ab9 334 ures_close(reorderRes);
46f4442e 335 ucol_close(result);
73c04bcf
A
336 return NULL;
337}
338
339U_CAPI UCollator*
340ucol_open(const char *loc,
341 UErrorCode *status)
342{
46f4442e
A
343 U_NAMESPACE_USE
344
345 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
346 UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
347 UCollator *result = NULL;
73c04bcf 348
73c04bcf 349#if !UCONFIG_NO_SERVICE
46f4442e
A
350 result = Collator::createUCollator(loc, status);
351 if (result == NULL)
73c04bcf 352#endif
46f4442e
A
353 {
354 result = ucol_open_internal(loc, status);
355 }
356 UTRACE_EXIT_PTR_STATUS(result, *status);
357 return result;
73c04bcf
A
358}
359
729e4ab9
A
360
361UCollator*
362ucol_openRulesForImport( const UChar *rules,
363 int32_t rulesLength,
364 UColAttributeValue normalizationMode,
365 UCollationStrength strength,
366 UParseError *parseError,
367 GetCollationRulesFunction importFunc,
368 void* context,
369 UErrorCode *status)
73c04bcf 370{
46f4442e
A
371 UColTokenParser src;
372 UColAttributeValue norm;
373 UParseError tErr;
73c04bcf 374
46f4442e
A
375 if(status == NULL || U_FAILURE(*status)){
376 return 0;
377 }
73c04bcf 378
46f4442e
A
379 if(rules == NULL || rulesLength < -1) {
380 *status = U_ILLEGAL_ARGUMENT_ERROR;
381 return 0;
382 }
383
384 if(rulesLength == -1) {
385 rulesLength = u_strlen(rules);
386 }
387
388 if(parseError == NULL){
389 parseError = &tErr;
390 }
391
392 switch(normalizationMode) {
393 case UCOL_OFF:
394 case UCOL_ON:
395 case UCOL_DEFAULT:
396 norm = normalizationMode;
397 break;
398 default:
399 *status = U_ILLEGAL_ARGUMENT_ERROR;
400 return 0;
401 }
402
403 UCollator *result = NULL;
404 UCATableHeader *table = NULL;
405 UCollator *UCA = ucol_initUCA(status);
406
407 if(U_FAILURE(*status)){
408 return NULL;
409 }
410
729e4ab9 411 ucol_tok_initTokenList(&src, rules, rulesLength, UCA, importFunc, context, status);
46f4442e
A
412 ucol_tok_assembleTokenList(&src,parseError, status);
413
414 if(U_FAILURE(*status)) {
415 /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
416 /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
417 /* so something might be done here... or on lower level */
418#ifdef UCOL_DEBUG
419 if(*status == U_ILLEGAL_ARGUMENT_ERROR) {
729e4ab9 420 fprintf(stderr, "bad option starting at offset %i\n", (int)(src.current-src.source));
46f4442e 421 } else {
729e4ab9 422 fprintf(stderr, "invalid rule just before offset %i\n", (int)(src.current-src.source));
46f4442e
A
423 }
424#endif
73c04bcf
A
425 goto cleanup;
426 }
46f4442e
A
427
428 if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */
429 /* also, if we wanted to remove some contractions, we should make a tailoring */
430 table = ucol_assembleTailoringTable(&src, status);
431 if(U_SUCCESS(*status)) {
432 // builder version
433 table->version[0] = UCOL_BUILDER_VERSION;
434 // no tailoring information on this level
435 table->version[1] = table->version[2] = table->version[3] = 0;
436 // set UCD version
437 u_getUnicodeVersion(table->UCDVersion);
438 // set UCA version
439 uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
440 result = ucol_initCollator(table, 0, UCA, status);
441 if (U_FAILURE(*status)) {
442 goto cleanup;
443 }
444 result->hasRealData = TRUE;
445 result->freeImageOnClose = TRUE;
446 }
447 } else { /* no rules, but no error either */
448 // must be only options
449 // We will init the collator from UCA
450 result = ucol_initCollator(UCA->image, 0, UCA, status);
451 // Check for null result
452 if (U_FAILURE(*status)) {
453 goto cleanup;
454 }
455 // And set only the options
456 UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
457 /* test for NULL */
458 if (opts == NULL) {
459 *status = U_MEMORY_ALLOCATION_ERROR;
460 goto cleanup;
461 }
462 uprv_memcpy(opts, src.opts, sizeof(UColOptionSet));
463 ucol_setOptionsFromHeader(result, opts, status);
729e4ab9 464 ucol_setReorderCodesFromParser(result, &src, status);
46f4442e
A
465 result->freeOptionsOnClose = TRUE;
466 result->hasRealData = FALSE;
467 result->freeImageOnClose = FALSE;
468 }
469
470 if(U_SUCCESS(*status)) {
471 UChar *newRules;
472 result->dataVersion[0] = UCOL_BUILDER_VERSION;
473 if(rulesLength > 0) {
474 newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR);
475 /* test for NULL */
476 if (newRules == NULL) {
477 *status = U_MEMORY_ALLOCATION_ERROR;
478 goto cleanup;
479 }
480 uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR);
481 newRules[rulesLength]=0;
482 result->rules = newRules;
483 result->rulesLength = rulesLength;
484 result->freeRulesOnClose = TRUE;
485 }
486 result->ucaRules = NULL;
487 result->actualLocale = NULL;
488 result->validLocale = NULL;
489 result->requestedLocale = NULL;
729e4ab9 490 ucol_buildPermutationTable(result, status);
46f4442e
A
491 ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
492 ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
73c04bcf 493 } else {
46f4442e
A
494cleanup:
495 if(result != NULL) {
496 ucol_close(result);
497 } else {
498 if(table != NULL) {
499 uprv_free(table);
500 }
501 }
502 result = NULL;
73c04bcf 503 }
73c04bcf 504
46f4442e 505 ucol_tok_closeTokenList(&src);
73c04bcf 506
46f4442e 507 return result;
73c04bcf
A
508}
509
729e4ab9
A
510U_CAPI UCollator* U_EXPORT2
511ucol_openRules( const UChar *rules,
512 int32_t rulesLength,
513 UColAttributeValue normalizationMode,
514 UCollationStrength strength,
515 UParseError *parseError,
516 UErrorCode *status)
517{
518 return ucol_openRulesForImport(rules,
519 rulesLength,
520 normalizationMode,
521 strength,
522 parseError,
523 ucol_tok_getRulesFromBundle,
524 NULL,
525 status);
526}
527
73c04bcf
A
528U_CAPI int32_t U_EXPORT2
529ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
46f4442e
A
530 UErrorCode status = U_ZERO_ERROR;
531 int32_t len = 0;
532 int32_t UCAlen = 0;
533 const UChar* ucaRules = 0;
534 const UChar *rules = ucol_getRules(coll, &len);
535 if(delta == UCOL_FULL_RULES) {
536 /* take the UCA rules and append real rules at the end */
537 /* UCA rules will be probably coming from the root RB */
538 ucaRules = coll->ucaRules;
539 if (ucaRules) {
540 UCAlen = u_strlen(ucaRules);
541 }
542 /*
543 ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
544 UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
545 UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
546 ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
547 ures_close(uca);
548 ures_close(cresb);
549 */
550 }
551 if(U_FAILURE(status)) {
552 return 0;
553 }
554 if(buffer!=0 && bufferLen>0){
555 *buffer=0;
556 if(UCAlen > 0) {
557 u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen));
558 }
559 if(len > 0 && bufferLen > UCAlen) {
560 u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen));
561 }
562 }
563 return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status);
73c04bcf
A
564}
565
566static const UChar _NUL = 0;
567
568U_CAPI const UChar* U_EXPORT2
569ucol_getRules( const UCollator *coll,
46f4442e 570 int32_t *length)
73c04bcf
A
571{
572 if(coll->rules != NULL) {
573 *length = coll->rulesLength;
574 return coll->rules;
575 }
576 else {
577 *length = 0;
578 return &_NUL;
579 }
580}
581
582U_CAPI UBool U_EXPORT2
583ucol_equals(const UCollator *source, const UCollator *target) {
46f4442e
A
584 UErrorCode status = U_ZERO_ERROR;
585 // if pointers are equal, collators are equal
586 if(source == target) {
587 return TRUE;
588 }
589 int32_t i = 0, j = 0;
590 // if any of attributes are different, collators are not equal
591 for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
592 if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) {
593 return FALSE;
594 }
595 }
729e4ab9
A
596 if (source->reorderCodesLength != target->reorderCodesLength){
597 return FALSE;
598 }
599 for (i = 0; i < source->reorderCodesLength; i++) {
600 if(source->reorderCodes[i] != target->reorderCodes[i]) {
601 return FALSE;
602 }
603 }
46f4442e
A
604
605 int32_t sourceRulesLen = 0, targetRulesLen = 0;
606 const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
607 const UChar *targetRules = ucol_getRules(target, &targetRulesLen);
608
609 if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
610 // all the attributes are equal and the rules are equal - collators are equal
611 return(TRUE);
612 }
613 // hard part, need to construct tree from rules and see if they yield the same tailoring
614 UBool result = TRUE;
615 UParseError parseError;
616 UColTokenParser sourceParser, targetParser;
617 int32_t sourceListLen = 0, targetListLen = 0;
729e4ab9
A
618 ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, ucol_tok_getRulesFromBundle, NULL, &status);
619 ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, ucol_tok_getRulesFromBundle, NULL, &status);
46f4442e
A
620 sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
621 targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);
622
623 if(sourceListLen != targetListLen) {
624 // different number of resets
625 result = FALSE;
626 } else {
627 UColToken *sourceReset = NULL, *targetReset = NULL;
628 UChar *sourceResetString = NULL, *targetResetString = NULL;
629 int32_t sourceStringLen = 0, targetStringLen = 0;
630 for(i = 0; i < sourceListLen; i++) {
631 sourceReset = sourceParser.lh[i].reset;
73c04bcf
A
632 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
633 sourceStringLen = sourceReset->source >> 24;
46f4442e
A
634 for(j = 0; j < sourceListLen; j++) {
635 targetReset = targetParser.lh[j].reset;
636 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
637 targetStringLen = targetReset->source >> 24;
638 if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) {
639 sourceReset = sourceParser.lh[i].first;
640 targetReset = targetParser.lh[j].first;
641 while(sourceReset != NULL && targetReset != NULL) {
642 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
643 sourceStringLen = sourceReset->source >> 24;
644 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
645 targetStringLen = targetReset->source >> 24;
646 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
647 result = FALSE;
648 goto returnResult;
649 }
650 // probably also need to check the expansions
651 if(sourceReset->expansion) {
652 if(!targetReset->expansion) {
653 result = FALSE;
654 goto returnResult;
655 } else {
656 // compare expansions
657 sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF);
658 sourceStringLen = sourceReset->expansion >> 24;
659 targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF);
660 targetStringLen = targetReset->expansion >> 24;
661 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
662 result = FALSE;
663 goto returnResult;
664 }
665 }
666 } else {
667 if(targetReset->expansion) {
668 result = FALSE;
669 goto returnResult;
670 }
671 }
672 sourceReset = sourceReset->next;
673 targetReset = targetReset->next;
674 }
675 if(sourceReset != targetReset) { // at least one is not NULL
676 // there are more tailored elements in one list
677 result = FALSE;
678 goto returnResult;
679 }
680
681
682 break;
73c04bcf 683 }
46f4442e
A
684 }
685 // couldn't find the reset anchor, so the collators are not equal
686 if(j == sourceListLen) {
73c04bcf
A
687 result = FALSE;
688 goto returnResult;
73c04bcf 689 }
73c04bcf 690 }
73c04bcf 691 }
73c04bcf
A
692
693returnResult:
46f4442e
A
694 ucol_tok_closeTokenList(&sourceParser);
695 ucol_tok_closeTokenList(&targetParser);
696 return result;
73c04bcf
A
697
698}
699
700U_CAPI int32_t U_EXPORT2
701ucol_getDisplayName( const char *objLoc,
46f4442e
A
702 const char *dispLoc,
703 UChar *result,
704 int32_t resultLength,
705 UErrorCode *status)
73c04bcf 706{
46f4442e
A
707 U_NAMESPACE_USE
708
709 if(U_FAILURE(*status)) return -1;
710 UnicodeString dst;
711 if(!(result==NULL && resultLength==0)) {
712 // NULL destination for pure preflighting: empty dummy string
713 // otherwise, alias the destination buffer
714 dst.setTo(result, 0, resultLength);
715 }
716 Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
717 return dst.extract(result, resultLength, *status);
73c04bcf
A
718}
719
720U_CAPI const char* U_EXPORT2
721ucol_getAvailable(int32_t index)
722{
46f4442e
A
723 int32_t count = 0;
724 const Locale *loc = Collator::getAvailableLocales(count);
725 if (loc != NULL && index < count) {
726 return loc[index].getName();
727 }
728 return NULL;
73c04bcf
A
729}
730
731U_CAPI int32_t U_EXPORT2
732ucol_countAvailable()
733{
46f4442e
A
734 int32_t count = 0;
735 Collator::getAvailableLocales(count);
736 return count;
73c04bcf
A
737}
738
739#if !UCONFIG_NO_SERVICE
740U_CAPI UEnumeration* U_EXPORT2
741ucol_openAvailableLocales(UErrorCode *status) {
46f4442e
A
742 U_NAMESPACE_USE
743
73c04bcf
A
744 // This is a wrapper over Collator::getAvailableLocales()
745 if (U_FAILURE(*status)) {
746 return NULL;
747 }
748 StringEnumeration *s = Collator::getAvailableLocales();
749 if (s == NULL) {
750 *status = U_MEMORY_ALLOCATION_ERROR;
751 return NULL;
752 }
729e4ab9 753 return uenum_openFromStringEnumeration(s, status);
73c04bcf
A
754}
755#endif
756
757// Note: KEYWORDS[0] != RESOURCE_NAME - alan
758
46f4442e 759static const char RESOURCE_NAME[] = "collations";
73c04bcf 760
46f4442e 761static const char* const KEYWORDS[] = { "collation" };
73c04bcf
A
762
763#define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
764
765U_CAPI UEnumeration* U_EXPORT2
766ucol_getKeywords(UErrorCode *status) {
767 UEnumeration *result = NULL;
768 if (U_SUCCESS(*status)) {
769 return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
770 }
771 return result;
772}
773
774U_CAPI UEnumeration* U_EXPORT2
775ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
46f4442e
A
776 if (U_FAILURE(*status)) {
777 return NULL;
778 }
73c04bcf
A
779 // hard-coded to accept exactly one collation keyword
780 // modify if additional collation keyword is added later
46f4442e
A
781 if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0)
782 {
73c04bcf
A
783 *status = U_ILLEGAL_ARGUMENT_ERROR;
784 return NULL;
785 }
786 return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
787}
788
729e4ab9
A
789static const UEnumeration defaultKeywordValues = {
790 NULL,
791 NULL,
792 ulist_close_keyword_values_iterator,
793 ulist_count_keyword_values,
794 uenum_unextDefault,
795 ulist_next_keyword_value,
796 ulist_reset_keyword_values_iterator
797};
798
799#include <stdio.h>
800
801U_CAPI UEnumeration* U_EXPORT2
802ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale,
803 UBool /*commonlyUsed*/, UErrorCode* status) {
804 /* Get the locale base name. */
805 char localeBuffer[ULOC_FULLNAME_CAPACITY] = "";
806 uloc_getBaseName(locale, localeBuffer, sizeof(localeBuffer), status);
807
808 /* Create the 2 lists
809 * -values is the temp location for the keyword values
810 * -results hold the actual list used by the UEnumeration object
811 */
812 UList *values = ulist_createEmptyList(status);
813 UList *results = ulist_createEmptyList(status);
814 UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
815 if (U_FAILURE(*status) || en == NULL) {
816 if (en == NULL) {
817 *status = U_MEMORY_ALLOCATION_ERROR;
818 } else {
819 uprv_free(en);
820 }
821 ulist_deleteList(values);
822 ulist_deleteList(results);
823 return NULL;
824 }
825
826 memcpy(en, &defaultKeywordValues, sizeof(UEnumeration));
827 en->context = results;
828
829 /* Open the resource bundle for collation with the given locale. */
830 UResourceBundle bundle, collations, collres, defres;
831 ures_initStackObject(&bundle);
832 ures_initStackObject(&collations);
833 ures_initStackObject(&collres);
834 ures_initStackObject(&defres);
835
836 ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);
837
838 while (U_SUCCESS(*status)) {
839 ures_getByKey(&bundle, RESOURCE_NAME, &collations, status);
840 ures_resetIterator(&collations);
841 while (U_SUCCESS(*status) && ures_hasNext(&collations)) {
842 ures_getNextResource(&collations, &collres, status);
843 const char *key = ures_getKey(&collres);
844 /* If the key is default, get the string and store it in results list only
845 * if results list is empty.
846 */
847 if (uprv_strcmp(key, "default") == 0) {
848 if (ulist_getListSize(results) == 0) {
849 char *defcoll = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY);
850 int32_t defcollLength = ULOC_KEYWORDS_CAPACITY;
851
852 ures_getNextResource(&collres, &defres, status);
853#if U_CHARSET_FAMILY==U_ASCII_FAMILY
854 /* optimize - use the utf-8 string */
855 ures_getUTF8String(&defres, defcoll, &defcollLength, TRUE, status);
856#else
857 {
858 const UChar* defString = ures_getString(&defres, &defcollLength, status);
859 if(U_SUCCESS(*status)) {
860 if(defcollLength+1 > ULOC_KEYWORDS_CAPACITY) {
861 *status = U_BUFFER_OVERFLOW_ERROR;
862 } else {
863 u_UCharsToChars(defString, defcoll, defcollLength+1);
864 }
865 }
866 }
867#endif
868
869 ulist_addItemBeginList(results, defcoll, TRUE, status);
870 }
871 } else {
872 ulist_addItemEndList(values, key, FALSE, status);
873 }
874 }
875
876 /* If the locale is "" this is root so exit. */
877 if (uprv_strlen(localeBuffer) == 0) {
878 break;
879 }
880 /* Get the parent locale and open a new resource bundle. */
881 uloc_getParent(localeBuffer, localeBuffer, sizeof(localeBuffer), status);
882 ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);
883 }
884
885 ures_close(&defres);
886 ures_close(&collres);
887 ures_close(&collations);
888 ures_close(&bundle);
889
890 if (U_SUCCESS(*status)) {
891 char *value = NULL;
892 ulist_resetList(values);
893 while ((value = (char *)ulist_getNext(values)) != NULL) {
894 if (!ulist_containsString(results, value, (int32_t)uprv_strlen(value))) {
895 ulist_addItemEndList(results, value, FALSE, status);
896 if (U_FAILURE(*status)) {
897 break;
898 }
899 }
900 }
901 }
902
903 ulist_deleteList(values);
904
905 if (U_FAILURE(*status)){
906 uenum_close(en);
907 en = NULL;
908 } else {
909 ulist_resetList(results);
910 }
911
912 return en;
913}
914
73c04bcf
A
915U_CAPI int32_t U_EXPORT2
916ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
917 const char* keyword, const char* locale,
46f4442e
A
918 UBool* isAvailable, UErrorCode* status)
919{
73c04bcf
A
920 // N.B.: Resource name is "collations" but keyword is "collation"
921 return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
46f4442e
A
922 "collations", keyword, locale,
923 isAvailable, TRUE, status);
73c04bcf
A
924}
925
926/* returns the locale name the collation data comes from */
927U_CAPI const char * U_EXPORT2
928ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
46f4442e 929 return ucol_getLocaleByType(coll, type, status);
73c04bcf
A
930}
931
932U_CAPI const char * U_EXPORT2
933ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
46f4442e
A
934 const char *result = NULL;
935 if(status == NULL || U_FAILURE(*status)) {
936 return NULL;
937 }
938 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
939 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
940
941 switch(type) {
942 case ULOC_ACTUAL_LOCALE:
943 result = coll->actualLocale;
944 break;
945 case ULOC_VALID_LOCALE:
946 result = coll->validLocale;
947 break;
948 case ULOC_REQUESTED_LOCALE:
949 result = coll->requestedLocale;
950 break;
951 default:
952 *status = U_ILLEGAL_ARGUMENT_ERROR;
953 }
954 UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
955 UTRACE_EXIT_STATUS(*status);
956 return result;
73c04bcf
A
957}
958
46f4442e
A
959U_CFUNC void U_EXPORT2
960ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt, char *actualLocaleToAdopt)
73c04bcf 961{
46f4442e
A
962 if (coll) {
963 if (coll->validLocale) {
964 uprv_free(coll->validLocale);
965 }
966 coll->validLocale = validLocaleToAdopt;
967 if (coll->requestedLocale) { // should always have
968 uprv_free(coll->requestedLocale);
969 }
970 coll->requestedLocale = requestedLocaleToAdopt;
971 if (coll->actualLocale) {
972 uprv_free(coll->actualLocale);
973 }
974 coll->actualLocale = actualLocaleToAdopt;
73c04bcf 975 }
73c04bcf
A
976}
977
978U_CAPI USet * U_EXPORT2
979ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
980{
46f4442e
A
981 U_NAMESPACE_USE
982
983 if(status == NULL || U_FAILURE(*status)) {
984 return NULL;
985 }
986 if(coll == NULL || coll->UCA == NULL) {
987 *status = U_ILLEGAL_ARGUMENT_ERROR;
988 return NULL;
989 }
990 UParseError parseError;
991 UColTokenParser src;
992 int32_t rulesLen = 0;
993 const UChar *rules = ucol_getRules(coll, &rulesLen);
994 UBool startOfRules = TRUE;
995 // we internally use the C++ class, for the following reasons:
996 // 1. we need to utilize canonical iterator, which is a C++ only class
997 // 2. canonical iterator returns UnicodeStrings - USet cannot take them
998 // 3. USet is internally really UnicodeSet, C is just a wrapper
999 UnicodeSet *tailored = new UnicodeSet();
1000 UnicodeString pattern;
1001 UnicodeString empty;
1002 CanonicalIterator it(empty, *status);
1003
1004
1005 // The idea is to tokenize the rule set. For each non-reset token,
1006 // we add all the canonicaly equivalent FCD sequences
729e4ab9 1007 ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, ucol_tok_getRulesFromBundle, NULL, status);
46f4442e
A
1008 while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) {
1009 startOfRules = FALSE;
1010 if(src.parsedToken.strength != UCOL_TOK_RESET) {
1011 const UChar *stuff = src.source+(src.parsedToken.charsOffset);
1012 it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status);
1013 pattern = it.next();
1014 while(!pattern.isBogus()) {
1015 if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) {
1016 tailored->add(pattern);
1017 }
1018 pattern = it.next();
1019 }
73c04bcf 1020 }
73c04bcf 1021 }
46f4442e
A
1022 ucol_tok_closeTokenList(&src);
1023 return (USet *)tailored;
73c04bcf
A
1024}
1025
729e4ab9
A
1026/*
1027 * Collation Reordering
1028 */
1029
1030static void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status) {
1031 if (U_FAILURE(*status)) {
1032 return;
1033 }
1034
1035 coll->reorderCodesLength = 0;
1036 if (coll->reorderCodes != NULL) {
1037 uprv_free(coll->reorderCodes);
1038 }
1039
1040 if (parser->reorderCodesLength == 0 || parser->reorderCodes == NULL) {
1041 return;
1042 }
1043
1044 coll->reorderCodesLength = parser->reorderCodesLength;
1045 coll->reorderCodes = (int32_t*) uprv_malloc(coll->reorderCodesLength * sizeof(int32_t));
1046 uprv_memcpy(coll->reorderCodes, parser->reorderCodes, coll->reorderCodesLength * sizeof(int32_t));
1047}
1048
1049static int ucol_getLeadBytesForReorderCode(UCollator *coll, int reorderCode, uint16_t* returnLeadBytes, int returnCapacity) {
1050 uint16_t reorderCodeIndexLength = *((uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->scriptToLeadByte));
1051 uint16_t* reorderCodeIndex = (uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->scriptToLeadByte + 2 *sizeof(uint16_t));
1052
1053 // TODO - replace with a binary search
1054 // reorder code index is 2 uint16_t's - reorder code + offset
1055 for (int i = 0; i < reorderCodeIndexLength; i++) {
1056 if (reorderCode == reorderCodeIndex[i*2]) {
1057 uint16_t dataOffset = reorderCodeIndex[(i*2) + 1];
1058 if ((dataOffset & 0x8000) == 0x8000) {
1059 // offset isn't offset but instead is a single data element
1060 if (returnCapacity >= 1) {
1061 returnLeadBytes[0] = dataOffset & ~0x8000;
1062 return 1;
1063 }
1064 return 0;
1065 }
1066 uint16_t* dataOffsetBase = (uint16_t*) ((uint8_t *)reorderCodeIndex + reorderCodeIndexLength * (2 * sizeof(uint16_t)));
1067 uint16_t leadByteCount = *(dataOffsetBase + dataOffset);
1068 leadByteCount = leadByteCount > returnCapacity ? returnCapacity : leadByteCount;
1069 uprv_memcpy(returnLeadBytes, dataOffsetBase + dataOffset + 1, leadByteCount * sizeof(uint16_t));
1070 return leadByteCount;
1071 }
1072 }
1073 return 0;
1074}
1075
1076static int ucol_getReorderCodesForLeadByte(UCollator *coll, int leadByte, int16_t* returnReorderCodes, int returnCapacity) {
1077 int leadByteIndexLength = *((uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->leadByteToScript));
1078 uint16_t* leadByteIndex = (uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->leadByteToScript + 2 *sizeof(uint16_t));
1079 if (leadByte >= leadByteIndexLength) {
1080 return 0;
1081 }
1082
1083 if ((leadByteIndex[leadByte] & 0x8000) == 0x8000) {
1084 // offset isn't offset but instead is a single data element
1085 if (returnCapacity >= 1) {
1086 returnReorderCodes[0] = leadByteIndex[leadByte] & ~0x8000;
1087 return 1;
1088 }
1089 return 0;
1090 }
1091 uint16_t* dataOffsetBase = (uint16_t*) ((uint8_t *)leadByteIndex + leadByteIndexLength * (2 * sizeof(uint16_t)));
1092 uint16_t reorderCodeCount = *(dataOffsetBase + leadByteIndex[leadByte]);
1093 reorderCodeCount = reorderCodeCount > returnCapacity ? returnCapacity : reorderCodeCount;
1094 uprv_memcpy(returnReorderCodes, dataOffsetBase + leadByteIndex[leadByte] + 1, reorderCodeCount * sizeof(uint16_t));
1095 return reorderCodeCount;
1096}
1097
1098// used to mark ignorable reorder code slots
1099static const int32_t UCOL_REORDER_CODE_IGNORE = UCOL_REORDER_CODE_LIMIT + 1;
1100
1101void ucol_buildPermutationTable(UCollator *coll, UErrorCode *status) {
1102 uint16_t leadBytesSize = 256;
1103 uint16_t leadBytes[256];
1104 int32_t internalReorderCodesLength = coll->reorderCodesLength + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST);
1105 int32_t* internalReorderCodes;
1106
1107 // The lowest byte that hasn't been assigned a mapping
1108 int toBottom = 0x03;
1109 // The highest byte that hasn't been assigned a mapping - don't include the special or trailing
1110 int toTop = 0xe4;
1111
1112 // are we filling from the bottom?
1113 bool fromTheBottom = true;
1114 int32_t reorderCodesIndex = -1;
1115
1116 // lead bytes that have alread been assigned to the permutation table
1117 bool newLeadByteUsed[256];
1118 // permutation table slots that have already been filled
1119 bool permutationSlotFilled[256];
1120
1121 // nothing to do
1122 if(U_FAILURE(*status) || coll == NULL || coll->reorderCodesLength == 0) {
1123 if (coll != NULL) {
1124 if (coll->leadBytePermutationTable != NULL) {
1125 uprv_free(coll->leadBytePermutationTable);
1126 coll->leadBytePermutationTable = NULL;
1127 }
1128 coll->reorderCodesLength = 0;
1129 }
1130 return;
1131 }
1132
1133 if (coll->leadBytePermutationTable == NULL) {
1134 coll->leadBytePermutationTable = (uint8_t*)uprv_malloc(256*sizeof(uint8_t));
1135 if (coll->leadBytePermutationTable == NULL) {
1136 *status = U_MEMORY_ALLOCATION_ERROR;
1137 return;
1138 }
1139 }
1140
1141 // prefill the reordering codes with the leading entries
1142 internalReorderCodes = (int32_t*)uprv_malloc(internalReorderCodesLength * sizeof(int32_t));
1143 if (internalReorderCodes == NULL) {
1144 *status = U_MEMORY_ALLOCATION_ERROR;
1145 if (coll->leadBytePermutationTable != NULL) {
1146 uprv_free(coll->leadBytePermutationTable);
1147 coll->leadBytePermutationTable = NULL;
1148 }
1149 return;
1150 }
1151
1152 for (uint32_t codeIndex = 0; codeIndex < (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST); codeIndex++) {
1153 internalReorderCodes[codeIndex] = UCOL_REORDER_CODE_FIRST + codeIndex;
1154 }
1155 for (int32_t codeIndex = 0; codeIndex < coll->reorderCodesLength; codeIndex++) {
1156 uint32_t reorderCodesCode = coll->reorderCodes[codeIndex];
1157 internalReorderCodes[codeIndex + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)] = reorderCodesCode;
1158 if (reorderCodesCode >= UCOL_REORDER_CODE_FIRST && reorderCodesCode < UCOL_REORDER_CODE_LIMIT) {
1159 internalReorderCodes[reorderCodesCode - UCOL_REORDER_CODE_FIRST] = UCOL_REORDER_CODE_IGNORE;
1160 }
1161 }
1162
1163 for (int i = 0; i < 256; i++) {
1164 if (i < toBottom || i > toTop) {
1165 permutationSlotFilled[i] = true;
1166 newLeadByteUsed[i] = true;
1167 coll->leadBytePermutationTable[i] = i;
1168 } else {
1169 permutationSlotFilled[i] = false;
1170 newLeadByteUsed[i] = false;
1171 coll->leadBytePermutationTable[i] = 0;
1172 }
1173 }
1174
1175 /* Start from the front of the list and place each script we encounter at the
1176 * earliest possible locatation in the permutation table. If we encounter
1177 * UNKNOWN, start processing from the back, and place each script in the last
1178 * possible location. At each step, we also need to make sure that any scripts
1179 * that need to not be moved are copied to their same location in the final table.
1180 */
1181 for (int reorderCodesCount = 0; reorderCodesCount < internalReorderCodesLength; reorderCodesCount++) {
1182 reorderCodesIndex += fromTheBottom ? 1 : -1;
1183 int32_t next = internalReorderCodes[reorderCodesIndex];
1184 if (next == UCOL_REORDER_CODE_IGNORE) {
1185 continue;
1186 }
1187 if (next == USCRIPT_UNKNOWN) {
1188 if (fromTheBottom == false) {
1189 // double turnaround
1190 *status = U_ILLEGAL_ARGUMENT_ERROR;
1191 if (coll->leadBytePermutationTable != NULL) {
1192 uprv_free(coll->leadBytePermutationTable);
1193 coll->leadBytePermutationTable = NULL;
1194 }
1195 coll->reorderCodesLength = 0;
1196 if (internalReorderCodes != NULL) {
1197 uprv_free(internalReorderCodes);
1198 }
1199 return;
1200 }
1201 fromTheBottom = false;
1202 reorderCodesIndex = internalReorderCodesLength;
1203 continue;
1204 }
1205
1206 uint16_t leadByteCount = ucol_getLeadBytesForReorderCode(coll, next, leadBytes, leadBytesSize);
1207 if (fromTheBottom) {
1208 for (int leadByteIndex = 0; leadByteIndex < leadByteCount; leadByteIndex++) {
1209 // don't place a lead byte twice in the permutation table
1210 if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
1211 // lead byte already used
1212 *status = U_ILLEGAL_ARGUMENT_ERROR;
1213 if (coll->leadBytePermutationTable != NULL) {
1214 uprv_free(coll->leadBytePermutationTable);
1215 coll->leadBytePermutationTable = NULL;
1216 }
1217 coll->reorderCodesLength = 0;
1218 if (internalReorderCodes != NULL) {
1219 uprv_free(internalReorderCodes);
1220 }
1221 return;
1222 }
1223
1224 coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toBottom;
1225 newLeadByteUsed[toBottom] = true;
1226 permutationSlotFilled[leadBytes[leadByteIndex]] = true;
1227 toBottom++;
1228 }
1229 } else {
1230 for (int leadByteIndex = leadByteCount - 1; leadByteIndex >= 0; leadByteIndex--) {
1231 // don't place a lead byte twice in the permutation table
1232 if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
1233 // lead byte already used
1234 *status = U_ILLEGAL_ARGUMENT_ERROR;
1235 if (coll->leadBytePermutationTable != NULL) {
1236 uprv_free(coll->leadBytePermutationTable);
1237 coll->leadBytePermutationTable = NULL;
1238 }
1239 coll->reorderCodesLength = 0;
1240 if (internalReorderCodes != NULL) {
1241 uprv_free(internalReorderCodes);
1242 }
1243 return;
1244 }
1245
1246 coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toTop;
1247 newLeadByteUsed[toTop] = true;
1248 permutationSlotFilled[leadBytes[leadByteIndex]] = true;
1249 toTop--;
1250 }
1251 }
1252 }
1253
1254#ifdef REORDER_DEBUG
1255 fprintf(stdout, "\n@@@@ Partial Script Reordering Table\n");
1256 for (int i = 0; i < 256; i++) {
1257 fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
1258 }
1259 fprintf(stdout, "\n@@@@ Lead Byte Used Table\n");
1260 for (int i = 0; i < 256; i++) {
1261 fprintf(stdout, "\t%02x = %02x\n", i, newLeadByteUsed[i]);
1262 }
1263 fprintf(stdout, "\n@@@@ Permutation Slot Filled Table\n");
1264 for (int i = 0; i < 256; i++) {
1265 fprintf(stdout, "\t%02x = %02x\n", i, permutationSlotFilled[i]);
1266 }
1267#endif
1268
1269 /* Copy everything that's left over */
1270 int reorderCode = 0;
1271 for (int i = 0; i < 256; i++) {
1272 if (!permutationSlotFilled[i]) {
1273 while (reorderCode < 256 && newLeadByteUsed[reorderCode]) {
1274 reorderCode++;
1275 }
1276 coll->leadBytePermutationTable[i] = reorderCode;
1277 permutationSlotFilled[i] = true;
1278 newLeadByteUsed[reorderCode] = true;
1279 }
1280 }
1281
1282#ifdef REORDER_DEBUG
1283 fprintf(stdout, "\n@@@@ Script Reordering Table\n");
1284 for (int i = 0; i < 256; i++) {
1285 fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]);
1286 }
1287#endif
1288
1289 if (internalReorderCodes != NULL) {
1290 uprv_free(internalReorderCodes);
1291 }
1292
1293 // force a regen of the latin one table since it is affected by the script reordering
1294 coll->latinOneRegenTable = TRUE;
1295 ucol_updateInternalState(coll, status);
1296}
1297
73c04bcf 1298#endif /* #if !UCONFIG_NO_COLLATION */