]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/ucol_res.cpp
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_res.cpp
CommitLineData
73c04bcf
A
1/*
2*******************************************************************************
46f4442e 3* Copyright (C) 1996-2008, International Business Machines
73c04bcf
A
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
6* file name: ucol_res.cpp
7* encoding: US-ASCII
8* tab size: 8 (not used)
9* indentation:4
10*
11* Description:
12* This file contains dependencies that the collation run-time doesn't normally
13* need. This mainly contains resource bundle usage and collation meta information
14*
15* Modification history
16* Date Name Comments
17* 1996-1999 various members of ICU team maintained C API for collation framework
18* 02/16/2001 synwee Added internal method getPrevSpecialCE
19* 03/01/2001 synwee Added maxexpansion functionality.
20* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
21* 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp
22*/
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_COLLATION
27#include "unicode/uloc.h"
28#include "unicode/coll.h"
29#include "unicode/tblcoll.h"
30#include "unicode/caniter.h"
31#include "unicode/ustring.h"
32
33#include "ucol_bld.h"
34#include "ucol_imp.h"
35#include "ucol_tok.h"
36#include "ucol_elm.h"
37#include "uresimp.h"
38#include "ustr_imp.h"
39#include "cstring.h"
40#include "umutex.h"
46f4442e 41#include "ucln_in.h"
73c04bcf
A
42#include "ustrenum.h"
43#include "putilimp.h"
44#include "utracimp.h"
45#include "cmemory.h"
46
46f4442e
A
47U_NAMESPACE_USE
48
49// static UCA. There is only one. Collators don't use it.
50// It is referenced only in ucol_initUCA, ucol_forgetUCA and ucol_res_cleanup
51static UCollator* _staticUCA = NULL;
52// static pointer to udata memory. Initialized in ucol_initUCA,
53// closed in ucol_res_cleanup (and merely reset by ucol_forgetUCA)
54static UDataMemory* UCA_DATA_MEM = NULL;
55
73c04bcf 56U_CDECL_BEGIN
46f4442e
A
57static UBool U_CALLCONV
58ucol_res_cleanup(void)
59{
60 if (UCA_DATA_MEM) {
61 udata_close(UCA_DATA_MEM);
62 UCA_DATA_MEM = NULL;
63 }
64 if (_staticUCA) {
65 ucol_close(_staticUCA);
66 _staticUCA = NULL;
67 }
68 return TRUE;
69}
70
71static UBool U_CALLCONV
72isAcceptableUCA(void * /*context*/,
73 const char * /*type*/, const char * /*name*/,
74 const UDataInfo *pInfo){
75 /* context, type & name are intentionally not used */
76 if( pInfo->size>=20 &&
77 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
78 pInfo->charsetFamily==U_CHARSET_FAMILY &&
79 pInfo->dataFormat[0]==UCA_DATA_FORMAT_0 && /* dataFormat="UCol" */
80 pInfo->dataFormat[1]==UCA_DATA_FORMAT_1 &&
81 pInfo->dataFormat[2]==UCA_DATA_FORMAT_2 &&
82 pInfo->dataFormat[3]==UCA_DATA_FORMAT_3 &&
83 pInfo->formatVersion[0]==UCA_FORMAT_VERSION_0 &&
84 pInfo->formatVersion[1]>=UCA_FORMAT_VERSION_1// &&
85 //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 &&
86 //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh
87 //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh
88 ) {
89 UVersionInfo UCDVersion;
90 u_getUnicodeVersion(UCDVersion);
91 return (UBool)(pInfo->dataVersion[0]==UCDVersion[0]
92 && pInfo->dataVersion[1]==UCDVersion[1]);
93 //&& pInfo->dataVersion[2]==ucaDataInfo.dataVersion[2]
94 //&& pInfo->dataVersion[3]==ucaDataInfo.dataVersion[3]);
95 } else {
96 return FALSE;
97 }
73c04bcf
A
98}
99U_CDECL_END
100
46f4442e
A
101/* do not close UCA returned by ucol_initUCA! */
102UCollator *
103ucol_initUCA(UErrorCode *status) {
104 if(U_FAILURE(*status)) {
105 return NULL;
106 }
107 UBool needsInit;
108 UMTX_CHECK(NULL, (_staticUCA == NULL), needsInit);
109
110 if(needsInit) {
111 UDataMemory *result = udata_openChoice(NULL, UCA_DATA_TYPE, UCA_DATA_NAME, isAcceptableUCA, NULL, status);
112
113 if(U_SUCCESS(*status)){
114 UCollator *newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), NULL, NULL, status);
115 if(U_SUCCESS(*status)){
116 umtx_lock(NULL);
117 if(_staticUCA == NULL) {
118 _staticUCA = newUCA;
119 newUCA = NULL;
120 UCA_DATA_MEM = result;
121 result = NULL;
122 }
123 umtx_unlock(NULL);
124
125 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup);
126 if(newUCA != NULL) {
127 ucol_close(newUCA);
128 udata_close(result);
129 }
130 // Initalize variables for implicit generation
131 uprv_uca_initImplicitConstants(status);
132 }else{
133 ucol_close(newUCA);
134 udata_close(result);
135 }
136 }
137 else {
138 udata_close(result);
139 }
140 }
141 return _staticUCA;
142}
143
144U_CAPI void U_EXPORT2
145ucol_forgetUCA(void)
146{
147 _staticUCA = NULL;
148 UCA_DATA_MEM = NULL;
149}
150
73c04bcf
A
151/****************************************************************************/
152/* Following are the open/close functions */
153/* */
154/****************************************************************************/
155static UCollator*
156tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
46f4442e
A
157 int32_t rulesLen = 0;
158 const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status);
159 return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
73c04bcf
A
160}
161
162
163// API in ucol_imp.h
164
165U_CFUNC UCollator*
166ucol_open_internal(const char *loc,
167 UErrorCode *status)
168{
46f4442e 169 UErrorCode intStatus = U_ZERO_ERROR;
73c04bcf
A
170 const UCollator* UCA = ucol_initUCA(status);
171
172 /* New version */
173 if(U_FAILURE(*status)) return 0;
174
175
176
177 UCollator *result = NULL;
178 UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
179
180 /* we try to find stuff from keyword */
181 UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
182 UResourceBundle *collElem = NULL;
183 char keyBuffer[256];
184 // if there is a keyword, we pick it up and try to get elements
185 if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) {
186 // no keyword. we try to find the default setting, which will give us the keyword value
46f4442e 187 intStatus = U_ZERO_ERROR;
73c04bcf
A
188 // finding default value does not affect collation fallback status
189 UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
190 if(U_SUCCESS(intStatus)) {
191 int32_t defaultKeyLen = 0;
192 const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
193 u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
194 keyBuffer[defaultKeyLen] = 0;
195 } else {
196 *status = U_INTERNAL_PROGRAM_ERROR;
197 return NULL;
198 }
199 ures_close(defaultColl);
200 }
46f4442e
A
201 collElem = ures_getByKeyWithFallback(collations, keyBuffer, collations, status);
202 collations = NULL; // We just reused the collations object as collElem.
73c04bcf
A
203
204 UResourceBundle *binary = NULL;
205
206 if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
207 *status = U_USING_DEFAULT_WARNING;
208 result = ucol_initCollator(UCA->image, result, UCA, status);
46f4442e
A
209 if (U_FAILURE(*status)) {
210 goto clean;
211 }
73c04bcf 212 // if we use UCA, real locale is root
46f4442e
A
213 ures_close(b);
214 b = ures_open(U_ICUDATA_COLL, "", status);
215 ures_close(collElem);
216 collElem = ures_open(U_ICUDATA_COLL, "", status);
73c04bcf
A
217 if(U_FAILURE(*status)) {
218 goto clean;
219 }
73c04bcf
A
220 result->hasRealData = FALSE;
221 } else if(U_SUCCESS(*status)) {
46f4442e 222 intStatus = U_ZERO_ERROR;
73c04bcf 223
46f4442e 224 binary = ures_getByKey(collElem, "%%CollationBin", NULL, &intStatus);
73c04bcf 225
46f4442e 226 if(intStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
73c04bcf
A
227 binary = NULL;
228 result = tryOpeningFromRules(collElem, status);
229 if(U_FAILURE(*status)) {
230 goto clean;
231 }
232 } else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */
46f4442e 233 int32_t len = 0;
73c04bcf
A
234 const uint8_t *inData = ures_getBinary(binary, &len, status);
235 UCATableHeader *colData = (UCATableHeader *)inData;
236 if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
237 uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
238 colData->version[0] != UCOL_BUILDER_VERSION)
239 {
240 *status = U_DIFFERENT_UCA_VERSION;
241 result = tryOpeningFromRules(collElem, status);
242 } else {
243 if(U_FAILURE(*status)){
244 goto clean;
245 }
246 if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
247 result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
248 if(U_FAILURE(*status)){
249 goto clean;
250 }
251 result->hasRealData = TRUE;
252 } else {
253 result = ucol_initCollator(UCA->image, result, UCA, status);
254 ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
255 if(U_FAILURE(*status)){
256 goto clean;
257 }
258 result->hasRealData = FALSE;
259 }
260 result->freeImageOnClose = FALSE;
261 }
262 }
46f4442e
A
263 intStatus = U_ZERO_ERROR;
264 result->rules = ures_getStringByKey(collElem, "Sequence", &result->rulesLength, &intStatus);
73c04bcf
A
265 result->freeRulesOnClose = FALSE;
266 } else { /* There is another error, and we're just gonna clean up */
267 goto clean;
268 }
269
46f4442e
A
270 intStatus = U_ZERO_ERROR;
271 result->ucaRules = ures_getStringByKey(b,"UCARules",NULL,&intStatus);
73c04bcf
A
272
273 if(loc == NULL) {
46f4442e 274 loc = ures_getLocale(b, status);
73c04bcf 275 }
46f4442e 276 result->requestedLocale = uprv_strdup(loc);
73c04bcf
A
277 /* test for NULL */
278 if (result->requestedLocale == NULL) {
279 *status = U_MEMORY_ALLOCATION_ERROR;
280 goto clean;
281 }
46f4442e
A
282 loc = ures_getLocale(collElem, status);
283 result->actualLocale = uprv_strdup(loc);
284 /* test for NULL */
285 if (result->actualLocale == NULL) {
286 *status = U_MEMORY_ALLOCATION_ERROR;
287 goto clean;
288 }
289 loc = ures_getLocale(b, status);
290 result->validLocale = uprv_strdup(loc);
291 /* test for NULL */
292 if (result->validLocale == NULL) {
293 *status = U_MEMORY_ALLOCATION_ERROR;
294 goto clean;
295 }
73c04bcf 296
46f4442e
A
297 ures_close(b);
298 ures_close(collElem);
73c04bcf 299 ures_close(binary);
73c04bcf
A
300 return result;
301
302clean:
303 ures_close(b);
304 ures_close(collElem);
73c04bcf 305 ures_close(binary);
46f4442e 306 ucol_close(result);
73c04bcf
A
307 return NULL;
308}
309
310U_CAPI UCollator*
311ucol_open(const char *loc,
312 UErrorCode *status)
313{
46f4442e
A
314 U_NAMESPACE_USE
315
316 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
317 UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
318 UCollator *result = NULL;
73c04bcf 319
46f4442e 320 u_init(status);
73c04bcf 321#if !UCONFIG_NO_SERVICE
46f4442e
A
322 result = Collator::createUCollator(loc, status);
323 if (result == NULL)
73c04bcf 324#endif
46f4442e
A
325 {
326 result = ucol_open_internal(loc, status);
327 }
328 UTRACE_EXIT_PTR_STATUS(result, *status);
329 return result;
73c04bcf
A
330}
331
332U_CAPI UCollator* U_EXPORT2
333ucol_openRules( const UChar *rules,
46f4442e
A
334 int32_t rulesLength,
335 UColAttributeValue normalizationMode,
336 UCollationStrength strength,
337 UParseError *parseError,
338 UErrorCode *status)
73c04bcf 339{
46f4442e
A
340 UColTokenParser src;
341 UColAttributeValue norm;
342 UParseError tErr;
73c04bcf 343
46f4442e
A
344 if(status == NULL || U_FAILURE(*status)){
345 return 0;
346 }
73c04bcf 347
46f4442e
A
348 u_init(status);
349 if (U_FAILURE(*status)) {
350 return NULL;
73c04bcf 351 }
73c04bcf 352
46f4442e
A
353 if(rules == NULL || rulesLength < -1) {
354 *status = U_ILLEGAL_ARGUMENT_ERROR;
355 return 0;
356 }
357
358 if(rulesLength == -1) {
359 rulesLength = u_strlen(rules);
360 }
361
362 if(parseError == NULL){
363 parseError = &tErr;
364 }
365
366 switch(normalizationMode) {
367 case UCOL_OFF:
368 case UCOL_ON:
369 case UCOL_DEFAULT:
370 norm = normalizationMode;
371 break;
372 default:
373 *status = U_ILLEGAL_ARGUMENT_ERROR;
374 return 0;
375 }
376
377 UCollator *result = NULL;
378 UCATableHeader *table = NULL;
379 UCollator *UCA = ucol_initUCA(status);
380
381 if(U_FAILURE(*status)){
382 return NULL;
383 }
384
385 ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status);
386 ucol_tok_assembleTokenList(&src,parseError, status);
387
388 if(U_FAILURE(*status)) {
389 /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
390 /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
391 /* so something might be done here... or on lower level */
392#ifdef UCOL_DEBUG
393 if(*status == U_ILLEGAL_ARGUMENT_ERROR) {
394 fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source);
395 } else {
396 fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source);
397 }
398#endif
73c04bcf
A
399 goto cleanup;
400 }
46f4442e
A
401
402 if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */
403 /* also, if we wanted to remove some contractions, we should make a tailoring */
404 table = ucol_assembleTailoringTable(&src, status);
405 if(U_SUCCESS(*status)) {
406 // builder version
407 table->version[0] = UCOL_BUILDER_VERSION;
408 // no tailoring information on this level
409 table->version[1] = table->version[2] = table->version[3] = 0;
410 // set UCD version
411 u_getUnicodeVersion(table->UCDVersion);
412 // set UCA version
413 uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
414 result = ucol_initCollator(table, 0, UCA, status);
415 if (U_FAILURE(*status)) {
416 goto cleanup;
417 }
418 result->hasRealData = TRUE;
419 result->freeImageOnClose = TRUE;
420 }
421 } else { /* no rules, but no error either */
422 // must be only options
423 // We will init the collator from UCA
424 result = ucol_initCollator(UCA->image, 0, UCA, status);
425 // Check for null result
426 if (U_FAILURE(*status)) {
427 goto cleanup;
428 }
429 // And set only the options
430 UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
431 /* test for NULL */
432 if (opts == NULL) {
433 *status = U_MEMORY_ALLOCATION_ERROR;
434 goto cleanup;
435 }
436 uprv_memcpy(opts, src.opts, sizeof(UColOptionSet));
437 ucol_setOptionsFromHeader(result, opts, status);
438 result->freeOptionsOnClose = TRUE;
439 result->hasRealData = FALSE;
440 result->freeImageOnClose = FALSE;
441 }
442
443 if(U_SUCCESS(*status)) {
444 UChar *newRules;
445 result->dataVersion[0] = UCOL_BUILDER_VERSION;
446 if(rulesLength > 0) {
447 newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR);
448 /* test for NULL */
449 if (newRules == NULL) {
450 *status = U_MEMORY_ALLOCATION_ERROR;
451 goto cleanup;
452 }
453 uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR);
454 newRules[rulesLength]=0;
455 result->rules = newRules;
456 result->rulesLength = rulesLength;
457 result->freeRulesOnClose = TRUE;
458 }
459 result->ucaRules = NULL;
460 result->actualLocale = NULL;
461 result->validLocale = NULL;
462 result->requestedLocale = NULL;
463 ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
464 ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
73c04bcf 465 } else {
46f4442e
A
466cleanup:
467 if(result != NULL) {
468 ucol_close(result);
469 } else {
470 if(table != NULL) {
471 uprv_free(table);
472 }
473 }
474 result = NULL;
73c04bcf 475 }
73c04bcf 476
46f4442e 477 ucol_tok_closeTokenList(&src);
73c04bcf 478
46f4442e 479 return result;
73c04bcf
A
480}
481
482U_CAPI int32_t U_EXPORT2
483ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
46f4442e
A
484 UErrorCode status = U_ZERO_ERROR;
485 int32_t len = 0;
486 int32_t UCAlen = 0;
487 const UChar* ucaRules = 0;
488 const UChar *rules = ucol_getRules(coll, &len);
489 if(delta == UCOL_FULL_RULES) {
490 /* take the UCA rules and append real rules at the end */
491 /* UCA rules will be probably coming from the root RB */
492 ucaRules = coll->ucaRules;
493 if (ucaRules) {
494 UCAlen = u_strlen(ucaRules);
495 }
496 /*
497 ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
498 UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
499 UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
500 ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
501 ures_close(uca);
502 ures_close(cresb);
503 */
504 }
505 if(U_FAILURE(status)) {
506 return 0;
507 }
508 if(buffer!=0 && bufferLen>0){
509 *buffer=0;
510 if(UCAlen > 0) {
511 u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen));
512 }
513 if(len > 0 && bufferLen > UCAlen) {
514 u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen));
515 }
516 }
517 return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status);
73c04bcf
A
518}
519
520static const UChar _NUL = 0;
521
522U_CAPI const UChar* U_EXPORT2
523ucol_getRules( const UCollator *coll,
46f4442e 524 int32_t *length)
73c04bcf
A
525{
526 if(coll->rules != NULL) {
527 *length = coll->rulesLength;
528 return coll->rules;
529 }
530 else {
531 *length = 0;
532 return &_NUL;
533 }
534}
535
536U_CAPI UBool U_EXPORT2
537ucol_equals(const UCollator *source, const UCollator *target) {
46f4442e
A
538 UErrorCode status = U_ZERO_ERROR;
539 // if pointers are equal, collators are equal
540 if(source == target) {
541 return TRUE;
542 }
543 int32_t i = 0, j = 0;
544 // if any of attributes are different, collators are not equal
545 for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
546 if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) {
547 return FALSE;
548 }
549 }
550
551 int32_t sourceRulesLen = 0, targetRulesLen = 0;
552 const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
553 const UChar *targetRules = ucol_getRules(target, &targetRulesLen);
554
555 if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
556 // all the attributes are equal and the rules are equal - collators are equal
557 return(TRUE);
558 }
559 // hard part, need to construct tree from rules and see if they yield the same tailoring
560 UBool result = TRUE;
561 UParseError parseError;
562 UColTokenParser sourceParser, targetParser;
563 int32_t sourceListLen = 0, targetListLen = 0;
564 ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status);
565 ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status);
566 sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
567 targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);
568
569 if(sourceListLen != targetListLen) {
570 // different number of resets
571 result = FALSE;
572 } else {
573 UColToken *sourceReset = NULL, *targetReset = NULL;
574 UChar *sourceResetString = NULL, *targetResetString = NULL;
575 int32_t sourceStringLen = 0, targetStringLen = 0;
576 for(i = 0; i < sourceListLen; i++) {
577 sourceReset = sourceParser.lh[i].reset;
73c04bcf
A
578 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
579 sourceStringLen = sourceReset->source >> 24;
46f4442e
A
580 for(j = 0; j < sourceListLen; j++) {
581 targetReset = targetParser.lh[j].reset;
582 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
583 targetStringLen = targetReset->source >> 24;
584 if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) {
585 sourceReset = sourceParser.lh[i].first;
586 targetReset = targetParser.lh[j].first;
587 while(sourceReset != NULL && targetReset != NULL) {
588 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
589 sourceStringLen = sourceReset->source >> 24;
590 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
591 targetStringLen = targetReset->source >> 24;
592 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
593 result = FALSE;
594 goto returnResult;
595 }
596 // probably also need to check the expansions
597 if(sourceReset->expansion) {
598 if(!targetReset->expansion) {
599 result = FALSE;
600 goto returnResult;
601 } else {
602 // compare expansions
603 sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF);
604 sourceStringLen = sourceReset->expansion >> 24;
605 targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF);
606 targetStringLen = targetReset->expansion >> 24;
607 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
608 result = FALSE;
609 goto returnResult;
610 }
611 }
612 } else {
613 if(targetReset->expansion) {
614 result = FALSE;
615 goto returnResult;
616 }
617 }
618 sourceReset = sourceReset->next;
619 targetReset = targetReset->next;
620 }
621 if(sourceReset != targetReset) { // at least one is not NULL
622 // there are more tailored elements in one list
623 result = FALSE;
624 goto returnResult;
625 }
626
627
628 break;
73c04bcf 629 }
46f4442e
A
630 }
631 // couldn't find the reset anchor, so the collators are not equal
632 if(j == sourceListLen) {
73c04bcf
A
633 result = FALSE;
634 goto returnResult;
73c04bcf 635 }
73c04bcf 636 }
73c04bcf 637 }
73c04bcf
A
638
639returnResult:
46f4442e
A
640 ucol_tok_closeTokenList(&sourceParser);
641 ucol_tok_closeTokenList(&targetParser);
642 return result;
73c04bcf
A
643
644}
645
646U_CAPI int32_t U_EXPORT2
647ucol_getDisplayName( const char *objLoc,
46f4442e
A
648 const char *dispLoc,
649 UChar *result,
650 int32_t resultLength,
651 UErrorCode *status)
73c04bcf 652{
46f4442e
A
653 U_NAMESPACE_USE
654
655 if(U_FAILURE(*status)) return -1;
656 UnicodeString dst;
657 if(!(result==NULL && resultLength==0)) {
658 // NULL destination for pure preflighting: empty dummy string
659 // otherwise, alias the destination buffer
660 dst.setTo(result, 0, resultLength);
661 }
662 Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
663 return dst.extract(result, resultLength, *status);
73c04bcf
A
664}
665
666U_CAPI const char* U_EXPORT2
667ucol_getAvailable(int32_t index)
668{
46f4442e
A
669 int32_t count = 0;
670 const Locale *loc = Collator::getAvailableLocales(count);
671 if (loc != NULL && index < count) {
672 return loc[index].getName();
673 }
674 return NULL;
73c04bcf
A
675}
676
677U_CAPI int32_t U_EXPORT2
678ucol_countAvailable()
679{
46f4442e
A
680 int32_t count = 0;
681 Collator::getAvailableLocales(count);
682 return count;
73c04bcf
A
683}
684
685#if !UCONFIG_NO_SERVICE
686U_CAPI UEnumeration* U_EXPORT2
687ucol_openAvailableLocales(UErrorCode *status) {
46f4442e
A
688 U_NAMESPACE_USE
689
73c04bcf
A
690 // This is a wrapper over Collator::getAvailableLocales()
691 if (U_FAILURE(*status)) {
692 return NULL;
693 }
694 StringEnumeration *s = Collator::getAvailableLocales();
695 if (s == NULL) {
696 *status = U_MEMORY_ALLOCATION_ERROR;
697 return NULL;
698 }
699 return uenum_openStringEnumeration(s, status);
700}
701#endif
702
703// Note: KEYWORDS[0] != RESOURCE_NAME - alan
704
// Name of the resource bundle table that holds the collation data.
46f4442e 705static const char RESOURCE_NAME[] = "collations";
73c04bcf 706
// Locale keywords understood by the collation API; ucol_getKeywordValues()
// compares its argument against KEYWORDS[0].
46f4442e 707static const char* const KEYWORDS[] = { "collation" };
73c04bcf
A
708
// Number of entries in KEYWORDS.
709#define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
710
711U_CAPI UEnumeration* U_EXPORT2
712ucol_getKeywords(UErrorCode *status) {
713 UEnumeration *result = NULL;
714 if (U_SUCCESS(*status)) {
715 return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
716 }
717 return result;
718}
719
720U_CAPI UEnumeration* U_EXPORT2
721ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
46f4442e
A
722 if (U_FAILURE(*status)) {
723 return NULL;
724 }
73c04bcf
A
725 // hard-coded to accept exactly one collation keyword
726 // modify if additional collation keyword is added later
46f4442e
A
727 if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0)
728 {
73c04bcf
A
729 *status = U_ILLEGAL_ARGUMENT_ERROR;
730 return NULL;
731 }
732 return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
733}
734
735U_CAPI int32_t U_EXPORT2
736ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
737 const char* keyword, const char* locale,
46f4442e
A
738 UBool* isAvailable, UErrorCode* status)
739{
73c04bcf
A
740 // N.B.: Resource name is "collations" but keyword is "collation"
741 return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
46f4442e
A
742 "collations", keyword, locale,
743 isAvailable, TRUE, status);
73c04bcf
A
744}
745
746/* returns the locale name the collation data comes from */
747U_CAPI const char * U_EXPORT2
748ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
46f4442e 749 return ucol_getLocaleByType(coll, type, status);
73c04bcf
A
750}
751
752U_CAPI const char * U_EXPORT2
753ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
46f4442e
A
754 const char *result = NULL;
755 if(status == NULL || U_FAILURE(*status)) {
756 return NULL;
757 }
758 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
759 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
760
761 switch(type) {
762 case ULOC_ACTUAL_LOCALE:
763 result = coll->actualLocale;
764 break;
765 case ULOC_VALID_LOCALE:
766 result = coll->validLocale;
767 break;
768 case ULOC_REQUESTED_LOCALE:
769 result = coll->requestedLocale;
770 break;
771 default:
772 *status = U_ILLEGAL_ARGUMENT_ERROR;
773 }
774 UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
775 UTRACE_EXIT_STATUS(*status);
776 return result;
73c04bcf
A
777}
778
46f4442e
A
779U_CFUNC void U_EXPORT2
780ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt, char *actualLocaleToAdopt)
73c04bcf 781{
46f4442e
A
782 if (coll) {
783 if (coll->validLocale) {
784 uprv_free(coll->validLocale);
785 }
786 coll->validLocale = validLocaleToAdopt;
787 if (coll->requestedLocale) { // should always have
788 uprv_free(coll->requestedLocale);
789 }
790 coll->requestedLocale = requestedLocaleToAdopt;
791 if (coll->actualLocale) {
792 uprv_free(coll->actualLocale);
793 }
794 coll->actualLocale = actualLocaleToAdopt;
73c04bcf 795 }
73c04bcf
A
796}
797
798U_CAPI USet * U_EXPORT2
799ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
800{
46f4442e
A
801 U_NAMESPACE_USE
802
803 if(status == NULL || U_FAILURE(*status)) {
804 return NULL;
805 }
806 if(coll == NULL || coll->UCA == NULL) {
807 *status = U_ILLEGAL_ARGUMENT_ERROR;
808 return NULL;
809 }
810 UParseError parseError;
811 UColTokenParser src;
812 int32_t rulesLen = 0;
813 const UChar *rules = ucol_getRules(coll, &rulesLen);
814 UBool startOfRules = TRUE;
815 // we internally use the C++ class, for the following reasons:
816 // 1. we need to utilize canonical iterator, which is a C++ only class
817 // 2. canonical iterator returns UnicodeStrings - USet cannot take them
818 // 3. USet is internally really UnicodeSet, C is just a wrapper
819 UnicodeSet *tailored = new UnicodeSet();
820 UnicodeString pattern;
821 UnicodeString empty;
822 CanonicalIterator it(empty, *status);
823
824
825 // The idea is to tokenize the rule set. For each non-reset token,
826 // we add all the canonicaly equivalent FCD sequences
827 ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status);
828 while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) {
829 startOfRules = FALSE;
830 if(src.parsedToken.strength != UCOL_TOK_RESET) {
831 const UChar *stuff = src.source+(src.parsedToken.charsOffset);
832 it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status);
833 pattern = it.next();
834 while(!pattern.isBogus()) {
835 if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) {
836 tailored->add(pattern);
837 }
838 pattern = it.next();
839 }
73c04bcf 840 }
73c04bcf 841 }
46f4442e
A
842 ucol_tok_closeTokenList(&src);
843 return (USet *)tailored;
73c04bcf
A
844}
845
846#endif /* #if !UCONFIG_NO_COLLATION */