]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/ucol_res.cpp
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_res.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2008, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucol_res.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * Description:
12 * This file contains dependencies that the collation run-time doesn't normally
13 * need. This mainly contains resource bundle usage and collation meta information
14 *
15 * Modification history
16 * Date Name Comments
17 * 1996-1999 various members of ICU team maintained C API for collation framework
18 * 02/16/2001 synwee Added internal method getPrevSpecialCE
19 * 03/01/2001 synwee Added maxexpansion functionality.
20 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
21 * 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp
22 */
23
24 #include "unicode/utypes.h"
25
26 #if !UCONFIG_NO_COLLATION
27 #include "unicode/uloc.h"
28 #include "unicode/coll.h"
29 #include "unicode/tblcoll.h"
30 #include "unicode/caniter.h"
31 #include "unicode/ustring.h"
32
33 #include "ucol_bld.h"
34 #include "ucol_imp.h"
35 #include "ucol_tok.h"
36 #include "ucol_elm.h"
37 #include "uresimp.h"
38 #include "ustr_imp.h"
39 #include "cstring.h"
40 #include "umutex.h"
41 #include "ucln_in.h"
42 #include "ustrenum.h"
43 #include "putilimp.h"
44 #include "utracimp.h"
45 #include "cmemory.h"
46
47 U_NAMESPACE_USE
48
49 // static UCA. There is only one. Collators don't use it.
50 // It is referenced only in ucol_initUCA and ucol_cleanup
51 static UCollator* _staticUCA = NULL;
52 // static pointer to udata memory. Inited in ucol_initUCA
53 // used for cleanup in ucol_cleanup
54 static UDataMemory* UCA_DATA_MEM = NULL;
55
56 U_CDECL_BEGIN
57 static UBool U_CALLCONV
58 ucol_res_cleanup(void)
59 {
60 if (UCA_DATA_MEM) {
61 udata_close(UCA_DATA_MEM);
62 UCA_DATA_MEM = NULL;
63 }
64 if (_staticUCA) {
65 ucol_close(_staticUCA);
66 _staticUCA = NULL;
67 }
68 return TRUE;
69 }
70
71 static UBool U_CALLCONV
72 isAcceptableUCA(void * /*context*/,
73 const char * /*type*/, const char * /*name*/,
74 const UDataInfo *pInfo){
75 /* context, type & name are intentionally not used */
76 if( pInfo->size>=20 &&
77 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
78 pInfo->charsetFamily==U_CHARSET_FAMILY &&
79 pInfo->dataFormat[0]==UCA_DATA_FORMAT_0 && /* dataFormat="UCol" */
80 pInfo->dataFormat[1]==UCA_DATA_FORMAT_1 &&
81 pInfo->dataFormat[2]==UCA_DATA_FORMAT_2 &&
82 pInfo->dataFormat[3]==UCA_DATA_FORMAT_3 &&
83 pInfo->formatVersion[0]==UCA_FORMAT_VERSION_0 &&
84 pInfo->formatVersion[1]>=UCA_FORMAT_VERSION_1// &&
85 //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 &&
86 //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh
87 //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh
88 ) {
89 UVersionInfo UCDVersion;
90 u_getUnicodeVersion(UCDVersion);
91 return (UBool)(pInfo->dataVersion[0]==UCDVersion[0]
92 && pInfo->dataVersion[1]==UCDVersion[1]);
93 //&& pInfo->dataVersion[2]==ucaDataInfo.dataVersion[2]
94 //&& pInfo->dataVersion[3]==ucaDataInfo.dataVersion[3]);
95 } else {
96 return FALSE;
97 }
98 }
99 U_CDECL_END
100
101 /* do not close UCA returned by ucol_initUCA! */
102 UCollator *
103 ucol_initUCA(UErrorCode *status) {
104 if(U_FAILURE(*status)) {
105 return NULL;
106 }
107 UBool needsInit;
108 UMTX_CHECK(NULL, (_staticUCA == NULL), needsInit);
109
110 if(needsInit) {
111 UDataMemory *result = udata_openChoice(NULL, UCA_DATA_TYPE, UCA_DATA_NAME, isAcceptableUCA, NULL, status);
112
113 if(U_SUCCESS(*status)){
114 UCollator *newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), NULL, NULL, status);
115 if(U_SUCCESS(*status)){
116 umtx_lock(NULL);
117 if(_staticUCA == NULL) {
118 _staticUCA = newUCA;
119 newUCA = NULL;
120 UCA_DATA_MEM = result;
121 result = NULL;
122 }
123 umtx_unlock(NULL);
124
125 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup);
126 if(newUCA != NULL) {
127 ucol_close(newUCA);
128 udata_close(result);
129 }
130 // Initalize variables for implicit generation
131 uprv_uca_initImplicitConstants(status);
132 }else{
133 ucol_close(newUCA);
134 udata_close(result);
135 }
136 }
137 else {
138 udata_close(result);
139 }
140 }
141 return _staticUCA;
142 }
143
144 U_CAPI void U_EXPORT2
145 ucol_forgetUCA(void)
146 {
147 _staticUCA = NULL;
148 UCA_DATA_MEM = NULL;
149 }
150
151 /****************************************************************************/
152 /* Following are the open/close functions */
153 /* */
154 /****************************************************************************/
155 static UCollator*
156 tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
157 int32_t rulesLen = 0;
158 const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status);
159 return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
160 }
161
162
163 // API in ucol_imp.h
164
165 U_CFUNC UCollator*
166 ucol_open_internal(const char *loc,
167 UErrorCode *status)
168 {
169 UErrorCode intStatus = U_ZERO_ERROR;
170 const UCollator* UCA = ucol_initUCA(status);
171
172 /* New version */
173 if(U_FAILURE(*status)) return 0;
174
175
176
177 UCollator *result = NULL;
178 UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
179
180 /* we try to find stuff from keyword */
181 UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
182 UResourceBundle *collElem = NULL;
183 char keyBuffer[256];
184 // if there is a keyword, we pick it up and try to get elements
185 if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) {
186 // no keyword. we try to find the default setting, which will give us the keyword value
187 intStatus = U_ZERO_ERROR;
188 // finding default value does not affect collation fallback status
189 UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
190 if(U_SUCCESS(intStatus)) {
191 int32_t defaultKeyLen = 0;
192 const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
193 u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
194 keyBuffer[defaultKeyLen] = 0;
195 } else {
196 *status = U_INTERNAL_PROGRAM_ERROR;
197 return NULL;
198 }
199 ures_close(defaultColl);
200 }
201 collElem = ures_getByKeyWithFallback(collations, keyBuffer, collations, status);
202 collations = NULL; // We just reused the collations object as collElem.
203
204 UResourceBundle *binary = NULL;
205
206 if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
207 *status = U_USING_DEFAULT_WARNING;
208 result = ucol_initCollator(UCA->image, result, UCA, status);
209 if (U_FAILURE(*status)) {
210 goto clean;
211 }
212 // if we use UCA, real locale is root
213 ures_close(b);
214 b = ures_open(U_ICUDATA_COLL, "", status);
215 ures_close(collElem);
216 collElem = ures_open(U_ICUDATA_COLL, "", status);
217 if(U_FAILURE(*status)) {
218 goto clean;
219 }
220 result->hasRealData = FALSE;
221 } else if(U_SUCCESS(*status)) {
222 intStatus = U_ZERO_ERROR;
223
224 binary = ures_getByKey(collElem, "%%CollationBin", NULL, &intStatus);
225
226 if(intStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
227 binary = NULL;
228 result = tryOpeningFromRules(collElem, status);
229 if(U_FAILURE(*status)) {
230 goto clean;
231 }
232 } else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */
233 int32_t len = 0;
234 const uint8_t *inData = ures_getBinary(binary, &len, status);
235 UCATableHeader *colData = (UCATableHeader *)inData;
236 if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
237 uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
238 colData->version[0] != UCOL_BUILDER_VERSION)
239 {
240 *status = U_DIFFERENT_UCA_VERSION;
241 result = tryOpeningFromRules(collElem, status);
242 } else {
243 if(U_FAILURE(*status)){
244 goto clean;
245 }
246 if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
247 result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
248 if(U_FAILURE(*status)){
249 goto clean;
250 }
251 result->hasRealData = TRUE;
252 } else {
253 result = ucol_initCollator(UCA->image, result, UCA, status);
254 ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
255 if(U_FAILURE(*status)){
256 goto clean;
257 }
258 result->hasRealData = FALSE;
259 }
260 result->freeImageOnClose = FALSE;
261 }
262 }
263 intStatus = U_ZERO_ERROR;
264 result->rules = ures_getStringByKey(collElem, "Sequence", &result->rulesLength, &intStatus);
265 result->freeRulesOnClose = FALSE;
266 } else { /* There is another error, and we're just gonna clean up */
267 goto clean;
268 }
269
270 intStatus = U_ZERO_ERROR;
271 result->ucaRules = ures_getStringByKey(b,"UCARules",NULL,&intStatus);
272
273 if(loc == NULL) {
274 loc = ures_getLocale(b, status);
275 }
276 result->requestedLocale = uprv_strdup(loc);
277 /* test for NULL */
278 if (result->requestedLocale == NULL) {
279 *status = U_MEMORY_ALLOCATION_ERROR;
280 goto clean;
281 }
282 loc = ures_getLocale(collElem, status);
283 result->actualLocale = uprv_strdup(loc);
284 /* test for NULL */
285 if (result->actualLocale == NULL) {
286 *status = U_MEMORY_ALLOCATION_ERROR;
287 goto clean;
288 }
289 loc = ures_getLocale(b, status);
290 result->validLocale = uprv_strdup(loc);
291 /* test for NULL */
292 if (result->validLocale == NULL) {
293 *status = U_MEMORY_ALLOCATION_ERROR;
294 goto clean;
295 }
296
297 ures_close(b);
298 ures_close(collElem);
299 ures_close(binary);
300 return result;
301
302 clean:
303 ures_close(b);
304 ures_close(collElem);
305 ures_close(binary);
306 ucol_close(result);
307 return NULL;
308 }
309
310 U_CAPI UCollator*
311 ucol_open(const char *loc,
312 UErrorCode *status)
313 {
314 U_NAMESPACE_USE
315
316 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
317 UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
318 UCollator *result = NULL;
319
320 u_init(status);
321 #if !UCONFIG_NO_SERVICE
322 result = Collator::createUCollator(loc, status);
323 if (result == NULL)
324 #endif
325 {
326 result = ucol_open_internal(loc, status);
327 }
328 UTRACE_EXIT_PTR_STATUS(result, *status);
329 return result;
330 }
331
332 U_CAPI UCollator* U_EXPORT2
333 ucol_openRules( const UChar *rules,
334 int32_t rulesLength,
335 UColAttributeValue normalizationMode,
336 UCollationStrength strength,
337 UParseError *parseError,
338 UErrorCode *status)
339 {
340 UColTokenParser src;
341 UColAttributeValue norm;
342 UParseError tErr;
343
344 if(status == NULL || U_FAILURE(*status)){
345 return 0;
346 }
347
348 u_init(status);
349 if (U_FAILURE(*status)) {
350 return NULL;
351 }
352
353 if(rules == NULL || rulesLength < -1) {
354 *status = U_ILLEGAL_ARGUMENT_ERROR;
355 return 0;
356 }
357
358 if(rulesLength == -1) {
359 rulesLength = u_strlen(rules);
360 }
361
362 if(parseError == NULL){
363 parseError = &tErr;
364 }
365
366 switch(normalizationMode) {
367 case UCOL_OFF:
368 case UCOL_ON:
369 case UCOL_DEFAULT:
370 norm = normalizationMode;
371 break;
372 default:
373 *status = U_ILLEGAL_ARGUMENT_ERROR;
374 return 0;
375 }
376
377 UCollator *result = NULL;
378 UCATableHeader *table = NULL;
379 UCollator *UCA = ucol_initUCA(status);
380
381 if(U_FAILURE(*status)){
382 return NULL;
383 }
384
385 ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status);
386 ucol_tok_assembleTokenList(&src,parseError, status);
387
388 if(U_FAILURE(*status)) {
389 /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
390 /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
391 /* so something might be done here... or on lower level */
392 #ifdef UCOL_DEBUG
393 if(*status == U_ILLEGAL_ARGUMENT_ERROR) {
394 fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source);
395 } else {
396 fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source);
397 }
398 #endif
399 goto cleanup;
400 }
401
402 if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */
403 /* also, if we wanted to remove some contractions, we should make a tailoring */
404 table = ucol_assembleTailoringTable(&src, status);
405 if(U_SUCCESS(*status)) {
406 // builder version
407 table->version[0] = UCOL_BUILDER_VERSION;
408 // no tailoring information on this level
409 table->version[1] = table->version[2] = table->version[3] = 0;
410 // set UCD version
411 u_getUnicodeVersion(table->UCDVersion);
412 // set UCA version
413 uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
414 result = ucol_initCollator(table, 0, UCA, status);
415 if (U_FAILURE(*status)) {
416 goto cleanup;
417 }
418 result->hasRealData = TRUE;
419 result->freeImageOnClose = TRUE;
420 }
421 } else { /* no rules, but no error either */
422 // must be only options
423 // We will init the collator from UCA
424 result = ucol_initCollator(UCA->image, 0, UCA, status);
425 // Check for null result
426 if (U_FAILURE(*status)) {
427 goto cleanup;
428 }
429 // And set only the options
430 UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
431 /* test for NULL */
432 if (opts == NULL) {
433 *status = U_MEMORY_ALLOCATION_ERROR;
434 goto cleanup;
435 }
436 uprv_memcpy(opts, src.opts, sizeof(UColOptionSet));
437 ucol_setOptionsFromHeader(result, opts, status);
438 result->freeOptionsOnClose = TRUE;
439 result->hasRealData = FALSE;
440 result->freeImageOnClose = FALSE;
441 }
442
443 if(U_SUCCESS(*status)) {
444 UChar *newRules;
445 result->dataVersion[0] = UCOL_BUILDER_VERSION;
446 if(rulesLength > 0) {
447 newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR);
448 /* test for NULL */
449 if (newRules == NULL) {
450 *status = U_MEMORY_ALLOCATION_ERROR;
451 goto cleanup;
452 }
453 uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR);
454 newRules[rulesLength]=0;
455 result->rules = newRules;
456 result->rulesLength = rulesLength;
457 result->freeRulesOnClose = TRUE;
458 }
459 result->ucaRules = NULL;
460 result->actualLocale = NULL;
461 result->validLocale = NULL;
462 result->requestedLocale = NULL;
463 ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
464 ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
465 } else {
466 cleanup:
467 if(result != NULL) {
468 ucol_close(result);
469 } else {
470 if(table != NULL) {
471 uprv_free(table);
472 }
473 }
474 result = NULL;
475 }
476
477 ucol_tok_closeTokenList(&src);
478
479 return result;
480 }
481
482 U_CAPI int32_t U_EXPORT2
483 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
484 UErrorCode status = U_ZERO_ERROR;
485 int32_t len = 0;
486 int32_t UCAlen = 0;
487 const UChar* ucaRules = 0;
488 const UChar *rules = ucol_getRules(coll, &len);
489 if(delta == UCOL_FULL_RULES) {
490 /* take the UCA rules and append real rules at the end */
491 /* UCA rules will be probably coming from the root RB */
492 ucaRules = coll->ucaRules;
493 if (ucaRules) {
494 UCAlen = u_strlen(ucaRules);
495 }
496 /*
497 ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
498 UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
499 UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
500 ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
501 ures_close(uca);
502 ures_close(cresb);
503 */
504 }
505 if(U_FAILURE(status)) {
506 return 0;
507 }
508 if(buffer!=0 && bufferLen>0){
509 *buffer=0;
510 if(UCAlen > 0) {
511 u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen));
512 }
513 if(len > 0 && bufferLen > UCAlen) {
514 u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen));
515 }
516 }
517 return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status);
518 }
519
520 static const UChar _NUL = 0;
521
522 U_CAPI const UChar* U_EXPORT2
523 ucol_getRules( const UCollator *coll,
524 int32_t *length)
525 {
526 if(coll->rules != NULL) {
527 *length = coll->rulesLength;
528 return coll->rules;
529 }
530 else {
531 *length = 0;
532 return &_NUL;
533 }
534 }
535
536 U_CAPI UBool U_EXPORT2
537 ucol_equals(const UCollator *source, const UCollator *target) {
538 UErrorCode status = U_ZERO_ERROR;
539 // if pointers are equal, collators are equal
540 if(source == target) {
541 return TRUE;
542 }
543 int32_t i = 0, j = 0;
544 // if any of attributes are different, collators are not equal
545 for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
546 if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) {
547 return FALSE;
548 }
549 }
550
551 int32_t sourceRulesLen = 0, targetRulesLen = 0;
552 const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
553 const UChar *targetRules = ucol_getRules(target, &targetRulesLen);
554
555 if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
556 // all the attributes are equal and the rules are equal - collators are equal
557 return(TRUE);
558 }
559 // hard part, need to construct tree from rules and see if they yield the same tailoring
560 UBool result = TRUE;
561 UParseError parseError;
562 UColTokenParser sourceParser, targetParser;
563 int32_t sourceListLen = 0, targetListLen = 0;
564 ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status);
565 ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status);
566 sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
567 targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);
568
569 if(sourceListLen != targetListLen) {
570 // different number of resets
571 result = FALSE;
572 } else {
573 UColToken *sourceReset = NULL, *targetReset = NULL;
574 UChar *sourceResetString = NULL, *targetResetString = NULL;
575 int32_t sourceStringLen = 0, targetStringLen = 0;
576 for(i = 0; i < sourceListLen; i++) {
577 sourceReset = sourceParser.lh[i].reset;
578 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
579 sourceStringLen = sourceReset->source >> 24;
580 for(j = 0; j < sourceListLen; j++) {
581 targetReset = targetParser.lh[j].reset;
582 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
583 targetStringLen = targetReset->source >> 24;
584 if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) {
585 sourceReset = sourceParser.lh[i].first;
586 targetReset = targetParser.lh[j].first;
587 while(sourceReset != NULL && targetReset != NULL) {
588 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
589 sourceStringLen = sourceReset->source >> 24;
590 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
591 targetStringLen = targetReset->source >> 24;
592 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
593 result = FALSE;
594 goto returnResult;
595 }
596 // probably also need to check the expansions
597 if(sourceReset->expansion) {
598 if(!targetReset->expansion) {
599 result = FALSE;
600 goto returnResult;
601 } else {
602 // compare expansions
603 sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF);
604 sourceStringLen = sourceReset->expansion >> 24;
605 targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF);
606 targetStringLen = targetReset->expansion >> 24;
607 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
608 result = FALSE;
609 goto returnResult;
610 }
611 }
612 } else {
613 if(targetReset->expansion) {
614 result = FALSE;
615 goto returnResult;
616 }
617 }
618 sourceReset = sourceReset->next;
619 targetReset = targetReset->next;
620 }
621 if(sourceReset != targetReset) { // at least one is not NULL
622 // there are more tailored elements in one list
623 result = FALSE;
624 goto returnResult;
625 }
626
627
628 break;
629 }
630 }
631 // couldn't find the reset anchor, so the collators are not equal
632 if(j == sourceListLen) {
633 result = FALSE;
634 goto returnResult;
635 }
636 }
637 }
638
639 returnResult:
640 ucol_tok_closeTokenList(&sourceParser);
641 ucol_tok_closeTokenList(&targetParser);
642 return result;
643
644 }
645
646 U_CAPI int32_t U_EXPORT2
647 ucol_getDisplayName( const char *objLoc,
648 const char *dispLoc,
649 UChar *result,
650 int32_t resultLength,
651 UErrorCode *status)
652 {
653 U_NAMESPACE_USE
654
655 if(U_FAILURE(*status)) return -1;
656 UnicodeString dst;
657 if(!(result==NULL && resultLength==0)) {
658 // NULL destination for pure preflighting: empty dummy string
659 // otherwise, alias the destination buffer
660 dst.setTo(result, 0, resultLength);
661 }
662 Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
663 return dst.extract(result, resultLength, *status);
664 }
665
666 U_CAPI const char* U_EXPORT2
667 ucol_getAvailable(int32_t index)
668 {
669 int32_t count = 0;
670 const Locale *loc = Collator::getAvailableLocales(count);
671 if (loc != NULL && index < count) {
672 return loc[index].getName();
673 }
674 return NULL;
675 }
676
677 U_CAPI int32_t U_EXPORT2
678 ucol_countAvailable()
679 {
680 int32_t count = 0;
681 Collator::getAvailableLocales(count);
682 return count;
683 }
684
685 #if !UCONFIG_NO_SERVICE
686 U_CAPI UEnumeration* U_EXPORT2
687 ucol_openAvailableLocales(UErrorCode *status) {
688 U_NAMESPACE_USE
689
690 // This is a wrapper over Collator::getAvailableLocales()
691 if (U_FAILURE(*status)) {
692 return NULL;
693 }
694 StringEnumeration *s = Collator::getAvailableLocales();
695 if (s == NULL) {
696 *status = U_MEMORY_ALLOCATION_ERROR;
697 return NULL;
698 }
699 return uenum_openStringEnumeration(s, status);
700 }
701 #endif
702
// Note: the resource name ("collations") and the keyword ("collation")
// are deliberately different strings: KEYWORDS[0] != RESOURCE_NAME - alan

// Name of the resource that holds the collation data.
static const char RESOURCE_NAME[] = "collations";

// Locale keywords understood by the collation service.
static const char* const KEYWORDS[] = { "collation" };

// Number of entries in KEYWORDS.
#define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
710
711 U_CAPI UEnumeration* U_EXPORT2
712 ucol_getKeywords(UErrorCode *status) {
713 UEnumeration *result = NULL;
714 if (U_SUCCESS(*status)) {
715 return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
716 }
717 return result;
718 }
719
720 U_CAPI UEnumeration* U_EXPORT2
721 ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
722 if (U_FAILURE(*status)) {
723 return NULL;
724 }
725 // hard-coded to accept exactly one collation keyword
726 // modify if additional collation keyword is added later
727 if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0)
728 {
729 *status = U_ILLEGAL_ARGUMENT_ERROR;
730 return NULL;
731 }
732 return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
733 }
734
735 U_CAPI int32_t U_EXPORT2
736 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
737 const char* keyword, const char* locale,
738 UBool* isAvailable, UErrorCode* status)
739 {
740 // N.B.: Resource name is "collations" but keyword is "collation"
741 return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
742 "collations", keyword, locale,
743 isAvailable, TRUE, status);
744 }
745
746 /* returns the locale name the collation data comes from */
747 U_CAPI const char * U_EXPORT2
748 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
749 return ucol_getLocaleByType(coll, type, status);
750 }
751
752 U_CAPI const char * U_EXPORT2
753 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
754 const char *result = NULL;
755 if(status == NULL || U_FAILURE(*status)) {
756 return NULL;
757 }
758 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
759 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
760
761 switch(type) {
762 case ULOC_ACTUAL_LOCALE:
763 result = coll->actualLocale;
764 break;
765 case ULOC_VALID_LOCALE:
766 result = coll->validLocale;
767 break;
768 case ULOC_REQUESTED_LOCALE:
769 result = coll->requestedLocale;
770 break;
771 default:
772 *status = U_ILLEGAL_ARGUMENT_ERROR;
773 }
774 UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
775 UTRACE_EXIT_STATUS(*status);
776 return result;
777 }
778
779 U_CFUNC void U_EXPORT2
780 ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt, char *actualLocaleToAdopt)
781 {
782 if (coll) {
783 if (coll->validLocale) {
784 uprv_free(coll->validLocale);
785 }
786 coll->validLocale = validLocaleToAdopt;
787 if (coll->requestedLocale) { // should always have
788 uprv_free(coll->requestedLocale);
789 }
790 coll->requestedLocale = requestedLocaleToAdopt;
791 if (coll->actualLocale) {
792 uprv_free(coll->actualLocale);
793 }
794 coll->actualLocale = actualLocaleToAdopt;
795 }
796 }
797
798 U_CAPI USet * U_EXPORT2
799 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
800 {
801 U_NAMESPACE_USE
802
803 if(status == NULL || U_FAILURE(*status)) {
804 return NULL;
805 }
806 if(coll == NULL || coll->UCA == NULL) {
807 *status = U_ILLEGAL_ARGUMENT_ERROR;
808 return NULL;
809 }
810 UParseError parseError;
811 UColTokenParser src;
812 int32_t rulesLen = 0;
813 const UChar *rules = ucol_getRules(coll, &rulesLen);
814 UBool startOfRules = TRUE;
815 // we internally use the C++ class, for the following reasons:
816 // 1. we need to utilize canonical iterator, which is a C++ only class
817 // 2. canonical iterator returns UnicodeStrings - USet cannot take them
818 // 3. USet is internally really UnicodeSet, C is just a wrapper
819 UnicodeSet *tailored = new UnicodeSet();
820 UnicodeString pattern;
821 UnicodeString empty;
822 CanonicalIterator it(empty, *status);
823
824
825 // The idea is to tokenize the rule set. For each non-reset token,
826 // we add all the canonicaly equivalent FCD sequences
827 ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status);
828 while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) {
829 startOfRules = FALSE;
830 if(src.parsedToken.strength != UCOL_TOK_RESET) {
831 const UChar *stuff = src.source+(src.parsedToken.charsOffset);
832 it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status);
833 pattern = it.next();
834 while(!pattern.isBogus()) {
835 if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) {
836 tailored->add(pattern);
837 }
838 pattern = it.next();
839 }
840 }
841 }
842 ucol_tok_closeTokenList(&src);
843 return (USet *)tailored;
844 }
845
846 #endif /* #if !UCONFIG_NO_COLLATION */