]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/ucol_res.cpp
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_res.cpp
CommitLineData
73c04bcf
A
1/*
2*******************************************************************************
3* Copyright (C) 1996-2006, International Business Machines
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
6* file name: ucol_res.cpp
7* encoding: US-ASCII
8* tab size: 8 (not used)
9* indentation:4
10*
11* Description:
12* This file contains dependencies that the collation run-time doesn't normally
13* need. This mainly contains resource bundle usage and collation meta information
14*
15* Modification history
16* Date Name Comments
17* 1996-1999 various members of ICU team maintained C API for collation framework
18* 02/16/2001 synwee Added internal method getPrevSpecialCE
19* 03/01/2001 synwee Added maxexpansion functionality.
20* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
21* 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp
22*/
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_COLLATION
27#include "unicode/uloc.h"
28#include "unicode/coll.h"
29#include "unicode/tblcoll.h"
30#include "unicode/caniter.h"
31#include "unicode/ustring.h"
32
33#include "ucol_bld.h"
34#include "ucol_imp.h"
35#include "ucol_tok.h"
36#include "ucol_elm.h"
37#include "uresimp.h"
38#include "ustr_imp.h"
39#include "cstring.h"
40#include "umutex.h"
41#include "ustrenum.h"
42#include "putilimp.h"
43#include "utracimp.h"
44#include "cmemory.h"
45
46U_CDECL_BEGIN
47static void U_CALLCONV
48ucol_prv_closeResources(UCollator *coll) {
49 if(coll->rb != NULL) { /* pointing to read-only memory */
50 ures_close(coll->rb);
51 }
52 if(coll->elements != NULL) {
53 ures_close(coll->elements);
54 }
55}
56U_CDECL_END
57
58/****************************************************************************/
59/* Following are the open/close functions */
60/* */
61/****************************************************************************/
62static UCollator*
63tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
64 int32_t rulesLen = 0;
65 const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status);
66 return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
67
68}
69
70
71// API in ucol_imp.h
72
73U_CFUNC UCollator*
74ucol_open_internal(const char *loc,
75 UErrorCode *status)
76{
77 const UCollator* UCA = ucol_initUCA(status);
78
79 /* New version */
80 if(U_FAILURE(*status)) return 0;
81
82
83
84 UCollator *result = NULL;
85 UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
86
87 /* we try to find stuff from keyword */
88 UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
89 UResourceBundle *collElem = NULL;
90 char keyBuffer[256];
91 // if there is a keyword, we pick it up and try to get elements
92 if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) {
93 // no keyword. we try to find the default setting, which will give us the keyword value
94 UErrorCode intStatus = U_ZERO_ERROR;
95 // finding default value does not affect collation fallback status
96 UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
97 if(U_SUCCESS(intStatus)) {
98 int32_t defaultKeyLen = 0;
99 const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
100 u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
101 keyBuffer[defaultKeyLen] = 0;
102 } else {
103 *status = U_INTERNAL_PROGRAM_ERROR;
104 return NULL;
105 }
106 ures_close(defaultColl);
107 }
108 collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status);
109
110 UResourceBundle *binary = NULL;
111
112 if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
113 *status = U_USING_DEFAULT_WARNING;
114 result = ucol_initCollator(UCA->image, result, UCA, status);
115 // if we use UCA, real locale is root
116 result->rb = ures_open(U_ICUDATA_COLL, "", status);
117 result->elements = ures_open(U_ICUDATA_COLL, "", status);
118 if(U_FAILURE(*status)) {
119 goto clean;
120 }
121 ures_close(b);
122 result->hasRealData = FALSE;
123 } else if(U_SUCCESS(*status)) {
124 int32_t len = 0;
125 UErrorCode binaryStatus = U_ZERO_ERROR;
126
127 binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus);
128
129 if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
130 binary = NULL;
131 result = tryOpeningFromRules(collElem, status);
132 if(U_FAILURE(*status)) {
133 goto clean;
134 }
135 } else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */
136 const uint8_t *inData = ures_getBinary(binary, &len, status);
137 UCATableHeader *colData = (UCATableHeader *)inData;
138 if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
139 uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
140 colData->version[0] != UCOL_BUILDER_VERSION)
141 {
142 *status = U_DIFFERENT_UCA_VERSION;
143 result = tryOpeningFromRules(collElem, status);
144 } else {
145 if(U_FAILURE(*status)){
146 goto clean;
147 }
148 if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
149 result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
150 if(U_FAILURE(*status)){
151 goto clean;
152 }
153 result->hasRealData = TRUE;
154 } else {
155 result = ucol_initCollator(UCA->image, result, UCA, status);
156 ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
157 if(U_FAILURE(*status)){
158 goto clean;
159 }
160 result->hasRealData = FALSE;
161 }
162 result->freeImageOnClose = FALSE;
163 }
164 }
165 result->rb = b;
166 result->elements = collElem;
167 len = 0;
168 binaryStatus = U_ZERO_ERROR;
169 result->rules = ures_getStringByKey(result->elements, "Sequence", &len, &binaryStatus);
170 result->rulesLength = len;
171 result->freeRulesOnClose = FALSE;
172 } else { /* There is another error, and we're just gonna clean up */
173 goto clean;
174 }
175
176 result->validLocale = NULL; // default is to use rb info
177
178 if(loc == NULL) {
179 loc = ures_getLocale(result->rb, status);
180 }
181 result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char));
182 /* test for NULL */
183 if (result->requestedLocale == NULL) {
184 *status = U_MEMORY_ALLOCATION_ERROR;
185 goto clean;
186 }
187 uprv_strcpy(result->requestedLocale, loc);
188
189 ures_close(binary);
190 ures_close(collations); //??? we have to decide on that. Probably affects something :)
191 result->resCleaner = ucol_prv_closeResources;
192 return result;
193
194clean:
195 ures_close(b);
196 ures_close(collElem);
197 ures_close(collations);
198 ures_close(binary);
199 return NULL;
200}
201
202U_CAPI UCollator*
203ucol_open(const char *loc,
204 UErrorCode *status)
205{
206 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
207 UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
208 UCollator *result = NULL;
209
210 u_init(status);
211#if !UCONFIG_NO_SERVICE
212 result = Collator::createUCollator(loc, status);
213 if (result == NULL)
214#endif
215 {
216 result = ucol_open_internal(loc, status);
217 }
218 UTRACE_EXIT_PTR_STATUS(result, *status);
219 return result;
220}
221
222U_CAPI UCollator* U_EXPORT2
223ucol_openRules( const UChar *rules,
224 int32_t rulesLength,
225 UColAttributeValue normalizationMode,
226 UCollationStrength strength,
227 UParseError *parseError,
228 UErrorCode *status)
229{
230 uint32_t listLen = 0;
231 UColTokenParser src;
232 UColAttributeValue norm;
233 UParseError tErr;
234
235 if(status == NULL || U_FAILURE(*status)){
236 return 0;
237 }
238
239 u_init(status);
240 if (U_FAILURE(*status)) {
241 return NULL;
242 }
243
244 if(rules == NULL || rulesLength < -1) {
245 *status = U_ILLEGAL_ARGUMENT_ERROR;
246 return 0;
247 }
248
249 if(rulesLength == -1) {
250 rulesLength = u_strlen(rules);
251 }
252
253 if(parseError == NULL){
254 parseError = &tErr;
255 }
256
257 switch(normalizationMode) {
258 case UCOL_OFF:
259 case UCOL_ON:
260 case UCOL_DEFAULT:
261 norm = normalizationMode;
262 break;
263 default:
264 *status = U_ILLEGAL_ARGUMENT_ERROR;
265 return 0;
266 }
267
268 UCollator *UCA = ucol_initUCA(status);
269
270 if(U_FAILURE(*status)){
271 return NULL;
272 }
273
274 ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status);
275 listLen = ucol_tok_assembleTokenList(&src,parseError, status);
276
277 if(U_FAILURE(*status)) {
278 /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
279 /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
280 /* so something might be done here... or on lower level */
281#ifdef UCOL_DEBUG
282 if(*status == U_ILLEGAL_ARGUMENT_ERROR) {
283 fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source);
284 } else {
285 fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source);
286 }
287#endif
288 ucol_tok_closeTokenList(&src);
289 return NULL;
290 }
291 UCollator *result = NULL;
292 UCATableHeader *table = NULL;
293
294 if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */
295 /* also, if we wanted to remove some contractions, we should make a tailoring */
296 table = ucol_assembleTailoringTable(&src, status);
297 if(U_SUCCESS(*status)) {
298 // builder version
299 table->version[0] = UCOL_BUILDER_VERSION;
300 // no tailoring information on this level
301 table->version[1] = table->version[2] = table->version[3] = 0;
302 // set UCD version
303 u_getUnicodeVersion(table->UCDVersion);
304 // set UCA version
305 uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
306 result = ucol_initCollator(table, 0, UCA, status);
307 result->hasRealData = TRUE;
308 result->freeImageOnClose = TRUE;
309 }
310 } else { /* no rules, but no error either */
311 // must be only options
312 // We will init the collator from UCA
313 result = ucol_initCollator(UCA->image, 0, UCA, status);
314 // And set only the options
315 UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
316 /* test for NULL */
317 if (opts == NULL) {
318 *status = U_MEMORY_ALLOCATION_ERROR;
319 goto cleanup;
320 }
321 uprv_memcpy(opts, src.opts, sizeof(UColOptionSet));
322 ucol_setOptionsFromHeader(result, opts, status);
323 result->freeOptionsOnClose = TRUE;
324 result->hasRealData = FALSE;
325 result->freeImageOnClose = FALSE;
326 }
327
328 if(U_SUCCESS(*status)) {
329 UChar *newRules;
330 result->dataVersion[0] = UCOL_BUILDER_VERSION;
331 if(rulesLength > 0) {
332 newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR);
333 /* test for NULL */
334 if (newRules == NULL) {
335 *status = U_MEMORY_ALLOCATION_ERROR;
336 goto cleanup;
337 }
338 uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR);
339 newRules[rulesLength]=0;
340 result->rules = newRules;
341 result->rulesLength = rulesLength;
342 result->freeRulesOnClose = TRUE;
343 }
344 result->rb = NULL;
345 result->elements = NULL;
346 result->validLocale = NULL;
347 result->requestedLocale = NULL;
348 ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
349 ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
350 } else {
351cleanup:
352 if(result != NULL) {
353 ucol_close(result);
354 } else {
355 if(table != NULL) {
356 uprv_free(table);
357 }
358 }
359 result = NULL;
360 }
361
362 ucol_tok_closeTokenList(&src);
363
364 return result;
365}
366
367U_CAPI int32_t U_EXPORT2
368ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
369 UErrorCode status = U_ZERO_ERROR;
370 int32_t len = 0;
371 int32_t UCAlen = 0;
372 const UChar* ucaRules = 0;
373 const UChar *rules = ucol_getRules(coll, &len);
374 if(delta == UCOL_FULL_RULES) {
375 /* take the UCA rules and append real rules at the end */
376 /* UCA rules will be probably coming from the root RB */
377 ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
378 /*
379 UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
380 UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
381 ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
382 ures_close(uca);
383 ures_close(cresb);
384 */
385 }
386 if(U_FAILURE(status)) {
387 return 0;
388 }
389 if(buffer!=0 && bufferLen>0){
390 *buffer=0;
391 if(UCAlen > 0) {
392 u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen));
393 }
394 if(len > 0 && bufferLen > UCAlen) {
395 u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen));
396 }
397 }
398 return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status);
399}
400
401static const UChar _NUL = 0;
402
403U_CAPI const UChar* U_EXPORT2
404ucol_getRules( const UCollator *coll,
405 int32_t *length)
406{
407 if(coll->rules != NULL) {
408 *length = coll->rulesLength;
409 return coll->rules;
410 }
411 else {
412 *length = 0;
413 return &_NUL;
414 }
415}
416
417U_CAPI UBool U_EXPORT2
418ucol_equals(const UCollator *source, const UCollator *target) {
419 UErrorCode status = U_ZERO_ERROR;
420 // if pointers are equal, collators are equal
421 if(source == target) {
422 return TRUE;
423 }
424 int32_t i = 0, j = 0;
425 // if any of attributes are different, collators are not equal
426 for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
427 if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) {
428 return FALSE;
429 }
430 }
431
432 int32_t sourceRulesLen = 0, targetRulesLen = 0;
433 const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
434 const UChar *targetRules = ucol_getRules(target, &targetRulesLen);
435
436 if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
437 // all the attributes are equal and the rules are equal - collators are equal
438 return(TRUE);
439 }
440 // hard part, need to construct tree from rules and see if they yield the same tailoring
441 UBool result = TRUE;
442 UParseError parseError;
443 UColTokenParser sourceParser, targetParser;
444 int32_t sourceListLen = 0, targetListLen = 0;
445 ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status);
446 ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status);
447 sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
448 targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);
449
450 if(sourceListLen != targetListLen) {
451 // different number of resets
452 result = FALSE;
453 } else {
454 UColToken *sourceReset = NULL, *targetReset = NULL;
455 UChar *sourceResetString = NULL, *targetResetString = NULL;
456 int32_t sourceStringLen = 0, targetStringLen = 0;
457 for(i = 0; i < sourceListLen; i++) {
458 sourceReset = sourceParser.lh[i].reset;
459 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
460 sourceStringLen = sourceReset->source >> 24;
461 for(j = 0; j < sourceListLen; j++) {
462 targetReset = targetParser.lh[j].reset;
463 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
464 targetStringLen = targetReset->source >> 24;
465 if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) {
466 sourceReset = sourceParser.lh[i].first;
467 targetReset = targetParser.lh[j].first;
468 while(sourceReset != NULL && targetReset != NULL) {
469 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
470 sourceStringLen = sourceReset->source >> 24;
471 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
472 targetStringLen = targetReset->source >> 24;
473 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
474 result = FALSE;
475 goto returnResult;
476 }
477 // probably also need to check the expansions
478 if(sourceReset->expansion) {
479 if(!targetReset->expansion) {
480 result = FALSE;
481 goto returnResult;
482 } else {
483 // compare expansions
484 sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF);
485 sourceStringLen = sourceReset->expansion >> 24;
486 targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF);
487 targetStringLen = targetReset->expansion >> 24;
488 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
489 result = FALSE;
490 goto returnResult;
491 }
492 }
493 } else {
494 if(targetReset->expansion) {
495 result = FALSE;
496 goto returnResult;
497 }
498 }
499 sourceReset = sourceReset->next;
500 targetReset = targetReset->next;
501 }
502 if(sourceReset != targetReset) { // at least one is not NULL
503 // there are more tailored elements in one list
504 result = FALSE;
505 goto returnResult;
506 }
507
508
509 break;
510 }
511 }
512 // couldn't find the reset anchor, so the collators are not equal
513 if(j == sourceListLen) {
514 result = FALSE;
515 goto returnResult;
516 }
517 }
518 }
519
520returnResult:
521 ucol_tok_closeTokenList(&sourceParser);
522 ucol_tok_closeTokenList(&targetParser);
523 return result;
524
525}
526
527U_CAPI int32_t U_EXPORT2
528ucol_getDisplayName( const char *objLoc,
529 const char *dispLoc,
530 UChar *result,
531 int32_t resultLength,
532 UErrorCode *status)
533{
534
535 if(U_FAILURE(*status)) return -1;
536 UnicodeString dst;
537 if(!(result==NULL && resultLength==0)) {
538 // NULL destination for pure preflighting: empty dummy string
539 // otherwise, alias the destination buffer
540 dst.setTo(result, 0, resultLength);
541 }
542 Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
543 return dst.extract(result, resultLength, *status);
544}
545
546U_CAPI const char* U_EXPORT2
547ucol_getAvailable(int32_t index)
548{
549 return uloc_getAvailable(index);
550}
551
552U_CAPI int32_t U_EXPORT2
553ucol_countAvailable()
554{
555 return uloc_countAvailable();
556}
557
558#if !UCONFIG_NO_SERVICE
559U_CAPI UEnumeration* U_EXPORT2
560ucol_openAvailableLocales(UErrorCode *status) {
561 // This is a wrapper over Collator::getAvailableLocales()
562 if (U_FAILURE(*status)) {
563 return NULL;
564 }
565 StringEnumeration *s = Collator::getAvailableLocales();
566 if (s == NULL) {
567 *status = U_MEMORY_ALLOCATION_ERROR;
568 return NULL;
569 }
570 return uenum_openStringEnumeration(s, status);
571}
572#endif
573
574// Note: KEYWORDS[0] != RESOURCE_NAME - alan
575
576static const char* RESOURCE_NAME = "collations";
577
578static const char* KEYWORDS[] = { "collation" };
579
580#define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
581
582U_CAPI UEnumeration* U_EXPORT2
583ucol_getKeywords(UErrorCode *status) {
584 UEnumeration *result = NULL;
585 if (U_SUCCESS(*status)) {
586 return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
587 }
588 return result;
589}
590
591U_CAPI UEnumeration* U_EXPORT2
592ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
593 // hard-coded to accept exactly one collation keyword
594 // modify if additional collation keyword is added later
595 if (U_SUCCESS(*status) &&
596 keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0) {
597 *status = U_ILLEGAL_ARGUMENT_ERROR;
598 return NULL;
599 }
600 return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
601}
602
603U_CAPI int32_t U_EXPORT2
604ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
605 const char* keyword, const char* locale,
606 UBool* isAvailable, UErrorCode* status) {
607 // N.B.: Resource name is "collations" but keyword is "collation"
608 return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
609 "collations", keyword, locale,
610 isAvailable, TRUE, status);
611}
612
613/* returns the locale name the collation data comes from */
614U_CAPI const char * U_EXPORT2
615ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
616 return ucol_getLocaleByType(coll, type, status);
617}
618
619U_CAPI const char * U_EXPORT2
620ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
621 const char *result = NULL;
622 if(status == NULL || U_FAILURE(*status)) {
623 return NULL;
624 }
625 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
626 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
627
628 switch(type) {
629 case ULOC_ACTUAL_LOCALE:
630 // validLocale is set only if service registration has explicitly set the
631 // requested and valid locales. if this is the case, the actual locale
632 // is considered to be the valid locale.
633 if (coll->validLocale != NULL) {
634 result = coll->validLocale;
635 } else if(coll->elements != NULL) {
636 result = ures_getLocale(coll->elements, status);
637 }
638 break;
639 case ULOC_VALID_LOCALE:
640 if (coll->validLocale != NULL) {
641 result = coll->validLocale;
642 } else if(coll->rb != NULL) {
643 result = ures_getLocale(coll->rb, status);
644 }
645 break;
646 case ULOC_REQUESTED_LOCALE:
647 result = coll->requestedLocale;
648 break;
649 default:
650 *status = U_ILLEGAL_ARGUMENT_ERROR;
651 }
652 UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
653 UTRACE_EXIT_STATUS(*status);
654 return result;
655}
656
657U_CAPI void U_EXPORT2
658ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt)
659{
660 if (coll) {
661 if (coll->validLocale) {
662 uprv_free(coll->validLocale);
663 }
664 coll->validLocale = validLocaleToAdopt;
665 if (coll->requestedLocale) { // should always have
666 uprv_free(coll->requestedLocale);
667 }
668 coll->requestedLocale = requestedLocaleToAdopt;
669 }
670}
671
672U_CAPI USet * U_EXPORT2
673ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
674{
675 if(status == NULL || U_FAILURE(*status)) {
676 return NULL;
677 }
678 if(coll == NULL || coll->UCA == NULL) {
679 *status = U_ILLEGAL_ARGUMENT_ERROR;
680 return NULL;
681 }
682 UParseError parseError;
683 UColTokenParser src;
684 int32_t rulesLen = 0;
685 const UChar *rules = ucol_getRules(coll, &rulesLen);
686 const UChar *current = NULL;
687 UBool startOfRules = TRUE;
688 // we internally use the C++ class, for the following reasons:
689 // 1. we need to utilize canonical iterator, which is a C++ only class
690 // 2. canonical iterator returns UnicodeStrings - USet cannot take them
691 // 3. USet is internally really UnicodeSet, C is just a wrapper
692 UnicodeSet *tailored = new UnicodeSet();
693 UnicodeString pattern;
694 UnicodeString empty;
695 CanonicalIterator it(empty, *status);
696
697
698 // The idea is to tokenize the rule set. For each non-reset token,
699 // we add all the canonicaly equivalent FCD sequences
700 ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status);
701 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError, status)) != NULL) {
702 startOfRules = FALSE;
703 if(src.parsedToken.strength != UCOL_TOK_RESET) {
704 const UChar *stuff = src.source+(src.parsedToken.charsOffset);
705 it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status);
706 pattern = it.next();
707 while(!pattern.isBogus()) {
708 if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) {
709 tailored->add(pattern);
710 }
711 pattern = it.next();
712 }
713 }
714 }
715 ucol_tok_closeTokenList(&src);
716 return (USet *)tailored;
717}
718
719#endif /* #if !UCONFIG_NO_COLLATION */