]>
Commit | Line | Data |
---|---|---|
73c04bcf A |
1 | /* |
2 | ******************************************************************************* | |
3 | * Copyright (C) 1996-2006, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * file name: ucol_res.cpp | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * Description: | |
12 | * This file contains dependencies that the collation run-time doesn't normally | |
13 | * need. This mainly contains resource bundle usage and collation meta information | |
14 | * | |
15 | * Modification history | |
16 | * Date Name Comments | |
17 | * 1996-1999 various members of ICU team maintained C API for collation framework | |
18 | * 02/16/2001 synwee Added internal method getPrevSpecialCE | |
19 | * 03/01/2001 synwee Added maxexpansion functionality. | |
20 | * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant | |
21 | * 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp | |
22 | */ | |
23 | ||
24 | #include "unicode/utypes.h" | |
25 | ||
26 | #if !UCONFIG_NO_COLLATION | |
27 | #include "unicode/uloc.h" | |
28 | #include "unicode/coll.h" | |
29 | #include "unicode/tblcoll.h" | |
30 | #include "unicode/caniter.h" | |
31 | #include "unicode/ustring.h" | |
32 | ||
33 | #include "ucol_bld.h" | |
34 | #include "ucol_imp.h" | |
35 | #include "ucol_tok.h" | |
36 | #include "ucol_elm.h" | |
37 | #include "uresimp.h" | |
38 | #include "ustr_imp.h" | |
39 | #include "cstring.h" | |
40 | #include "umutex.h" | |
41 | #include "ustrenum.h" | |
42 | #include "putilimp.h" | |
43 | #include "utracimp.h" | |
44 | #include "cmemory.h" | |
45 | ||
46 | U_CDECL_BEGIN | |
47 | static void U_CALLCONV | |
48 | ucol_prv_closeResources(UCollator *coll) { | |
49 | if(coll->rb != NULL) { /* pointing to read-only memory */ | |
50 | ures_close(coll->rb); | |
51 | } | |
52 | if(coll->elements != NULL) { | |
53 | ures_close(coll->elements); | |
54 | } | |
55 | } | |
56 | U_CDECL_END | |
57 | ||
58 | /****************************************************************************/ | |
59 | /* Following are the open/close functions */ | |
60 | /* */ | |
61 | /****************************************************************************/ | |
62 | static UCollator* | |
63 | tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) { | |
64 | int32_t rulesLen = 0; | |
65 | const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status); | |
66 | return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status); | |
67 | ||
68 | } | |
69 | ||
70 | ||
71 | // API in ucol_imp.h | |
72 | ||
73 | U_CFUNC UCollator* | |
74 | ucol_open_internal(const char *loc, | |
75 | UErrorCode *status) | |
76 | { | |
77 | const UCollator* UCA = ucol_initUCA(status); | |
78 | ||
79 | /* New version */ | |
80 | if(U_FAILURE(*status)) return 0; | |
81 | ||
82 | ||
83 | ||
84 | UCollator *result = NULL; | |
85 | UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status); | |
86 | ||
87 | /* we try to find stuff from keyword */ | |
88 | UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status); | |
89 | UResourceBundle *collElem = NULL; | |
90 | char keyBuffer[256]; | |
91 | // if there is a keyword, we pick it up and try to get elements | |
92 | if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) { | |
93 | // no keyword. we try to find the default setting, which will give us the keyword value | |
94 | UErrorCode intStatus = U_ZERO_ERROR; | |
95 | // finding default value does not affect collation fallback status | |
96 | UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus); | |
97 | if(U_SUCCESS(intStatus)) { | |
98 | int32_t defaultKeyLen = 0; | |
99 | const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus); | |
100 | u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen); | |
101 | keyBuffer[defaultKeyLen] = 0; | |
102 | } else { | |
103 | *status = U_INTERNAL_PROGRAM_ERROR; | |
104 | return NULL; | |
105 | } | |
106 | ures_close(defaultColl); | |
107 | } | |
108 | collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status); | |
109 | ||
110 | UResourceBundle *binary = NULL; | |
111 | ||
112 | if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */ | |
113 | *status = U_USING_DEFAULT_WARNING; | |
114 | result = ucol_initCollator(UCA->image, result, UCA, status); | |
115 | // if we use UCA, real locale is root | |
116 | result->rb = ures_open(U_ICUDATA_COLL, "", status); | |
117 | result->elements = ures_open(U_ICUDATA_COLL, "", status); | |
118 | if(U_FAILURE(*status)) { | |
119 | goto clean; | |
120 | } | |
121 | ures_close(b); | |
122 | result->hasRealData = FALSE; | |
123 | } else if(U_SUCCESS(*status)) { | |
124 | int32_t len = 0; | |
125 | UErrorCode binaryStatus = U_ZERO_ERROR; | |
126 | ||
127 | binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus); | |
128 | ||
129 | if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */ | |
130 | binary = NULL; | |
131 | result = tryOpeningFromRules(collElem, status); | |
132 | if(U_FAILURE(*status)) { | |
133 | goto clean; | |
134 | } | |
135 | } else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */ | |
136 | const uint8_t *inData = ures_getBinary(binary, &len, status); | |
137 | UCATableHeader *colData = (UCATableHeader *)inData; | |
138 | if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 || | |
139 | uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 || | |
140 | colData->version[0] != UCOL_BUILDER_VERSION) | |
141 | { | |
142 | *status = U_DIFFERENT_UCA_VERSION; | |
143 | result = tryOpeningFromRules(collElem, status); | |
144 | } else { | |
145 | if(U_FAILURE(*status)){ | |
146 | goto clean; | |
147 | } | |
148 | if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) { | |
149 | result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status); | |
150 | if(U_FAILURE(*status)){ | |
151 | goto clean; | |
152 | } | |
153 | result->hasRealData = TRUE; | |
154 | } else { | |
155 | result = ucol_initCollator(UCA->image, result, UCA, status); | |
156 | ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status); | |
157 | if(U_FAILURE(*status)){ | |
158 | goto clean; | |
159 | } | |
160 | result->hasRealData = FALSE; | |
161 | } | |
162 | result->freeImageOnClose = FALSE; | |
163 | } | |
164 | } | |
165 | result->rb = b; | |
166 | result->elements = collElem; | |
167 | len = 0; | |
168 | binaryStatus = U_ZERO_ERROR; | |
169 | result->rules = ures_getStringByKey(result->elements, "Sequence", &len, &binaryStatus); | |
170 | result->rulesLength = len; | |
171 | result->freeRulesOnClose = FALSE; | |
172 | } else { /* There is another error, and we're just gonna clean up */ | |
173 | goto clean; | |
174 | } | |
175 | ||
176 | result->validLocale = NULL; // default is to use rb info | |
177 | ||
178 | if(loc == NULL) { | |
179 | loc = ures_getLocale(result->rb, status); | |
180 | } | |
181 | result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char)); | |
182 | /* test for NULL */ | |
183 | if (result->requestedLocale == NULL) { | |
184 | *status = U_MEMORY_ALLOCATION_ERROR; | |
185 | goto clean; | |
186 | } | |
187 | uprv_strcpy(result->requestedLocale, loc); | |
188 | ||
189 | ures_close(binary); | |
190 | ures_close(collations); //??? we have to decide on that. Probably affects something :) | |
191 | result->resCleaner = ucol_prv_closeResources; | |
192 | return result; | |
193 | ||
194 | clean: | |
195 | ures_close(b); | |
196 | ures_close(collElem); | |
197 | ures_close(collations); | |
198 | ures_close(binary); | |
199 | return NULL; | |
200 | } | |
201 | ||
202 | U_CAPI UCollator* | |
203 | ucol_open(const char *loc, | |
204 | UErrorCode *status) | |
205 | { | |
206 | UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN); | |
207 | UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc); | |
208 | UCollator *result = NULL; | |
209 | ||
210 | u_init(status); | |
211 | #if !UCONFIG_NO_SERVICE | |
212 | result = Collator::createUCollator(loc, status); | |
213 | if (result == NULL) | |
214 | #endif | |
215 | { | |
216 | result = ucol_open_internal(loc, status); | |
217 | } | |
218 | UTRACE_EXIT_PTR_STATUS(result, *status); | |
219 | return result; | |
220 | } | |
221 | ||
222 | U_CAPI UCollator* U_EXPORT2 | |
223 | ucol_openRules( const UChar *rules, | |
224 | int32_t rulesLength, | |
225 | UColAttributeValue normalizationMode, | |
226 | UCollationStrength strength, | |
227 | UParseError *parseError, | |
228 | UErrorCode *status) | |
229 | { | |
230 | uint32_t listLen = 0; | |
231 | UColTokenParser src; | |
232 | UColAttributeValue norm; | |
233 | UParseError tErr; | |
234 | ||
235 | if(status == NULL || U_FAILURE(*status)){ | |
236 | return 0; | |
237 | } | |
238 | ||
239 | u_init(status); | |
240 | if (U_FAILURE(*status)) { | |
241 | return NULL; | |
242 | } | |
243 | ||
244 | if(rules == NULL || rulesLength < -1) { | |
245 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
246 | return 0; | |
247 | } | |
248 | ||
249 | if(rulesLength == -1) { | |
250 | rulesLength = u_strlen(rules); | |
251 | } | |
252 | ||
253 | if(parseError == NULL){ | |
254 | parseError = &tErr; | |
255 | } | |
256 | ||
257 | switch(normalizationMode) { | |
258 | case UCOL_OFF: | |
259 | case UCOL_ON: | |
260 | case UCOL_DEFAULT: | |
261 | norm = normalizationMode; | |
262 | break; | |
263 | default: | |
264 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
265 | return 0; | |
266 | } | |
267 | ||
268 | UCollator *UCA = ucol_initUCA(status); | |
269 | ||
270 | if(U_FAILURE(*status)){ | |
271 | return NULL; | |
272 | } | |
273 | ||
274 | ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status); | |
275 | listLen = ucol_tok_assembleTokenList(&src,parseError, status); | |
276 | ||
277 | if(U_FAILURE(*status)) { | |
278 | /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */ | |
279 | /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */ | |
280 | /* so something might be done here... or on lower level */ | |
281 | #ifdef UCOL_DEBUG | |
282 | if(*status == U_ILLEGAL_ARGUMENT_ERROR) { | |
283 | fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source); | |
284 | } else { | |
285 | fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source); | |
286 | } | |
287 | #endif | |
288 | ucol_tok_closeTokenList(&src); | |
289 | return NULL; | |
290 | } | |
291 | UCollator *result = NULL; | |
292 | UCATableHeader *table = NULL; | |
293 | ||
294 | if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */ | |
295 | /* also, if we wanted to remove some contractions, we should make a tailoring */ | |
296 | table = ucol_assembleTailoringTable(&src, status); | |
297 | if(U_SUCCESS(*status)) { | |
298 | // builder version | |
299 | table->version[0] = UCOL_BUILDER_VERSION; | |
300 | // no tailoring information on this level | |
301 | table->version[1] = table->version[2] = table->version[3] = 0; | |
302 | // set UCD version | |
303 | u_getUnicodeVersion(table->UCDVersion); | |
304 | // set UCA version | |
305 | uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)); | |
306 | result = ucol_initCollator(table, 0, UCA, status); | |
307 | result->hasRealData = TRUE; | |
308 | result->freeImageOnClose = TRUE; | |
309 | } | |
310 | } else { /* no rules, but no error either */ | |
311 | // must be only options | |
312 | // We will init the collator from UCA | |
313 | result = ucol_initCollator(UCA->image, 0, UCA, status); | |
314 | // And set only the options | |
315 | UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet)); | |
316 | /* test for NULL */ | |
317 | if (opts == NULL) { | |
318 | *status = U_MEMORY_ALLOCATION_ERROR; | |
319 | goto cleanup; | |
320 | } | |
321 | uprv_memcpy(opts, src.opts, sizeof(UColOptionSet)); | |
322 | ucol_setOptionsFromHeader(result, opts, status); | |
323 | result->freeOptionsOnClose = TRUE; | |
324 | result->hasRealData = FALSE; | |
325 | result->freeImageOnClose = FALSE; | |
326 | } | |
327 | ||
328 | if(U_SUCCESS(*status)) { | |
329 | UChar *newRules; | |
330 | result->dataVersion[0] = UCOL_BUILDER_VERSION; | |
331 | if(rulesLength > 0) { | |
332 | newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR); | |
333 | /* test for NULL */ | |
334 | if (newRules == NULL) { | |
335 | *status = U_MEMORY_ALLOCATION_ERROR; | |
336 | goto cleanup; | |
337 | } | |
338 | uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR); | |
339 | newRules[rulesLength]=0; | |
340 | result->rules = newRules; | |
341 | result->rulesLength = rulesLength; | |
342 | result->freeRulesOnClose = TRUE; | |
343 | } | |
344 | result->rb = NULL; | |
345 | result->elements = NULL; | |
346 | result->validLocale = NULL; | |
347 | result->requestedLocale = NULL; | |
348 | ucol_setAttribute(result, UCOL_STRENGTH, strength, status); | |
349 | ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status); | |
350 | } else { | |
351 | cleanup: | |
352 | if(result != NULL) { | |
353 | ucol_close(result); | |
354 | } else { | |
355 | if(table != NULL) { | |
356 | uprv_free(table); | |
357 | } | |
358 | } | |
359 | result = NULL; | |
360 | } | |
361 | ||
362 | ucol_tok_closeTokenList(&src); | |
363 | ||
364 | return result; | |
365 | } | |
366 | ||
367 | U_CAPI int32_t U_EXPORT2 | |
368 | ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) { | |
369 | UErrorCode status = U_ZERO_ERROR; | |
370 | int32_t len = 0; | |
371 | int32_t UCAlen = 0; | |
372 | const UChar* ucaRules = 0; | |
373 | const UChar *rules = ucol_getRules(coll, &len); | |
374 | if(delta == UCOL_FULL_RULES) { | |
375 | /* take the UCA rules and append real rules at the end */ | |
376 | /* UCA rules will be probably coming from the root RB */ | |
377 | ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status); | |
378 | /* | |
379 | UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status); | |
380 | UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status); | |
381 | ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status); | |
382 | ures_close(uca); | |
383 | ures_close(cresb); | |
384 | */ | |
385 | } | |
386 | if(U_FAILURE(status)) { | |
387 | return 0; | |
388 | } | |
389 | if(buffer!=0 && bufferLen>0){ | |
390 | *buffer=0; | |
391 | if(UCAlen > 0) { | |
392 | u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen)); | |
393 | } | |
394 | if(len > 0 && bufferLen > UCAlen) { | |
395 | u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen)); | |
396 | } | |
397 | } | |
398 | return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status); | |
399 | } | |
400 | ||
401 | static const UChar _NUL = 0; | |
402 | ||
403 | U_CAPI const UChar* U_EXPORT2 | |
404 | ucol_getRules( const UCollator *coll, | |
405 | int32_t *length) | |
406 | { | |
407 | if(coll->rules != NULL) { | |
408 | *length = coll->rulesLength; | |
409 | return coll->rules; | |
410 | } | |
411 | else { | |
412 | *length = 0; | |
413 | return &_NUL; | |
414 | } | |
415 | } | |
416 | ||
417 | U_CAPI UBool U_EXPORT2 | |
418 | ucol_equals(const UCollator *source, const UCollator *target) { | |
419 | UErrorCode status = U_ZERO_ERROR; | |
420 | // if pointers are equal, collators are equal | |
421 | if(source == target) { | |
422 | return TRUE; | |
423 | } | |
424 | int32_t i = 0, j = 0; | |
425 | // if any of attributes are different, collators are not equal | |
426 | for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { | |
427 | if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) { | |
428 | return FALSE; | |
429 | } | |
430 | } | |
431 | ||
432 | int32_t sourceRulesLen = 0, targetRulesLen = 0; | |
433 | const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen); | |
434 | const UChar *targetRules = ucol_getRules(target, &targetRulesLen); | |
435 | ||
436 | if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) { | |
437 | // all the attributes are equal and the rules are equal - collators are equal | |
438 | return(TRUE); | |
439 | } | |
440 | // hard part, need to construct tree from rules and see if they yield the same tailoring | |
441 | UBool result = TRUE; | |
442 | UParseError parseError; | |
443 | UColTokenParser sourceParser, targetParser; | |
444 | int32_t sourceListLen = 0, targetListLen = 0; | |
445 | ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status); | |
446 | ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status); | |
447 | sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status); | |
448 | targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status); | |
449 | ||
450 | if(sourceListLen != targetListLen) { | |
451 | // different number of resets | |
452 | result = FALSE; | |
453 | } else { | |
454 | UColToken *sourceReset = NULL, *targetReset = NULL; | |
455 | UChar *sourceResetString = NULL, *targetResetString = NULL; | |
456 | int32_t sourceStringLen = 0, targetStringLen = 0; | |
457 | for(i = 0; i < sourceListLen; i++) { | |
458 | sourceReset = sourceParser.lh[i].reset; | |
459 | sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); | |
460 | sourceStringLen = sourceReset->source >> 24; | |
461 | for(j = 0; j < sourceListLen; j++) { | |
462 | targetReset = targetParser.lh[j].reset; | |
463 | targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); | |
464 | targetStringLen = targetReset->source >> 24; | |
465 | if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) { | |
466 | sourceReset = sourceParser.lh[i].first; | |
467 | targetReset = targetParser.lh[j].first; | |
468 | while(sourceReset != NULL && targetReset != NULL) { | |
469 | sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); | |
470 | sourceStringLen = sourceReset->source >> 24; | |
471 | targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); | |
472 | targetStringLen = targetReset->source >> 24; | |
473 | if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { | |
474 | result = FALSE; | |
475 | goto returnResult; | |
476 | } | |
477 | // probably also need to check the expansions | |
478 | if(sourceReset->expansion) { | |
479 | if(!targetReset->expansion) { | |
480 | result = FALSE; | |
481 | goto returnResult; | |
482 | } else { | |
483 | // compare expansions | |
484 | sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF); | |
485 | sourceStringLen = sourceReset->expansion >> 24; | |
486 | targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF); | |
487 | targetStringLen = targetReset->expansion >> 24; | |
488 | if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { | |
489 | result = FALSE; | |
490 | goto returnResult; | |
491 | } | |
492 | } | |
493 | } else { | |
494 | if(targetReset->expansion) { | |
495 | result = FALSE; | |
496 | goto returnResult; | |
497 | } | |
498 | } | |
499 | sourceReset = sourceReset->next; | |
500 | targetReset = targetReset->next; | |
501 | } | |
502 | if(sourceReset != targetReset) { // at least one is not NULL | |
503 | // there are more tailored elements in one list | |
504 | result = FALSE; | |
505 | goto returnResult; | |
506 | } | |
507 | ||
508 | ||
509 | break; | |
510 | } | |
511 | } | |
512 | // couldn't find the reset anchor, so the collators are not equal | |
513 | if(j == sourceListLen) { | |
514 | result = FALSE; | |
515 | goto returnResult; | |
516 | } | |
517 | } | |
518 | } | |
519 | ||
520 | returnResult: | |
521 | ucol_tok_closeTokenList(&sourceParser); | |
522 | ucol_tok_closeTokenList(&targetParser); | |
523 | return result; | |
524 | ||
525 | } | |
526 | ||
527 | U_CAPI int32_t U_EXPORT2 | |
528 | ucol_getDisplayName( const char *objLoc, | |
529 | const char *dispLoc, | |
530 | UChar *result, | |
531 | int32_t resultLength, | |
532 | UErrorCode *status) | |
533 | { | |
534 | ||
535 | if(U_FAILURE(*status)) return -1; | |
536 | UnicodeString dst; | |
537 | if(!(result==NULL && resultLength==0)) { | |
538 | // NULL destination for pure preflighting: empty dummy string | |
539 | // otherwise, alias the destination buffer | |
540 | dst.setTo(result, 0, resultLength); | |
541 | } | |
542 | Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst); | |
543 | return dst.extract(result, resultLength, *status); | |
544 | } | |
545 | ||
546 | U_CAPI const char* U_EXPORT2 | |
547 | ucol_getAvailable(int32_t index) | |
548 | { | |
549 | return uloc_getAvailable(index); | |
550 | } | |
551 | ||
552 | U_CAPI int32_t U_EXPORT2 | |
553 | ucol_countAvailable() | |
554 | { | |
555 | return uloc_countAvailable(); | |
556 | } | |
557 | ||
558 | #if !UCONFIG_NO_SERVICE | |
559 | U_CAPI UEnumeration* U_EXPORT2 | |
560 | ucol_openAvailableLocales(UErrorCode *status) { | |
561 | // This is a wrapper over Collator::getAvailableLocales() | |
562 | if (U_FAILURE(*status)) { | |
563 | return NULL; | |
564 | } | |
565 | StringEnumeration *s = Collator::getAvailableLocales(); | |
566 | if (s == NULL) { | |
567 | *status = U_MEMORY_ALLOCATION_ERROR; | |
568 | return NULL; | |
569 | } | |
570 | return uenum_openStringEnumeration(s, status); | |
571 | } | |
572 | #endif | |
573 | ||
// Note: KEYWORDS[0] != RESOURCE_NAME - alan
// The resource table is spelled "collations" while the locale keyword is
// spelled "collation"; the two must not be mixed up.

/* Name of the resource table that holds the collation data. */
static const char* RESOURCE_NAME = "collations";

/* Locale keywords understood by the collation service. */
static const char* KEYWORDS[] = { "collation" };

/* Number of entries in KEYWORDS. */
#define KEYWORD_COUNT (sizeof(KEYWORDS) / sizeof(*KEYWORDS))
581 | ||
582 | U_CAPI UEnumeration* U_EXPORT2 | |
583 | ucol_getKeywords(UErrorCode *status) { | |
584 | UEnumeration *result = NULL; | |
585 | if (U_SUCCESS(*status)) { | |
586 | return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status); | |
587 | } | |
588 | return result; | |
589 | } | |
590 | ||
591 | U_CAPI UEnumeration* U_EXPORT2 | |
592 | ucol_getKeywordValues(const char *keyword, UErrorCode *status) { | |
593 | // hard-coded to accept exactly one collation keyword | |
594 | // modify if additional collation keyword is added later | |
595 | if (U_SUCCESS(*status) && | |
596 | keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0) { | |
597 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
598 | return NULL; | |
599 | } | |
600 | return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status); | |
601 | } | |
602 | ||
603 | U_CAPI int32_t U_EXPORT2 | |
604 | ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, | |
605 | const char* keyword, const char* locale, | |
606 | UBool* isAvailable, UErrorCode* status) { | |
607 | // N.B.: Resource name is "collations" but keyword is "collation" | |
608 | return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL, | |
609 | "collations", keyword, locale, | |
610 | isAvailable, TRUE, status); | |
611 | } | |
612 | ||
613 | /* returns the locale name the collation data comes from */ | |
614 | U_CAPI const char * U_EXPORT2 | |
615 | ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { | |
616 | return ucol_getLocaleByType(coll, type, status); | |
617 | } | |
618 | ||
619 | U_CAPI const char * U_EXPORT2 | |
620 | ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { | |
621 | const char *result = NULL; | |
622 | if(status == NULL || U_FAILURE(*status)) { | |
623 | return NULL; | |
624 | } | |
625 | UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE); | |
626 | UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll); | |
627 | ||
628 | switch(type) { | |
629 | case ULOC_ACTUAL_LOCALE: | |
630 | // validLocale is set only if service registration has explicitly set the | |
631 | // requested and valid locales. if this is the case, the actual locale | |
632 | // is considered to be the valid locale. | |
633 | if (coll->validLocale != NULL) { | |
634 | result = coll->validLocale; | |
635 | } else if(coll->elements != NULL) { | |
636 | result = ures_getLocale(coll->elements, status); | |
637 | } | |
638 | break; | |
639 | case ULOC_VALID_LOCALE: | |
640 | if (coll->validLocale != NULL) { | |
641 | result = coll->validLocale; | |
642 | } else if(coll->rb != NULL) { | |
643 | result = ures_getLocale(coll->rb, status); | |
644 | } | |
645 | break; | |
646 | case ULOC_REQUESTED_LOCALE: | |
647 | result = coll->requestedLocale; | |
648 | break; | |
649 | default: | |
650 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
651 | } | |
652 | UTRACE_DATA1(UTRACE_INFO, "result = %s", result); | |
653 | UTRACE_EXIT_STATUS(*status); | |
654 | return result; | |
655 | } | |
656 | ||
657 | U_CAPI void U_EXPORT2 | |
658 | ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt) | |
659 | { | |
660 | if (coll) { | |
661 | if (coll->validLocale) { | |
662 | uprv_free(coll->validLocale); | |
663 | } | |
664 | coll->validLocale = validLocaleToAdopt; | |
665 | if (coll->requestedLocale) { // should always have | |
666 | uprv_free(coll->requestedLocale); | |
667 | } | |
668 | coll->requestedLocale = requestedLocaleToAdopt; | |
669 | } | |
670 | } | |
671 | ||
672 | U_CAPI USet * U_EXPORT2 | |
673 | ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) | |
674 | { | |
675 | if(status == NULL || U_FAILURE(*status)) { | |
676 | return NULL; | |
677 | } | |
678 | if(coll == NULL || coll->UCA == NULL) { | |
679 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
680 | return NULL; | |
681 | } | |
682 | UParseError parseError; | |
683 | UColTokenParser src; | |
684 | int32_t rulesLen = 0; | |
685 | const UChar *rules = ucol_getRules(coll, &rulesLen); | |
686 | const UChar *current = NULL; | |
687 | UBool startOfRules = TRUE; | |
688 | // we internally use the C++ class, for the following reasons: | |
689 | // 1. we need to utilize canonical iterator, which is a C++ only class | |
690 | // 2. canonical iterator returns UnicodeStrings - USet cannot take them | |
691 | // 3. USet is internally really UnicodeSet, C is just a wrapper | |
692 | UnicodeSet *tailored = new UnicodeSet(); | |
693 | UnicodeString pattern; | |
694 | UnicodeString empty; | |
695 | CanonicalIterator it(empty, *status); | |
696 | ||
697 | ||
698 | // The idea is to tokenize the rule set. For each non-reset token, | |
699 | // we add all the canonicaly equivalent FCD sequences | |
700 | ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status); | |
701 | while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError, status)) != NULL) { | |
702 | startOfRules = FALSE; | |
703 | if(src.parsedToken.strength != UCOL_TOK_RESET) { | |
704 | const UChar *stuff = src.source+(src.parsedToken.charsOffset); | |
705 | it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status); | |
706 | pattern = it.next(); | |
707 | while(!pattern.isBogus()) { | |
708 | if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) { | |
709 | tailored->add(pattern); | |
710 | } | |
711 | pattern = it.next(); | |
712 | } | |
713 | } | |
714 | } | |
715 | ucol_tok_closeTokenList(&src); | |
716 | return (USet *)tailored; | |
717 | } | |
718 | ||
719 | #endif /* #if !UCONFIG_NO_COLLATION */ |