]>
Commit | Line | Data |
---|---|---|
73c04bcf A |
1 | /* |
2 | ******************************************************************************* | |
46f4442e | 3 | * Copyright (C) 1996-2008, International Business Machines |
73c04bcf A |
4 | * Corporation and others. All Rights Reserved. |
5 | ******************************************************************************* | |
6 | * file name: ucol_res.cpp | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * Description: | |
12 | * This file contains dependencies that the collation run-time doesn't normally | |
13 | * need. This mainly contains resource bundle usage and collation meta information | |
14 | * | |
15 | * Modification history | |
16 | * Date Name Comments | |
17 | * 1996-1999 various members of ICU team maintained C API for collation framework | |
18 | * 02/16/2001 synwee Added internal method getPrevSpecialCE | |
19 | * 03/01/2001 synwee Added maxexpansion functionality. | |
20 | * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant | |
21 | * 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp | |
22 | */ | |
23 | ||
24 | #include "unicode/utypes.h" | |
25 | ||
26 | #if !UCONFIG_NO_COLLATION | |
27 | #include "unicode/uloc.h" | |
28 | #include "unicode/coll.h" | |
29 | #include "unicode/tblcoll.h" | |
30 | #include "unicode/caniter.h" | |
31 | #include "unicode/ustring.h" | |
32 | ||
33 | #include "ucol_bld.h" | |
34 | #include "ucol_imp.h" | |
35 | #include "ucol_tok.h" | |
36 | #include "ucol_elm.h" | |
37 | #include "uresimp.h" | |
38 | #include "ustr_imp.h" | |
39 | #include "cstring.h" | |
40 | #include "umutex.h" | |
46f4442e | 41 | #include "ucln_in.h" |
73c04bcf A |
42 | #include "ustrenum.h" |
43 | #include "putilimp.h" | |
44 | #include "utracimp.h" | |
45 | #include "cmemory.h" | |
46 | ||
46f4442e A |
47 | U_NAMESPACE_USE |
48 | ||
49 | // static UCA. There is only one. Collators don't use it. | |
50 | // It is referenced only in ucol_initUCA and ucol_cleanup | |
51 | static UCollator* _staticUCA = NULL; | |
52 | // static pointer to udata memory. Inited in ucol_initUCA | |
53 | // used for cleanup in ucol_cleanup | |
54 | static UDataMemory* UCA_DATA_MEM = NULL; | |
55 | ||
73c04bcf | 56 | U_CDECL_BEGIN |
46f4442e A |
57 | static UBool U_CALLCONV |
58 | ucol_res_cleanup(void) | |
59 | { | |
60 | if (UCA_DATA_MEM) { | |
61 | udata_close(UCA_DATA_MEM); | |
62 | UCA_DATA_MEM = NULL; | |
63 | } | |
64 | if (_staticUCA) { | |
65 | ucol_close(_staticUCA); | |
66 | _staticUCA = NULL; | |
67 | } | |
68 | return TRUE; | |
69 | } | |
70 | ||
71 | static UBool U_CALLCONV | |
72 | isAcceptableUCA(void * /*context*/, | |
73 | const char * /*type*/, const char * /*name*/, | |
74 | const UDataInfo *pInfo){ | |
75 | /* context, type & name are intentionally not used */ | |
76 | if( pInfo->size>=20 && | |
77 | pInfo->isBigEndian==U_IS_BIG_ENDIAN && | |
78 | pInfo->charsetFamily==U_CHARSET_FAMILY && | |
79 | pInfo->dataFormat[0]==UCA_DATA_FORMAT_0 && /* dataFormat="UCol" */ | |
80 | pInfo->dataFormat[1]==UCA_DATA_FORMAT_1 && | |
81 | pInfo->dataFormat[2]==UCA_DATA_FORMAT_2 && | |
82 | pInfo->dataFormat[3]==UCA_DATA_FORMAT_3 && | |
83 | pInfo->formatVersion[0]==UCA_FORMAT_VERSION_0 && | |
84 | pInfo->formatVersion[1]>=UCA_FORMAT_VERSION_1// && | |
85 | //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 && | |
86 | //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh | |
87 | //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh | |
88 | ) { | |
89 | UVersionInfo UCDVersion; | |
90 | u_getUnicodeVersion(UCDVersion); | |
91 | return (UBool)(pInfo->dataVersion[0]==UCDVersion[0] | |
92 | && pInfo->dataVersion[1]==UCDVersion[1]); | |
93 | //&& pInfo->dataVersion[2]==ucaDataInfo.dataVersion[2] | |
94 | //&& pInfo->dataVersion[3]==ucaDataInfo.dataVersion[3]); | |
95 | } else { | |
96 | return FALSE; | |
97 | } | |
73c04bcf A |
98 | } |
99 | U_CDECL_END | |
100 | ||
46f4442e A |
101 | /* do not close UCA returned by ucol_initUCA! */ |
102 | UCollator * | |
103 | ucol_initUCA(UErrorCode *status) { | |
104 | if(U_FAILURE(*status)) { | |
105 | return NULL; | |
106 | } | |
107 | UBool needsInit; | |
108 | UMTX_CHECK(NULL, (_staticUCA == NULL), needsInit); | |
109 | ||
110 | if(needsInit) { | |
111 | UDataMemory *result = udata_openChoice(NULL, UCA_DATA_TYPE, UCA_DATA_NAME, isAcceptableUCA, NULL, status); | |
112 | ||
113 | if(U_SUCCESS(*status)){ | |
114 | UCollator *newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), NULL, NULL, status); | |
115 | if(U_SUCCESS(*status)){ | |
116 | umtx_lock(NULL); | |
117 | if(_staticUCA == NULL) { | |
118 | _staticUCA = newUCA; | |
119 | newUCA = NULL; | |
120 | UCA_DATA_MEM = result; | |
121 | result = NULL; | |
122 | } | |
123 | umtx_unlock(NULL); | |
124 | ||
125 | ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup); | |
126 | if(newUCA != NULL) { | |
127 | ucol_close(newUCA); | |
128 | udata_close(result); | |
129 | } | |
130 | // Initalize variables for implicit generation | |
131 | uprv_uca_initImplicitConstants(status); | |
132 | }else{ | |
133 | ucol_close(newUCA); | |
134 | udata_close(result); | |
135 | } | |
136 | } | |
137 | else { | |
138 | udata_close(result); | |
139 | } | |
140 | } | |
141 | return _staticUCA; | |
142 | } | |
143 | ||
144 | U_CAPI void U_EXPORT2 | |
145 | ucol_forgetUCA(void) | |
146 | { | |
147 | _staticUCA = NULL; | |
148 | UCA_DATA_MEM = NULL; | |
149 | } | |
150 | ||
73c04bcf A |
151 | /****************************************************************************/ |
152 | /* Following are the open/close functions */ | |
153 | /* */ | |
154 | /****************************************************************************/ | |
155 | static UCollator* | |
156 | tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) { | |
46f4442e A |
157 | int32_t rulesLen = 0; |
158 | const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status); | |
159 | return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status); | |
73c04bcf A |
160 | } |
161 | ||
162 | ||
163 | // API in ucol_imp.h | |
164 | ||
165 | U_CFUNC UCollator* | |
166 | ucol_open_internal(const char *loc, | |
167 | UErrorCode *status) | |
168 | { | |
46f4442e | 169 | UErrorCode intStatus = U_ZERO_ERROR; |
73c04bcf A |
170 | const UCollator* UCA = ucol_initUCA(status); |
171 | ||
172 | /* New version */ | |
173 | if(U_FAILURE(*status)) return 0; | |
174 | ||
175 | ||
176 | ||
177 | UCollator *result = NULL; | |
178 | UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status); | |
179 | ||
180 | /* we try to find stuff from keyword */ | |
181 | UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status); | |
182 | UResourceBundle *collElem = NULL; | |
183 | char keyBuffer[256]; | |
184 | // if there is a keyword, we pick it up and try to get elements | |
185 | if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) { | |
186 | // no keyword. we try to find the default setting, which will give us the keyword value | |
46f4442e | 187 | intStatus = U_ZERO_ERROR; |
73c04bcf A |
188 | // finding default value does not affect collation fallback status |
189 | UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus); | |
190 | if(U_SUCCESS(intStatus)) { | |
191 | int32_t defaultKeyLen = 0; | |
192 | const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus); | |
193 | u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen); | |
194 | keyBuffer[defaultKeyLen] = 0; | |
195 | } else { | |
196 | *status = U_INTERNAL_PROGRAM_ERROR; | |
197 | return NULL; | |
198 | } | |
199 | ures_close(defaultColl); | |
200 | } | |
46f4442e A |
201 | collElem = ures_getByKeyWithFallback(collations, keyBuffer, collations, status); |
202 | collations = NULL; // We just reused the collations object as collElem. | |
73c04bcf A |
203 | |
204 | UResourceBundle *binary = NULL; | |
205 | ||
206 | if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */ | |
207 | *status = U_USING_DEFAULT_WARNING; | |
208 | result = ucol_initCollator(UCA->image, result, UCA, status); | |
46f4442e A |
209 | if (U_FAILURE(*status)) { |
210 | goto clean; | |
211 | } | |
73c04bcf | 212 | // if we use UCA, real locale is root |
46f4442e A |
213 | ures_close(b); |
214 | b = ures_open(U_ICUDATA_COLL, "", status); | |
215 | ures_close(collElem); | |
216 | collElem = ures_open(U_ICUDATA_COLL, "", status); | |
73c04bcf A |
217 | if(U_FAILURE(*status)) { |
218 | goto clean; | |
219 | } | |
73c04bcf A |
220 | result->hasRealData = FALSE; |
221 | } else if(U_SUCCESS(*status)) { | |
46f4442e | 222 | intStatus = U_ZERO_ERROR; |
73c04bcf | 223 | |
46f4442e | 224 | binary = ures_getByKey(collElem, "%%CollationBin", NULL, &intStatus); |
73c04bcf | 225 | |
46f4442e | 226 | if(intStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */ |
73c04bcf A |
227 | binary = NULL; |
228 | result = tryOpeningFromRules(collElem, status); | |
229 | if(U_FAILURE(*status)) { | |
230 | goto clean; | |
231 | } | |
232 | } else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */ | |
46f4442e | 233 | int32_t len = 0; |
73c04bcf A |
234 | const uint8_t *inData = ures_getBinary(binary, &len, status); |
235 | UCATableHeader *colData = (UCATableHeader *)inData; | |
236 | if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 || | |
237 | uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 || | |
238 | colData->version[0] != UCOL_BUILDER_VERSION) | |
239 | { | |
240 | *status = U_DIFFERENT_UCA_VERSION; | |
241 | result = tryOpeningFromRules(collElem, status); | |
242 | } else { | |
243 | if(U_FAILURE(*status)){ | |
244 | goto clean; | |
245 | } | |
246 | if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) { | |
247 | result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status); | |
248 | if(U_FAILURE(*status)){ | |
249 | goto clean; | |
250 | } | |
251 | result->hasRealData = TRUE; | |
252 | } else { | |
253 | result = ucol_initCollator(UCA->image, result, UCA, status); | |
254 | ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status); | |
255 | if(U_FAILURE(*status)){ | |
256 | goto clean; | |
257 | } | |
258 | result->hasRealData = FALSE; | |
259 | } | |
260 | result->freeImageOnClose = FALSE; | |
261 | } | |
262 | } | |
46f4442e A |
263 | intStatus = U_ZERO_ERROR; |
264 | result->rules = ures_getStringByKey(collElem, "Sequence", &result->rulesLength, &intStatus); | |
73c04bcf A |
265 | result->freeRulesOnClose = FALSE; |
266 | } else { /* There is another error, and we're just gonna clean up */ | |
267 | goto clean; | |
268 | } | |
269 | ||
46f4442e A |
270 | intStatus = U_ZERO_ERROR; |
271 | result->ucaRules = ures_getStringByKey(b,"UCARules",NULL,&intStatus); | |
73c04bcf A |
272 | |
273 | if(loc == NULL) { | |
46f4442e | 274 | loc = ures_getLocale(b, status); |
73c04bcf | 275 | } |
46f4442e | 276 | result->requestedLocale = uprv_strdup(loc); |
73c04bcf A |
277 | /* test for NULL */ |
278 | if (result->requestedLocale == NULL) { | |
279 | *status = U_MEMORY_ALLOCATION_ERROR; | |
280 | goto clean; | |
281 | } | |
46f4442e A |
282 | loc = ures_getLocale(collElem, status); |
283 | result->actualLocale = uprv_strdup(loc); | |
284 | /* test for NULL */ | |
285 | if (result->actualLocale == NULL) { | |
286 | *status = U_MEMORY_ALLOCATION_ERROR; | |
287 | goto clean; | |
288 | } | |
289 | loc = ures_getLocale(b, status); | |
290 | result->validLocale = uprv_strdup(loc); | |
291 | /* test for NULL */ | |
292 | if (result->validLocale == NULL) { | |
293 | *status = U_MEMORY_ALLOCATION_ERROR; | |
294 | goto clean; | |
295 | } | |
73c04bcf | 296 | |
46f4442e A |
297 | ures_close(b); |
298 | ures_close(collElem); | |
73c04bcf | 299 | ures_close(binary); |
73c04bcf A |
300 | return result; |
301 | ||
302 | clean: | |
303 | ures_close(b); | |
304 | ures_close(collElem); | |
73c04bcf | 305 | ures_close(binary); |
46f4442e | 306 | ucol_close(result); |
73c04bcf A |
307 | return NULL; |
308 | } | |
309 | ||
310 | U_CAPI UCollator* | |
311 | ucol_open(const char *loc, | |
312 | UErrorCode *status) | |
313 | { | |
46f4442e A |
314 | U_NAMESPACE_USE |
315 | ||
316 | UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN); | |
317 | UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc); | |
318 | UCollator *result = NULL; | |
73c04bcf | 319 | |
46f4442e | 320 | u_init(status); |
73c04bcf | 321 | #if !UCONFIG_NO_SERVICE |
46f4442e A |
322 | result = Collator::createUCollator(loc, status); |
323 | if (result == NULL) | |
73c04bcf | 324 | #endif |
46f4442e A |
325 | { |
326 | result = ucol_open_internal(loc, status); | |
327 | } | |
328 | UTRACE_EXIT_PTR_STATUS(result, *status); | |
329 | return result; | |
73c04bcf A |
330 | } |
331 | ||
332 | U_CAPI UCollator* U_EXPORT2 | |
333 | ucol_openRules( const UChar *rules, | |
46f4442e A |
334 | int32_t rulesLength, |
335 | UColAttributeValue normalizationMode, | |
336 | UCollationStrength strength, | |
337 | UParseError *parseError, | |
338 | UErrorCode *status) | |
73c04bcf | 339 | { |
46f4442e A |
340 | UColTokenParser src; |
341 | UColAttributeValue norm; | |
342 | UParseError tErr; | |
73c04bcf | 343 | |
46f4442e A |
344 | if(status == NULL || U_FAILURE(*status)){ |
345 | return 0; | |
346 | } | |
73c04bcf | 347 | |
46f4442e A |
348 | u_init(status); |
349 | if (U_FAILURE(*status)) { | |
350 | return NULL; | |
73c04bcf | 351 | } |
73c04bcf | 352 | |
46f4442e A |
353 | if(rules == NULL || rulesLength < -1) { |
354 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
355 | return 0; | |
356 | } | |
357 | ||
358 | if(rulesLength == -1) { | |
359 | rulesLength = u_strlen(rules); | |
360 | } | |
361 | ||
362 | if(parseError == NULL){ | |
363 | parseError = &tErr; | |
364 | } | |
365 | ||
366 | switch(normalizationMode) { | |
367 | case UCOL_OFF: | |
368 | case UCOL_ON: | |
369 | case UCOL_DEFAULT: | |
370 | norm = normalizationMode; | |
371 | break; | |
372 | default: | |
373 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
374 | return 0; | |
375 | } | |
376 | ||
377 | UCollator *result = NULL; | |
378 | UCATableHeader *table = NULL; | |
379 | UCollator *UCA = ucol_initUCA(status); | |
380 | ||
381 | if(U_FAILURE(*status)){ | |
382 | return NULL; | |
383 | } | |
384 | ||
385 | ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status); | |
386 | ucol_tok_assembleTokenList(&src,parseError, status); | |
387 | ||
388 | if(U_FAILURE(*status)) { | |
389 | /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */ | |
390 | /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */ | |
391 | /* so something might be done here... or on lower level */ | |
392 | #ifdef UCOL_DEBUG | |
393 | if(*status == U_ILLEGAL_ARGUMENT_ERROR) { | |
394 | fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source); | |
395 | } else { | |
396 | fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source); | |
397 | } | |
398 | #endif | |
73c04bcf A |
399 | goto cleanup; |
400 | } | |
46f4442e A |
401 | |
402 | if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */ | |
403 | /* also, if we wanted to remove some contractions, we should make a tailoring */ | |
404 | table = ucol_assembleTailoringTable(&src, status); | |
405 | if(U_SUCCESS(*status)) { | |
406 | // builder version | |
407 | table->version[0] = UCOL_BUILDER_VERSION; | |
408 | // no tailoring information on this level | |
409 | table->version[1] = table->version[2] = table->version[3] = 0; | |
410 | // set UCD version | |
411 | u_getUnicodeVersion(table->UCDVersion); | |
412 | // set UCA version | |
413 | uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)); | |
414 | result = ucol_initCollator(table, 0, UCA, status); | |
415 | if (U_FAILURE(*status)) { | |
416 | goto cleanup; | |
417 | } | |
418 | result->hasRealData = TRUE; | |
419 | result->freeImageOnClose = TRUE; | |
420 | } | |
421 | } else { /* no rules, but no error either */ | |
422 | // must be only options | |
423 | // We will init the collator from UCA | |
424 | result = ucol_initCollator(UCA->image, 0, UCA, status); | |
425 | // Check for null result | |
426 | if (U_FAILURE(*status)) { | |
427 | goto cleanup; | |
428 | } | |
429 | // And set only the options | |
430 | UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet)); | |
431 | /* test for NULL */ | |
432 | if (opts == NULL) { | |
433 | *status = U_MEMORY_ALLOCATION_ERROR; | |
434 | goto cleanup; | |
435 | } | |
436 | uprv_memcpy(opts, src.opts, sizeof(UColOptionSet)); | |
437 | ucol_setOptionsFromHeader(result, opts, status); | |
438 | result->freeOptionsOnClose = TRUE; | |
439 | result->hasRealData = FALSE; | |
440 | result->freeImageOnClose = FALSE; | |
441 | } | |
442 | ||
443 | if(U_SUCCESS(*status)) { | |
444 | UChar *newRules; | |
445 | result->dataVersion[0] = UCOL_BUILDER_VERSION; | |
446 | if(rulesLength > 0) { | |
447 | newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR); | |
448 | /* test for NULL */ | |
449 | if (newRules == NULL) { | |
450 | *status = U_MEMORY_ALLOCATION_ERROR; | |
451 | goto cleanup; | |
452 | } | |
453 | uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR); | |
454 | newRules[rulesLength]=0; | |
455 | result->rules = newRules; | |
456 | result->rulesLength = rulesLength; | |
457 | result->freeRulesOnClose = TRUE; | |
458 | } | |
459 | result->ucaRules = NULL; | |
460 | result->actualLocale = NULL; | |
461 | result->validLocale = NULL; | |
462 | result->requestedLocale = NULL; | |
463 | ucol_setAttribute(result, UCOL_STRENGTH, strength, status); | |
464 | ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status); | |
73c04bcf | 465 | } else { |
46f4442e A |
466 | cleanup: |
467 | if(result != NULL) { | |
468 | ucol_close(result); | |
469 | } else { | |
470 | if(table != NULL) { | |
471 | uprv_free(table); | |
472 | } | |
473 | } | |
474 | result = NULL; | |
73c04bcf | 475 | } |
73c04bcf | 476 | |
46f4442e | 477 | ucol_tok_closeTokenList(&src); |
73c04bcf | 478 | |
46f4442e | 479 | return result; |
73c04bcf A |
480 | } |
481 | ||
482 | U_CAPI int32_t U_EXPORT2 | |
483 | ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) { | |
46f4442e A |
484 | UErrorCode status = U_ZERO_ERROR; |
485 | int32_t len = 0; | |
486 | int32_t UCAlen = 0; | |
487 | const UChar* ucaRules = 0; | |
488 | const UChar *rules = ucol_getRules(coll, &len); | |
489 | if(delta == UCOL_FULL_RULES) { | |
490 | /* take the UCA rules and append real rules at the end */ | |
491 | /* UCA rules will be probably coming from the root RB */ | |
492 | ucaRules = coll->ucaRules; | |
493 | if (ucaRules) { | |
494 | UCAlen = u_strlen(ucaRules); | |
495 | } | |
496 | /* | |
497 | ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status); | |
498 | UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status); | |
499 | UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status); | |
500 | ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status); | |
501 | ures_close(uca); | |
502 | ures_close(cresb); | |
503 | */ | |
504 | } | |
505 | if(U_FAILURE(status)) { | |
506 | return 0; | |
507 | } | |
508 | if(buffer!=0 && bufferLen>0){ | |
509 | *buffer=0; | |
510 | if(UCAlen > 0) { | |
511 | u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen)); | |
512 | } | |
513 | if(len > 0 && bufferLen > UCAlen) { | |
514 | u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen)); | |
515 | } | |
516 | } | |
517 | return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status); | |
73c04bcf A |
518 | } |
519 | ||
520 | static const UChar _NUL = 0; | |
521 | ||
522 | U_CAPI const UChar* U_EXPORT2 | |
523 | ucol_getRules( const UCollator *coll, | |
46f4442e | 524 | int32_t *length) |
73c04bcf A |
525 | { |
526 | if(coll->rules != NULL) { | |
527 | *length = coll->rulesLength; | |
528 | return coll->rules; | |
529 | } | |
530 | else { | |
531 | *length = 0; | |
532 | return &_NUL; | |
533 | } | |
534 | } | |
535 | ||
536 | U_CAPI UBool U_EXPORT2 | |
537 | ucol_equals(const UCollator *source, const UCollator *target) { | |
46f4442e A |
538 | UErrorCode status = U_ZERO_ERROR; |
539 | // if pointers are equal, collators are equal | |
540 | if(source == target) { | |
541 | return TRUE; | |
542 | } | |
543 | int32_t i = 0, j = 0; | |
544 | // if any of attributes are different, collators are not equal | |
545 | for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { | |
546 | if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) { | |
547 | return FALSE; | |
548 | } | |
549 | } | |
550 | ||
551 | int32_t sourceRulesLen = 0, targetRulesLen = 0; | |
552 | const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen); | |
553 | const UChar *targetRules = ucol_getRules(target, &targetRulesLen); | |
554 | ||
555 | if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) { | |
556 | // all the attributes are equal and the rules are equal - collators are equal | |
557 | return(TRUE); | |
558 | } | |
559 | // hard part, need to construct tree from rules and see if they yield the same tailoring | |
560 | UBool result = TRUE; | |
561 | UParseError parseError; | |
562 | UColTokenParser sourceParser, targetParser; | |
563 | int32_t sourceListLen = 0, targetListLen = 0; | |
564 | ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status); | |
565 | ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status); | |
566 | sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status); | |
567 | targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status); | |
568 | ||
569 | if(sourceListLen != targetListLen) { | |
570 | // different number of resets | |
571 | result = FALSE; | |
572 | } else { | |
573 | UColToken *sourceReset = NULL, *targetReset = NULL; | |
574 | UChar *sourceResetString = NULL, *targetResetString = NULL; | |
575 | int32_t sourceStringLen = 0, targetStringLen = 0; | |
576 | for(i = 0; i < sourceListLen; i++) { | |
577 | sourceReset = sourceParser.lh[i].reset; | |
73c04bcf A |
578 | sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); |
579 | sourceStringLen = sourceReset->source >> 24; | |
46f4442e A |
580 | for(j = 0; j < sourceListLen; j++) { |
581 | targetReset = targetParser.lh[j].reset; | |
582 | targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); | |
583 | targetStringLen = targetReset->source >> 24; | |
584 | if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) { | |
585 | sourceReset = sourceParser.lh[i].first; | |
586 | targetReset = targetParser.lh[j].first; | |
587 | while(sourceReset != NULL && targetReset != NULL) { | |
588 | sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); | |
589 | sourceStringLen = sourceReset->source >> 24; | |
590 | targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); | |
591 | targetStringLen = targetReset->source >> 24; | |
592 | if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { | |
593 | result = FALSE; | |
594 | goto returnResult; | |
595 | } | |
596 | // probably also need to check the expansions | |
597 | if(sourceReset->expansion) { | |
598 | if(!targetReset->expansion) { | |
599 | result = FALSE; | |
600 | goto returnResult; | |
601 | } else { | |
602 | // compare expansions | |
603 | sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF); | |
604 | sourceStringLen = sourceReset->expansion >> 24; | |
605 | targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF); | |
606 | targetStringLen = targetReset->expansion >> 24; | |
607 | if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { | |
608 | result = FALSE; | |
609 | goto returnResult; | |
610 | } | |
611 | } | |
612 | } else { | |
613 | if(targetReset->expansion) { | |
614 | result = FALSE; | |
615 | goto returnResult; | |
616 | } | |
617 | } | |
618 | sourceReset = sourceReset->next; | |
619 | targetReset = targetReset->next; | |
620 | } | |
621 | if(sourceReset != targetReset) { // at least one is not NULL | |
622 | // there are more tailored elements in one list | |
623 | result = FALSE; | |
624 | goto returnResult; | |
625 | } | |
626 | ||
627 | ||
628 | break; | |
73c04bcf | 629 | } |
46f4442e A |
630 | } |
631 | // couldn't find the reset anchor, so the collators are not equal | |
632 | if(j == sourceListLen) { | |
73c04bcf A |
633 | result = FALSE; |
634 | goto returnResult; | |
73c04bcf | 635 | } |
73c04bcf | 636 | } |
73c04bcf | 637 | } |
73c04bcf A |
638 | |
639 | returnResult: | |
46f4442e A |
640 | ucol_tok_closeTokenList(&sourceParser); |
641 | ucol_tok_closeTokenList(&targetParser); | |
642 | return result; | |
73c04bcf A |
643 | |
644 | } | |
645 | ||
646 | U_CAPI int32_t U_EXPORT2 | |
647 | ucol_getDisplayName( const char *objLoc, | |
46f4442e A |
648 | const char *dispLoc, |
649 | UChar *result, | |
650 | int32_t resultLength, | |
651 | UErrorCode *status) | |
73c04bcf | 652 | { |
46f4442e A |
653 | U_NAMESPACE_USE |
654 | ||
655 | if(U_FAILURE(*status)) return -1; | |
656 | UnicodeString dst; | |
657 | if(!(result==NULL && resultLength==0)) { | |
658 | // NULL destination for pure preflighting: empty dummy string | |
659 | // otherwise, alias the destination buffer | |
660 | dst.setTo(result, 0, resultLength); | |
661 | } | |
662 | Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst); | |
663 | return dst.extract(result, resultLength, *status); | |
73c04bcf A |
664 | } |
665 | ||
666 | U_CAPI const char* U_EXPORT2 | |
667 | ucol_getAvailable(int32_t index) | |
668 | { | |
46f4442e A |
669 | int32_t count = 0; |
670 | const Locale *loc = Collator::getAvailableLocales(count); | |
671 | if (loc != NULL && index < count) { | |
672 | return loc[index].getName(); | |
673 | } | |
674 | return NULL; | |
73c04bcf A |
675 | } |
676 | ||
677 | U_CAPI int32_t U_EXPORT2 | |
678 | ucol_countAvailable() | |
679 | { | |
46f4442e A |
680 | int32_t count = 0; |
681 | Collator::getAvailableLocales(count); | |
682 | return count; | |
73c04bcf A |
683 | } |
684 | ||
685 | #if !UCONFIG_NO_SERVICE | |
686 | U_CAPI UEnumeration* U_EXPORT2 | |
687 | ucol_openAvailableLocales(UErrorCode *status) { | |
46f4442e A |
688 | U_NAMESPACE_USE |
689 | ||
73c04bcf A |
690 | // This is a wrapper over Collator::getAvailableLocales() |
691 | if (U_FAILURE(*status)) { | |
692 | return NULL; | |
693 | } | |
694 | StringEnumeration *s = Collator::getAvailableLocales(); | |
695 | if (s == NULL) { | |
696 | *status = U_MEMORY_ALLOCATION_ERROR; | |
697 | return NULL; | |
698 | } | |
699 | return uenum_openStringEnumeration(s, status); | |
700 | } | |
701 | #endif | |
702 | ||
// Note: KEYWORDS[0] != RESOURCE_NAME - alan
// (the resource table is named "collations", the keyword is "collation")

// Name of the resource table passed to ures_getKeywordValues.
static const char RESOURCE_NAME[] = "collations";

// Keywords reported by ucol_getKeywords.
static const char* const KEYWORDS[] = { "collation" };

// Number of entries in KEYWORDS.
#define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
710 | ||
711 | U_CAPI UEnumeration* U_EXPORT2 | |
712 | ucol_getKeywords(UErrorCode *status) { | |
713 | UEnumeration *result = NULL; | |
714 | if (U_SUCCESS(*status)) { | |
715 | return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status); | |
716 | } | |
717 | return result; | |
718 | } | |
719 | ||
720 | U_CAPI UEnumeration* U_EXPORT2 | |
721 | ucol_getKeywordValues(const char *keyword, UErrorCode *status) { | |
46f4442e A |
722 | if (U_FAILURE(*status)) { |
723 | return NULL; | |
724 | } | |
73c04bcf A |
725 | // hard-coded to accept exactly one collation keyword |
726 | // modify if additional collation keyword is added later | |
46f4442e A |
727 | if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0) |
728 | { | |
73c04bcf A |
729 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
730 | return NULL; | |
731 | } | |
732 | return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status); | |
733 | } | |
734 | ||
735 | U_CAPI int32_t U_EXPORT2 | |
736 | ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, | |
737 | const char* keyword, const char* locale, | |
46f4442e A |
738 | UBool* isAvailable, UErrorCode* status) |
739 | { | |
73c04bcf A |
740 | // N.B.: Resource name is "collations" but keyword is "collation" |
741 | return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL, | |
46f4442e A |
742 | "collations", keyword, locale, |
743 | isAvailable, TRUE, status); | |
73c04bcf A |
744 | } |
745 | ||
746 | /* returns the locale name the collation data comes from */ | |
747 | U_CAPI const char * U_EXPORT2 | |
748 | ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { | |
46f4442e | 749 | return ucol_getLocaleByType(coll, type, status); |
73c04bcf A |
750 | } |
751 | ||
752 | U_CAPI const char * U_EXPORT2 | |
753 | ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { | |
46f4442e A |
754 | const char *result = NULL; |
755 | if(status == NULL || U_FAILURE(*status)) { | |
756 | return NULL; | |
757 | } | |
758 | UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE); | |
759 | UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll); | |
760 | ||
761 | switch(type) { | |
762 | case ULOC_ACTUAL_LOCALE: | |
763 | result = coll->actualLocale; | |
764 | break; | |
765 | case ULOC_VALID_LOCALE: | |
766 | result = coll->validLocale; | |
767 | break; | |
768 | case ULOC_REQUESTED_LOCALE: | |
769 | result = coll->requestedLocale; | |
770 | break; | |
771 | default: | |
772 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
773 | } | |
774 | UTRACE_DATA1(UTRACE_INFO, "result = %s", result); | |
775 | UTRACE_EXIT_STATUS(*status); | |
776 | return result; | |
73c04bcf A |
777 | } |
778 | ||
46f4442e A |
779 | U_CFUNC void U_EXPORT2 |
780 | ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt, char *actualLocaleToAdopt) | |
73c04bcf | 781 | { |
46f4442e A |
782 | if (coll) { |
783 | if (coll->validLocale) { | |
784 | uprv_free(coll->validLocale); | |
785 | } | |
786 | coll->validLocale = validLocaleToAdopt; | |
787 | if (coll->requestedLocale) { // should always have | |
788 | uprv_free(coll->requestedLocale); | |
789 | } | |
790 | coll->requestedLocale = requestedLocaleToAdopt; | |
791 | if (coll->actualLocale) { | |
792 | uprv_free(coll->actualLocale); | |
793 | } | |
794 | coll->actualLocale = actualLocaleToAdopt; | |
73c04bcf | 795 | } |
73c04bcf A |
796 | } |
797 | ||
798 | U_CAPI USet * U_EXPORT2 | |
799 | ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) | |
800 | { | |
46f4442e A |
801 | U_NAMESPACE_USE |
802 | ||
803 | if(status == NULL || U_FAILURE(*status)) { | |
804 | return NULL; | |
805 | } | |
806 | if(coll == NULL || coll->UCA == NULL) { | |
807 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
808 | return NULL; | |
809 | } | |
810 | UParseError parseError; | |
811 | UColTokenParser src; | |
812 | int32_t rulesLen = 0; | |
813 | const UChar *rules = ucol_getRules(coll, &rulesLen); | |
814 | UBool startOfRules = TRUE; | |
815 | // we internally use the C++ class, for the following reasons: | |
816 | // 1. we need to utilize canonical iterator, which is a C++ only class | |
817 | // 2. canonical iterator returns UnicodeStrings - USet cannot take them | |
818 | // 3. USet is internally really UnicodeSet, C is just a wrapper | |
819 | UnicodeSet *tailored = new UnicodeSet(); | |
820 | UnicodeString pattern; | |
821 | UnicodeString empty; | |
822 | CanonicalIterator it(empty, *status); | |
823 | ||
824 | ||
825 | // The idea is to tokenize the rule set. For each non-reset token, | |
826 | // we add all the canonicaly equivalent FCD sequences | |
827 | ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status); | |
828 | while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) { | |
829 | startOfRules = FALSE; | |
830 | if(src.parsedToken.strength != UCOL_TOK_RESET) { | |
831 | const UChar *stuff = src.source+(src.parsedToken.charsOffset); | |
832 | it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status); | |
833 | pattern = it.next(); | |
834 | while(!pattern.isBogus()) { | |
835 | if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) { | |
836 | tailored->add(pattern); | |
837 | } | |
838 | pattern = it.next(); | |
839 | } | |
73c04bcf | 840 | } |
73c04bcf | 841 | } |
46f4442e A |
842 | ucol_tok_closeTokenList(&src); |
843 | return (USet *)tailored; | |
73c04bcf A |
844 | } |
845 | ||
846 | #endif /* #if !UCONFIG_NO_COLLATION */ |