/*
*******************************************************************************
*   Copyright (C) 1996-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ucol_res.cpp
*   tab size:   8 (not used)
*
* This file contains dependencies that the collation run-time doesn't normally
* need. This mainly contains resource bundle usage and collation meta information.
*
* Modification history
* 1996-1999   various members of ICU team maintained C API for collation framework
* 02/16/2001  synwee    Added internal method getPrevSpecialCE
* 03/01/2001  synwee    Added maxexpansion functionality.
* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
* 12/08/2004  grhoten   Split part of ucol.cpp into ucol_res.cpp
* 2012-2014   markus    Rewritten in C++ again.
*/
25 #include "unicode/utypes.h"
27 #if !UCONFIG_NO_COLLATION
29 #include "unicode/coll.h"
30 #include "unicode/localpointer.h"
31 #include "unicode/locid.h"
32 #include "unicode/tblcoll.h"
33 #include "unicode/ucol.h"
34 #include "unicode/uloc.h"
35 #include "unicode/unistr.h"
36 #include "unicode/ures.h"
39 #include "collationdatareader.h"
40 #include "collationroot.h"
41 #include "collationtailoring.h"
/**
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole replacement list is parenthesized so that the cast cannot bind
 * to tokens that follow the macro at the point of use.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
59 static const UChar
*rootRules
= NULL
;
60 static int32_t rootRulesLength
= 0;
61 static UResourceBundle
*rootBundle
= NULL
;
62 static UInitOnce gInitOnce
= U_INITONCE_INITIALIZER
;
68 static UBool U_CALLCONV
72 ures_close(rootBundle
);
81 CollationLoader::loadRootRules(UErrorCode
&errorCode
) {
82 if(U_FAILURE(errorCode
)) { return; }
83 rootBundle
= ures_open(U_ICUDATA_COLL
, kRootLocaleName
, &errorCode
);
84 if(U_FAILURE(errorCode
)) { return; }
85 rootRules
= ures_getStringByKey(rootBundle
, "UCARules", &rootRulesLength
, &errorCode
);
86 if(U_FAILURE(errorCode
)) {
87 ures_close(rootBundle
);
91 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES
, ucol_res_cleanup
);
95 CollationLoader::appendRootRules(UnicodeString
&s
) {
96 UErrorCode errorCode
= U_ZERO_ERROR
;
97 umtx_initOnce(gInitOnce
, CollationLoader::loadRootRules
, errorCode
);
98 if(U_SUCCESS(errorCode
)) {
99 s
.append(rootRules
, rootRulesLength
);
104 CollationLoader::loadRules(const char *localeID
, const char *collationType
, UErrorCode
&errorCode
) {
105 if(U_FAILURE(errorCode
)) { return NULL
; }
106 U_ASSERT(collationType
!= NULL
&& *collationType
!= 0);
108 LocalUResourceBundlePointer
bundle(ures_open(U_ICUDATA_COLL
, localeID
, &errorCode
));
109 LocalUResourceBundlePointer
collations(
110 ures_getByKey(bundle
.getAlias(), "collations", NULL
, &errorCode
));
111 LocalUResourceBundlePointer
data(
112 ures_getByKeyWithFallback(collations
.getAlias(), collationType
, NULL
, &errorCode
));
114 const UChar
*s
= ures_getStringByKey(data
.getAlias(), "Sequence", &length
, &errorCode
);
115 if(U_FAILURE(errorCode
)) { return NULL
; }
117 // No string pointer aliasing so that we need not hold onto the resource bundle.
118 UnicodeString
*rules
= new UnicodeString(s
, length
);
120 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
126 const CollationTailoring
*
127 CollationLoader::loadTailoring(const Locale
&locale
, Locale
&validLocale
, UErrorCode
&errorCode
) {
128 const CollationTailoring
*root
= CollationRoot::getRoot(errorCode
);
129 if(U_FAILURE(errorCode
)) { return NULL
; }
130 const char *name
= locale
.getName();
131 if(*name
== 0 || uprv_strcmp(name
, "root") == 0) {
132 validLocale
= Locale::getRoot();
136 LocalUResourceBundlePointer
bundle(ures_open(U_ICUDATA_COLL
, name
, &errorCode
));
137 if(errorCode
== U_MISSING_RESOURCE_ERROR
) {
138 errorCode
= U_USING_DEFAULT_WARNING
;
139 validLocale
= Locale::getRoot();
142 const char *vLocale
= ures_getLocaleByType(bundle
.getAlias(), ULOC_ACTUAL_LOCALE
, &errorCode
);
143 if(U_FAILURE(errorCode
)) { return NULL
; }
144 validLocale
= Locale(vLocale
);
146 // There are zero or more tailorings in the collations table.
147 LocalUResourceBundlePointer
collations(
148 ures_getByKey(bundle
.getAlias(), "collations", NULL
, &errorCode
));
149 if(errorCode
== U_MISSING_RESOURCE_ERROR
) {
150 errorCode
= U_USING_DEFAULT_WARNING
;
153 if(U_FAILURE(errorCode
)) { return NULL
; }
155 // Fetch the collation type from the locale ID and the default type from the data.
157 int32_t typeLength
= locale
.getKeywordValue("collation", type
, LENGTHOF(type
) - 1, errorCode
);
158 if(U_FAILURE(errorCode
)) {
159 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
162 type
[typeLength
] = 0; // in case of U_NOT_TERMINATED_WARNING
163 char defaultType
[16];
165 UErrorCode internalErrorCode
= U_ZERO_ERROR
;
166 LocalUResourceBundlePointer
def(
167 ures_getByKeyWithFallback(collations
.getAlias(), "default", NULL
,
168 &internalErrorCode
));
170 const UChar
*s
= ures_getString(def
.getAlias(), &length
, &internalErrorCode
);
171 if(U_SUCCESS(internalErrorCode
) && length
< LENGTHOF(defaultType
)) {
172 u_UCharsToChars(s
, defaultType
, length
+ 1);
174 uprv_strcpy(defaultType
, "standard");
177 if(typeLength
== 0 || uprv_strcmp(type
, "default") == 0) {
178 uprv_strcpy(type
, defaultType
);
181 // Load the collations/type tailoring, with type fallback.
182 UBool typeFallback
= FALSE
;
183 LocalUResourceBundlePointer
data(
184 ures_getByKeyWithFallback(collations
.getAlias(), type
, NULL
, &errorCode
));
185 if(errorCode
== U_MISSING_RESOURCE_ERROR
&&
186 typeLength
> 6 && uprv_strncmp(type
, "search", 6) == 0) {
187 // fall back from something like "searchjl" to "search"
190 errorCode
= U_ZERO_ERROR
;
192 ures_getByKeyWithFallback(collations
.getAlias(), type
, NULL
, &errorCode
));
194 if(errorCode
== U_MISSING_RESOURCE_ERROR
&& uprv_strcmp(type
, defaultType
) != 0) {
195 // fall back to the default type
197 uprv_strcpy(type
, defaultType
);
198 errorCode
= U_ZERO_ERROR
;
200 ures_getByKeyWithFallback(collations
.getAlias(), type
, NULL
, &errorCode
));
202 if(errorCode
== U_MISSING_RESOURCE_ERROR
&& uprv_strcmp(type
, "standard") != 0) {
203 // fall back to the "standard" type
205 uprv_strcpy(type
, "standard");
206 errorCode
= U_ZERO_ERROR
;
208 ures_getByKeyWithFallback(collations
.getAlias(), type
, NULL
, &errorCode
));
210 if(errorCode
== U_MISSING_RESOURCE_ERROR
) {
211 errorCode
= U_USING_DEFAULT_WARNING
;
214 if(U_FAILURE(errorCode
)) { return NULL
; }
216 LocalPointer
<CollationTailoring
> t(new CollationTailoring(root
->settings
));
217 if(t
.isNull() || t
->isBogus()) {
218 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
222 // Is this the same as the root collator? If so, then use that instead.
223 const char *actualLocale
= ures_getLocaleByType(data
.getAlias(), ULOC_ACTUAL_LOCALE
, &errorCode
);
224 if(U_FAILURE(errorCode
)) { return NULL
; }
225 if((*actualLocale
== 0 || uprv_strcmp(actualLocale
, "root") == 0) &&
226 uprv_strcmp(type
, "standard") == 0) {
228 errorCode
= U_USING_DEFAULT_WARNING
;
232 t
->actualLocale
= Locale(actualLocale
);
235 LocalUResourceBundlePointer
binary(
236 ures_getByKey(data
.getAlias(), "%%CollationBin", NULL
, &errorCode
));
237 // Note: U_MISSING_RESOURCE_ERROR --> The old code built from rules if available
238 // but that created undesirable dependencies.
240 const uint8_t *inBytes
= ures_getBinary(binary
.getAlias(), &length
, &errorCode
);
241 if(U_FAILURE(errorCode
)) { return NULL
; }
242 CollationDataReader::read(root
, inBytes
, length
, *t
, errorCode
);
243 // Note: U_COLLATOR_VERSION_MISMATCH --> The old code built from rules if available
244 // but that created undesirable dependencies.
245 if(U_FAILURE(errorCode
)) { return NULL
; }
247 // Try to fetch the optional rules string.
249 UErrorCode internalErrorCode
= U_ZERO_ERROR
;
251 const UChar
*s
= ures_getStringByKey(data
.getAlias(), "Sequence", &length
,
253 if(U_SUCCESS(errorCode
)) {
254 t
->rules
.setTo(TRUE
, s
, length
);
258 // Set the collation types on the informational locales,
259 // except when they match the default types (for brevity and backwards compatibility).
260 // For the valid locale, suppress the default type.
261 if(uprv_strcmp(type
, defaultType
) != 0) {
262 validLocale
.setKeywordValue("collation", type
, errorCode
);
263 if(U_FAILURE(errorCode
)) { return NULL
; }
266 // For the actual locale, suppress the default type *according to the actual locale*.
267 // For example, zh has default=pinyin and contains all of the Chinese tailorings.
268 // zh_Hant has default=stroke but has no other data.
269 // For the valid locale "zh_Hant" we need to suppress stroke.
270 // For the actual locale "zh" we need to suppress pinyin instead.
271 if(uprv_strcmp(actualLocale
, vLocale
) != 0) {
272 // Opening a bundle for the actual locale should always succeed.
273 LocalUResourceBundlePointer
actualBundle(
274 ures_open(U_ICUDATA_COLL
, actualLocale
, &errorCode
));
275 if(U_FAILURE(errorCode
)) { return NULL
; }
276 UErrorCode internalErrorCode
= U_ZERO_ERROR
;
277 LocalUResourceBundlePointer
def(
278 ures_getByKeyWithFallback(actualBundle
.getAlias(), "collations/default", NULL
,
279 &internalErrorCode
));
281 const UChar
*s
= ures_getString(def
.getAlias(), &length
, &internalErrorCode
);
282 if(U_SUCCESS(internalErrorCode
) && length
< LENGTHOF(defaultType
)) {
283 u_UCharsToChars(s
, defaultType
, length
+ 1);
285 uprv_strcpy(defaultType
, "standard");
288 if(uprv_strcmp(type
, defaultType
) != 0) {
289 t
->actualLocale
.setKeywordValue("collation", type
, errorCode
);
290 if(U_FAILURE(errorCode
)) { return NULL
; }
294 errorCode
= U_USING_DEFAULT_WARNING
;
296 t
->bundle
= bundle
.orphan();
305 ucol_open(const char *loc
,
310 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN
);
311 UTRACE_DATA1(UTRACE_INFO
, "locale = \"%s\"", loc
);
312 UCollator
*result
= NULL
;
314 Collator
*coll
= Collator::createInstance(loc
, *status
);
315 if(U_SUCCESS(*status
)) {
316 result
= coll
->toUCollator();
318 UTRACE_EXIT_PTR_STATUS(result
, *status
);
323 U_CAPI
int32_t U_EXPORT2
324 ucol_getDisplayName( const char *objLoc
,
327 int32_t resultLength
,
332 if(U_FAILURE(*status
)) return -1;
334 if(!(result
==NULL
&& resultLength
==0)) {
335 // NULL destination for pure preflighting: empty dummy string
336 // otherwise, alias the destination buffer
337 dst
.setTo(result
, 0, resultLength
);
339 Collator::getDisplayName(Locale(objLoc
), Locale(dispLoc
), dst
);
340 return dst
.extract(result
, resultLength
, *status
);
343 U_CAPI
const char* U_EXPORT2
344 ucol_getAvailable(int32_t index
)
347 const Locale
*loc
= Collator::getAvailableLocales(count
);
348 if (loc
!= NULL
&& index
< count
) {
349 return loc
[index
].getName();
354 U_CAPI
int32_t U_EXPORT2
355 ucol_countAvailable()
358 Collator::getAvailableLocales(count
);
362 #if !UCONFIG_NO_SERVICE
363 U_CAPI UEnumeration
* U_EXPORT2
364 ucol_openAvailableLocales(UErrorCode
*status
) {
367 // This is a wrapper over Collator::getAvailableLocales()
368 if (U_FAILURE(*status
)) {
371 StringEnumeration
*s
= icu::Collator::getAvailableLocales();
373 *status
= U_MEMORY_ALLOCATION_ERROR
;
376 return uenum_openFromStringEnumeration(s
, status
);
// Resource table that holds the collation tailorings.
// Note: KEYWORDS[0] != RESOURCE_NAME - alan
static const char RESOURCE_NAME[] = "collations";

// Locale keywords reported by ucol_getKeywords().
static const char* const KEYWORDS[] = { "collation" };

// Number of entries in KEYWORDS.
#define KEYWORD_COUNT LENGTHOF(KEYWORDS)
388 U_CAPI UEnumeration
* U_EXPORT2
389 ucol_getKeywords(UErrorCode
*status
) {
390 UEnumeration
*result
= NULL
;
391 if (U_SUCCESS(*status
)) {
392 return uenum_openCharStringsEnumeration(KEYWORDS
, KEYWORD_COUNT
, status
);
397 U_CAPI UEnumeration
* U_EXPORT2
398 ucol_getKeywordValues(const char *keyword
, UErrorCode
*status
) {
399 if (U_FAILURE(*status
)) {
402 // hard-coded to accept exactly one collation keyword
403 // modify if additional collation keyword is added later
404 if (keyword
==NULL
|| uprv_strcmp(keyword
, KEYWORDS
[0])!=0)
406 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
409 return ures_getKeywordValues(U_ICUDATA_COLL
, RESOURCE_NAME
, status
);
412 static const UEnumeration defaultKeywordValues
= {
415 ulist_close_keyword_values_iterator
,
416 ulist_count_keyword_values
,
418 ulist_next_keyword_value
,
419 ulist_reset_keyword_values_iterator
424 U_CAPI UEnumeration
* U_EXPORT2
425 ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale
,
426 UBool
/*commonlyUsed*/, UErrorCode
* status
) {
427 /* Get the locale base name. */
428 char localeBuffer
[ULOC_FULLNAME_CAPACITY
] = "";
429 uloc_getBaseName(locale
, localeBuffer
, sizeof(localeBuffer
), status
);
431 /* Create the 2 lists
432 * -values is the temp location for the keyword values
433 * -results hold the actual list used by the UEnumeration object
435 UList
*values
= ulist_createEmptyList(status
);
436 UList
*results
= ulist_createEmptyList(status
);
437 UEnumeration
*en
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
438 if (U_FAILURE(*status
) || en
== NULL
) {
440 *status
= U_MEMORY_ALLOCATION_ERROR
;
444 ulist_deleteList(values
);
445 ulist_deleteList(results
);
449 memcpy(en
, &defaultKeywordValues
, sizeof(UEnumeration
));
450 en
->context
= results
;
452 /* Open the resource bundle for collation with the given locale. */
453 UResourceBundle bundle
, collations
, collres
, defres
;
454 ures_initStackObject(&bundle
);
455 ures_initStackObject(&collations
);
456 ures_initStackObject(&collres
);
457 ures_initStackObject(&defres
);
459 ures_openFillIn(&bundle
, U_ICUDATA_COLL
, localeBuffer
, status
);
461 while (U_SUCCESS(*status
)) {
462 ures_getByKey(&bundle
, RESOURCE_NAME
, &collations
, status
);
463 ures_resetIterator(&collations
);
464 while (U_SUCCESS(*status
) && ures_hasNext(&collations
)) {
465 ures_getNextResource(&collations
, &collres
, status
);
466 const char *key
= ures_getKey(&collres
);
467 /* If the key is default, get the string and store it in results list only
468 * if results list is empty.
470 if (uprv_strcmp(key
, "default") == 0) {
471 if (ulist_getListSize(results
) == 0) {
472 char *defcoll
= (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY
);
473 int32_t defcollLength
= ULOC_KEYWORDS_CAPACITY
;
475 ures_getNextResource(&collres
, &defres
, status
);
476 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
477 /* optimize - use the utf-8 string */
478 ures_getUTF8String(&defres
, defcoll
, &defcollLength
, TRUE
, status
);
481 const UChar
* defString
= ures_getString(&defres
, &defcollLength
, status
);
482 if(U_SUCCESS(*status
)) {
483 if(defcollLength
+1 > ULOC_KEYWORDS_CAPACITY
) {
484 *status
= U_BUFFER_OVERFLOW_ERROR
;
486 u_UCharsToChars(defString
, defcoll
, defcollLength
+1);
492 ulist_addItemBeginList(results
, defcoll
, TRUE
, status
);
495 ulist_addItemEndList(values
, key
, FALSE
, status
);
499 /* If the locale is "" this is root so exit. */
500 if (uprv_strlen(localeBuffer
) == 0) {
503 /* Get the parent locale and open a new resource bundle. */
504 uloc_getParent(localeBuffer
, localeBuffer
, sizeof(localeBuffer
), status
);
505 ures_openFillIn(&bundle
, U_ICUDATA_COLL
, localeBuffer
, status
);
509 ures_close(&collres
);
510 ures_close(&collations
);
513 if (U_SUCCESS(*status
)) {
515 ulist_resetList(values
);
516 while ((value
= (char *)ulist_getNext(values
)) != NULL
) {
517 if (!ulist_containsString(results
, value
, (int32_t)uprv_strlen(value
))) {
518 ulist_addItemEndList(results
, value
, FALSE
, status
);
519 if (U_FAILURE(*status
)) {
526 ulist_deleteList(values
);
528 if (U_FAILURE(*status
)){
532 ulist_resetList(results
);
538 U_CAPI
int32_t U_EXPORT2
539 ucol_getFunctionalEquivalent(char* result
, int32_t resultCapacity
,
540 const char* keyword
, const char* locale
,
541 UBool
* isAvailable
, UErrorCode
* status
)
543 // N.B.: Resource name is "collations" but keyword is "collation"
544 return ures_getFunctionalEquivalent(result
, resultCapacity
, U_ICUDATA_COLL
,
545 "collations", keyword
, locale
,
546 isAvailable
, TRUE
, status
);
549 #endif /* #if !UCONFIG_NO_COLLATION */