]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/ucol_res.cpp
ICU-531.31.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_res.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucol_res.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * Description:
12 * This file contains dependencies that the collation run-time doesn't normally
13 * need. This mainly contains resource bundle usage and collation meta information
14 *
15 * Modification history
16 * Date Name Comments
17 * 1996-1999 various members of ICU team maintained C API for collation framework
18 * 02/16/2001 synwee Added internal method getPrevSpecialCE
19 * 03/01/2001 synwee Added maxexpansion functionality.
20 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
21 * 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp
22 * 2012-2014 markus Rewritten in C++ again.
23 */
24
25 #include "unicode/utypes.h"
26
27 #if !UCONFIG_NO_COLLATION
28
29 #include "unicode/coll.h"
30 #include "unicode/localpointer.h"
31 #include "unicode/locid.h"
32 #include "unicode/tblcoll.h"
33 #include "unicode/ucol.h"
34 #include "unicode/uloc.h"
35 #include "unicode/unistr.h"
36 #include "unicode/ures.h"
37 #include "cmemory.h"
38 #include "cstring.h"
39 #include "collationdatareader.h"
40 #include "collationroot.h"
41 #include "collationtailoring.h"
42 #include "putilimp.h"
43 #include "uassert.h"
44 #include "ucln_in.h"
45 #include "ucol_imp.h"
46 #include "uenumimp.h"
47 #include "ulist.h"
48 #include "umutex.h"
49 #include "uresimp.h"
50 #include "ustrenum.h"
51 #include "utracimp.h"
52
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is parenthesized so that the macro behaves as a single
// primary expression wherever it is used.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
54
55 U_NAMESPACE_BEGIN
56
57 namespace {
58
59 static const UChar *rootRules = NULL;
60 static int32_t rootRulesLength = 0;
61 static UResourceBundle *rootBundle = NULL;
62 static UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
63
64 } // namespace
65
66 U_CDECL_BEGIN
67
68 static UBool U_CALLCONV
69 ucol_res_cleanup() {
70 rootRules = NULL;
71 rootRulesLength = 0;
72 ures_close(rootBundle);
73 rootBundle = NULL;
74 gInitOnce.reset();
75 return TRUE;
76 }
77
78 U_CDECL_END
79
80 void
81 CollationLoader::loadRootRules(UErrorCode &errorCode) {
82 if(U_FAILURE(errorCode)) { return; }
83 rootBundle = ures_open(U_ICUDATA_COLL, kRootLocaleName, &errorCode);
84 if(U_FAILURE(errorCode)) { return; }
85 rootRules = ures_getStringByKey(rootBundle, "UCARules", &rootRulesLength, &errorCode);
86 if(U_FAILURE(errorCode)) {
87 ures_close(rootBundle);
88 rootBundle = NULL;
89 return;
90 }
91 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup);
92 }
93
94 void
95 CollationLoader::appendRootRules(UnicodeString &s) {
96 UErrorCode errorCode = U_ZERO_ERROR;
97 umtx_initOnce(gInitOnce, CollationLoader::loadRootRules, errorCode);
98 if(U_SUCCESS(errorCode)) {
99 s.append(rootRules, rootRulesLength);
100 }
101 }
102
103 UnicodeString *
104 CollationLoader::loadRules(const char *localeID, const char *collationType, UErrorCode &errorCode) {
105 if(U_FAILURE(errorCode)) { return NULL; }
106 U_ASSERT(collationType != NULL && *collationType != 0);
107
108 LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, localeID, &errorCode));
109 LocalUResourceBundlePointer collations(
110 ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode));
111 LocalUResourceBundlePointer data(
112 ures_getByKeyWithFallback(collations.getAlias(), collationType, NULL, &errorCode));
113 int32_t length;
114 const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length, &errorCode);
115 if(U_FAILURE(errorCode)) { return NULL; }
116
117 // No string pointer aliasing so that we need not hold onto the resource bundle.
118 UnicodeString *rules = new UnicodeString(s, length);
119 if(rules == NULL) {
120 errorCode = U_MEMORY_ALLOCATION_ERROR;
121 return NULL;
122 }
123 return rules;
124 }
125
126 const CollationTailoring *
127 CollationLoader::loadTailoring(const Locale &locale, Locale &validLocale, UErrorCode &errorCode) {
128 const CollationTailoring *root = CollationRoot::getRoot(errorCode);
129 if(U_FAILURE(errorCode)) { return NULL; }
130 const char *name = locale.getName();
131 if(*name == 0 || uprv_strcmp(name, "root") == 0) {
132 validLocale = Locale::getRoot();
133 return root;
134 }
135
136 LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, name, &errorCode));
137 if(errorCode == U_MISSING_RESOURCE_ERROR) {
138 errorCode = U_USING_DEFAULT_WARNING;
139 validLocale = Locale::getRoot();
140 return root;
141 }
142 const char *vLocale = ures_getLocaleByType(bundle.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode);
143 if(U_FAILURE(errorCode)) { return NULL; }
144 validLocale = Locale(vLocale);
145
146 // There are zero or more tailorings in the collations table.
147 LocalUResourceBundlePointer collations(
148 ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode));
149 if(errorCode == U_MISSING_RESOURCE_ERROR) {
150 errorCode = U_USING_DEFAULT_WARNING;
151 return root;
152 }
153 if(U_FAILURE(errorCode)) { return NULL; }
154
155 // Fetch the collation type from the locale ID and the default type from the data.
156 char type[16];
157 int32_t typeLength = locale.getKeywordValue("collation", type, LENGTHOF(type) - 1, errorCode);
158 if(U_FAILURE(errorCode)) {
159 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
160 return NULL;
161 }
162 type[typeLength] = 0; // in case of U_NOT_TERMINATED_WARNING
163 char defaultType[16];
164 {
165 UErrorCode internalErrorCode = U_ZERO_ERROR;
166 LocalUResourceBundlePointer def(
167 ures_getByKeyWithFallback(collations.getAlias(), "default", NULL,
168 &internalErrorCode));
169 int32_t length;
170 const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode);
171 if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) {
172 u_UCharsToChars(s, defaultType, length + 1);
173 } else {
174 uprv_strcpy(defaultType, "standard");
175 }
176 }
177 if(typeLength == 0 || uprv_strcmp(type, "default") == 0) {
178 uprv_strcpy(type, defaultType);
179 }
180
181 // Load the collations/type tailoring, with type fallback.
182 UBool typeFallback = FALSE;
183 LocalUResourceBundlePointer data(
184 ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
185 if(errorCode == U_MISSING_RESOURCE_ERROR &&
186 typeLength > 6 && uprv_strncmp(type, "search", 6) == 0) {
187 // fall back from something like "searchjl" to "search"
188 typeFallback = TRUE;
189 type[6] = 0;
190 errorCode = U_ZERO_ERROR;
191 data.adoptInstead(
192 ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
193 }
194 if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, defaultType) != 0) {
195 // fall back to the default type
196 typeFallback = TRUE;
197 uprv_strcpy(type, defaultType);
198 errorCode = U_ZERO_ERROR;
199 data.adoptInstead(
200 ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
201 }
202 if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, "standard") != 0) {
203 // fall back to the "standard" type
204 typeFallback = TRUE;
205 uprv_strcpy(type, "standard");
206 errorCode = U_ZERO_ERROR;
207 data.adoptInstead(
208 ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
209 }
210 if(errorCode == U_MISSING_RESOURCE_ERROR) {
211 errorCode = U_USING_DEFAULT_WARNING;
212 return root;
213 }
214 if(U_FAILURE(errorCode)) { return NULL; }
215
216 LocalPointer<CollationTailoring> t(new CollationTailoring(root->settings));
217 if(t.isNull() || t->isBogus()) {
218 errorCode = U_MEMORY_ALLOCATION_ERROR;
219 return NULL;
220 }
221
222 // Is this the same as the root collator? If so, then use that instead.
223 const char *actualLocale = ures_getLocaleByType(data.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode);
224 if(U_FAILURE(errorCode)) { return NULL; }
225 if((*actualLocale == 0 || uprv_strcmp(actualLocale, "root") == 0) &&
226 uprv_strcmp(type, "standard") == 0) {
227 if(typeFallback) {
228 errorCode = U_USING_DEFAULT_WARNING;
229 }
230 return root;
231 }
232 t->actualLocale = Locale(actualLocale);
233
234 // deserialize
235 LocalUResourceBundlePointer binary(
236 ures_getByKey(data.getAlias(), "%%CollationBin", NULL, &errorCode));
237 // Note: U_MISSING_RESOURCE_ERROR --> The old code built from rules if available
238 // but that created undesirable dependencies.
239 int32_t length;
240 const uint8_t *inBytes = ures_getBinary(binary.getAlias(), &length, &errorCode);
241 if(U_FAILURE(errorCode)) { return NULL; }
242 CollationDataReader::read(root, inBytes, length, *t, errorCode);
243 // Note: U_COLLATOR_VERSION_MISMATCH --> The old code built from rules if available
244 // but that created undesirable dependencies.
245 if(U_FAILURE(errorCode)) { return NULL; }
246
247 // Try to fetch the optional rules string.
248 {
249 UErrorCode internalErrorCode = U_ZERO_ERROR;
250 int32_t length;
251 const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length,
252 &internalErrorCode);
253 if(U_SUCCESS(errorCode)) {
254 t->rules.setTo(TRUE, s, length);
255 }
256 }
257
258 // Set the collation types on the informational locales,
259 // except when they match the default types (for brevity and backwards compatibility).
260 // For the valid locale, suppress the default type.
261 if(uprv_strcmp(type, defaultType) != 0) {
262 validLocale.setKeywordValue("collation", type, errorCode);
263 if(U_FAILURE(errorCode)) { return NULL; }
264 }
265
266 // For the actual locale, suppress the default type *according to the actual locale*.
267 // For example, zh has default=pinyin and contains all of the Chinese tailorings.
268 // zh_Hant has default=stroke but has no other data.
269 // For the valid locale "zh_Hant" we need to suppress stroke.
270 // For the actual locale "zh" we need to suppress pinyin instead.
271 if(uprv_strcmp(actualLocale, vLocale) != 0) {
272 // Opening a bundle for the actual locale should always succeed.
273 LocalUResourceBundlePointer actualBundle(
274 ures_open(U_ICUDATA_COLL, actualLocale, &errorCode));
275 if(U_FAILURE(errorCode)) { return NULL; }
276 UErrorCode internalErrorCode = U_ZERO_ERROR;
277 LocalUResourceBundlePointer def(
278 ures_getByKeyWithFallback(actualBundle.getAlias(), "collations/default", NULL,
279 &internalErrorCode));
280 int32_t length;
281 const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode);
282 if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) {
283 u_UCharsToChars(s, defaultType, length + 1);
284 } else {
285 uprv_strcpy(defaultType, "standard");
286 }
287 }
288 if(uprv_strcmp(type, defaultType) != 0) {
289 t->actualLocale.setKeywordValue("collation", type, errorCode);
290 if(U_FAILURE(errorCode)) { return NULL; }
291 }
292
293 if(typeFallback) {
294 errorCode = U_USING_DEFAULT_WARNING;
295 }
296 t->bundle = bundle.orphan();
297 return t.orphan();
298 }
299
300 U_NAMESPACE_END
301
302 U_NAMESPACE_USE
303
304 U_CAPI UCollator*
305 ucol_open(const char *loc,
306 UErrorCode *status)
307 {
308 U_NAMESPACE_USE
309
310 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
311 UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
312 UCollator *result = NULL;
313
314 Collator *coll = Collator::createInstance(loc, *status);
315 if(U_SUCCESS(*status)) {
316 result = coll->toUCollator();
317 }
318 UTRACE_EXIT_PTR_STATUS(result, *status);
319 return result;
320 }
321
322
323 U_CAPI int32_t U_EXPORT2
324 ucol_getDisplayName( const char *objLoc,
325 const char *dispLoc,
326 UChar *result,
327 int32_t resultLength,
328 UErrorCode *status)
329 {
330 U_NAMESPACE_USE
331
332 if(U_FAILURE(*status)) return -1;
333 UnicodeString dst;
334 if(!(result==NULL && resultLength==0)) {
335 // NULL destination for pure preflighting: empty dummy string
336 // otherwise, alias the destination buffer
337 dst.setTo(result, 0, resultLength);
338 }
339 Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
340 return dst.extract(result, resultLength, *status);
341 }
342
343 U_CAPI const char* U_EXPORT2
344 ucol_getAvailable(int32_t index)
345 {
346 int32_t count = 0;
347 const Locale *loc = Collator::getAvailableLocales(count);
348 if (loc != NULL && index < count) {
349 return loc[index].getName();
350 }
351 return NULL;
352 }
353
354 U_CAPI int32_t U_EXPORT2
355 ucol_countAvailable()
356 {
357 int32_t count = 0;
358 Collator::getAvailableLocales(count);
359 return count;
360 }
361
362 #if !UCONFIG_NO_SERVICE
363 U_CAPI UEnumeration* U_EXPORT2
364 ucol_openAvailableLocales(UErrorCode *status) {
365 U_NAMESPACE_USE
366
367 // This is a wrapper over Collator::getAvailableLocales()
368 if (U_FAILURE(*status)) {
369 return NULL;
370 }
371 StringEnumeration *s = icu::Collator::getAvailableLocales();
372 if (s == NULL) {
373 *status = U_MEMORY_ALLOCATION_ERROR;
374 return NULL;
375 }
376 return uenum_openFromStringEnumeration(s, status);
377 }
378 #endif
379
// Name of the resource table that holds the collation tailorings.
// Note: this is deliberately not the same string as KEYWORDS[0] - alan
static const char RESOURCE_NAME[] = "collations";

// The locale keywords that are relevant for collation (currently exactly one).
static const char *const KEYWORDS[] = {
    "collation"
};

// Number of entries in KEYWORDS.
#define KEYWORD_COUNT LENGTHOF(KEYWORDS)
387
388 U_CAPI UEnumeration* U_EXPORT2
389 ucol_getKeywords(UErrorCode *status) {
390 UEnumeration *result = NULL;
391 if (U_SUCCESS(*status)) {
392 return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
393 }
394 return result;
395 }
396
397 U_CAPI UEnumeration* U_EXPORT2
398 ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
399 if (U_FAILURE(*status)) {
400 return NULL;
401 }
402 // hard-coded to accept exactly one collation keyword
403 // modify if additional collation keyword is added later
404 if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0)
405 {
406 *status = U_ILLEGAL_ARGUMENT_ERROR;
407 return NULL;
408 }
409 return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
410 }
411
412 static const UEnumeration defaultKeywordValues = {
413 NULL,
414 NULL,
415 ulist_close_keyword_values_iterator,
416 ulist_count_keyword_values,
417 uenum_unextDefault,
418 ulist_next_keyword_value,
419 ulist_reset_keyword_values_iterator
420 };
421
422 #include <stdio.h>
423
424 U_CAPI UEnumeration* U_EXPORT2
425 ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale,
426 UBool /*commonlyUsed*/, UErrorCode* status) {
427 /* Get the locale base name. */
428 char localeBuffer[ULOC_FULLNAME_CAPACITY] = "";
429 uloc_getBaseName(locale, localeBuffer, sizeof(localeBuffer), status);
430
431 /* Create the 2 lists
432 * -values is the temp location for the keyword values
433 * -results hold the actual list used by the UEnumeration object
434 */
435 UList *values = ulist_createEmptyList(status);
436 UList *results = ulist_createEmptyList(status);
437 UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
438 if (U_FAILURE(*status) || en == NULL) {
439 if (en == NULL) {
440 *status = U_MEMORY_ALLOCATION_ERROR;
441 } else {
442 uprv_free(en);
443 }
444 ulist_deleteList(values);
445 ulist_deleteList(results);
446 return NULL;
447 }
448
449 memcpy(en, &defaultKeywordValues, sizeof(UEnumeration));
450 en->context = results;
451
452 /* Open the resource bundle for collation with the given locale. */
453 UResourceBundle bundle, collations, collres, defres;
454 ures_initStackObject(&bundle);
455 ures_initStackObject(&collations);
456 ures_initStackObject(&collres);
457 ures_initStackObject(&defres);
458
459 ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);
460
461 while (U_SUCCESS(*status)) {
462 ures_getByKey(&bundle, RESOURCE_NAME, &collations, status);
463 ures_resetIterator(&collations);
464 while (U_SUCCESS(*status) && ures_hasNext(&collations)) {
465 ures_getNextResource(&collations, &collres, status);
466 const char *key = ures_getKey(&collres);
467 /* If the key is default, get the string and store it in results list only
468 * if results list is empty.
469 */
470 if (uprv_strcmp(key, "default") == 0) {
471 if (ulist_getListSize(results) == 0) {
472 char *defcoll = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY);
473 int32_t defcollLength = ULOC_KEYWORDS_CAPACITY;
474
475 ures_getNextResource(&collres, &defres, status);
476 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
477 /* optimize - use the utf-8 string */
478 ures_getUTF8String(&defres, defcoll, &defcollLength, TRUE, status);
479 #else
480 {
481 const UChar* defString = ures_getString(&defres, &defcollLength, status);
482 if(U_SUCCESS(*status)) {
483 if(defcollLength+1 > ULOC_KEYWORDS_CAPACITY) {
484 *status = U_BUFFER_OVERFLOW_ERROR;
485 } else {
486 u_UCharsToChars(defString, defcoll, defcollLength+1);
487 }
488 }
489 }
490 #endif
491
492 ulist_addItemBeginList(results, defcoll, TRUE, status);
493 }
494 } else {
495 ulist_addItemEndList(values, key, FALSE, status);
496 }
497 }
498
499 /* If the locale is "" this is root so exit. */
500 if (uprv_strlen(localeBuffer) == 0) {
501 break;
502 }
503 /* Get the parent locale and open a new resource bundle. */
504 uloc_getParent(localeBuffer, localeBuffer, sizeof(localeBuffer), status);
505 ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);
506 }
507
508 ures_close(&defres);
509 ures_close(&collres);
510 ures_close(&collations);
511 ures_close(&bundle);
512
513 if (U_SUCCESS(*status)) {
514 char *value = NULL;
515 ulist_resetList(values);
516 while ((value = (char *)ulist_getNext(values)) != NULL) {
517 if (!ulist_containsString(results, value, (int32_t)uprv_strlen(value))) {
518 ulist_addItemEndList(results, value, FALSE, status);
519 if (U_FAILURE(*status)) {
520 break;
521 }
522 }
523 }
524 }
525
526 ulist_deleteList(values);
527
528 if (U_FAILURE(*status)){
529 uenum_close(en);
530 en = NULL;
531 } else {
532 ulist_resetList(results);
533 }
534
535 return en;
536 }
537
538 U_CAPI int32_t U_EXPORT2
539 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
540 const char* keyword, const char* locale,
541 UBool* isAvailable, UErrorCode* status)
542 {
543 // N.B.: Resource name is "collations" but keyword is "collation"
544 return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
545 "collations", keyword, locale,
546 isAvailable, TRUE, status);
547 }
548
549 #endif /* #if !UCONFIG_NO_COLLATION */