ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / common / locutil.cpp
CommitLineData
4388f060 1/*
73c04bcf 2 *******************************************************************************
b331163b 3 * Copyright (C) 2002-2014, International Business Machines Corporation and
729e4ab9 4 * others. All Rights Reserved.
73c04bcf
A
5 *******************************************************************************
6 */
7#include "unicode/utypes.h"
8
9#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
10
11#include "unicode/resbund.h"
12#include "cmemory.h"
13#include "ustrfmt.h"
14#include "locutil.h"
15#include "charstr.h"
16#include "ucln_cmn.h"
17#include "uassert.h"
18#include "umutex.h"
19
20// see LocaleUtility::getAvailableLocaleNames
// One-time-initialization guard handed to umtx_initOnce() in
// LocaleUtility::getAvailableLocaleNames so that locale_utility_init runs
// at most once.
b331163b 21static icu::UInitOnce LocaleUtilityInitOnce = U_INITONCE_INITIALIZER;
// Top-level cache keyed by bundle ID; allocated by locale_utility_init and
// destroyed by service_cleanup. NULL until initialized (or if init failed).
4388f060 22static icu::Hashtable * LocaleUtility_cache = NULL;
73c04bcf
A
23
// UTF-16 code units for the separator characters looked for in locale ID
// strings by the functions below.
// U+005F '_'
24#define UNDERSCORE_CHAR ((UChar)0x005f)
// U+0040 '@' (decimal 64)
25#define AT_SIGN_CHAR ((UChar)64)
// U+002E '.' (decimal 46)
26#define PERIOD_CHAR ((UChar)46)
27
28/*
29 ******************************************************************
30 */
31
32/**
33 * Release all static memory held by Locale Utility.
34 */
35U_CDECL_BEGIN
36static UBool U_CALLCONV service_cleanup(void) {
37 if (LocaleUtility_cache) {
38 delete LocaleUtility_cache;
39 LocaleUtility_cache = NULL;
40 }
41 return TRUE;
42}
b331163b
A
43
44
45static void U_CALLCONV locale_utility_init(UErrorCode &status) {
46 using namespace icu;
47 U_ASSERT(LocaleUtility_cache == NULL);
48 ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
49 LocaleUtility_cache = new Hashtable(status);
50 if (U_FAILURE(status)) {
51 delete LocaleUtility_cache;
52 LocaleUtility_cache = NULL;
53 return;
54 }
55 if (LocaleUtility_cache == NULL) {
56 status = U_MEMORY_ALLOCATION_ERROR;
57 return;
58 }
59 LocaleUtility_cache->setValueDeleter(uhash_deleteHashtable);
60}
61
73c04bcf
A
62U_CDECL_END
63
64U_NAMESPACE_BEGIN
65
66UnicodeString&
67LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
68{
69 if (id == NULL) {
70 result.setToBogus();
71 } else {
72 // Fix case only (no other changes) up to the first '@' or '.' or
73 // end of string, whichever comes first. In 3.0 I changed this to
74 // stop at first '@' or '.'. It used to run out to the end of
75 // string. My fix makes the tests pass but is probably
76 // structurally incorrect. See below. [alan 3.0]
77
78 // TODO: Doug, you might want to revise this...
79 result = *id;
80 int32_t i = 0;
81 int32_t end = result.indexOf(AT_SIGN_CHAR);
82 int32_t n = result.indexOf(PERIOD_CHAR);
83 if (n >= 0 && n < end) {
84 end = n;
85 }
86 if (end < 0) {
87 end = result.length();
88 }
89 n = result.indexOf(UNDERSCORE_CHAR);
90 if (n < 0) {
91 n = end;
92 }
93 for (; i < n; ++i) {
94 UChar c = result.charAt(i);
95 if (c >= 0x0041 && c <= 0x005a) {
96 c += 0x20;
97 result.setCharAt(i, c);
98 }
99 }
100 for (n = end; i < n; ++i) {
101 UChar c = result.charAt(i);
102 if (c >= 0x0061 && c <= 0x007a) {
103 c -= 0x20;
104 result.setCharAt(i, c);
105 }
106 }
107 }
108 return result;
109
110#if 0
111 // This code does a proper full level 2 canonicalization of id.
112 // It's nasty to go from UChar to char to char to UChar -- but
113 // that's what you have to do to use the uloc_canonicalize
114 // function on UnicodeStrings.
115
116 // I ended up doing the alternate fix (see above) not for
117 // performance reasons, although performance will certainly be
118 // better, but because doing a full level 2 canonicalization
119 // causes some tests to fail. [alan 3.0]
120
121 // TODO: Doug, you might want to revisit this...
122 result.setToBogus();
123 if (id != 0) {
124 int32_t buflen = id->length() + 8; // space for NUL
125 char* buf = (char*) uprv_malloc(buflen);
126 char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen);
127 if (buf != 0 && canon != 0) {
128 U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen);
129 UErrorCode ec = U_ZERO_ERROR;
130 uloc_canonicalize(buf, canon, buflen, &ec);
131 if (U_SUCCESS(ec)) {
132 result = UnicodeString(canon);
133 }
134 }
135 uprv_free(buf);
136 uprv_free(canon);
137 }
138 return result;
139#endif
140}
141
142Locale&
143LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
144{
145 enum { BUFLEN = 128 }; // larger than ever needed
146
147 if (id.isBogus() || id.length() >= BUFLEN) {
148 result.setToBogus();
149 } else {
150 /*
151 * We need to convert from a UnicodeString to char * in order to
152 * create a Locale.
153 *
154 * Problem: Locale ID strings may contain '@' which is a variant
155 * character and cannot be handled by invariant-character conversion.
156 *
157 * Hack: Since ICU code can handle locale IDs with multiple encodings
158 * of '@' (at least for EBCDIC; it's not known to be a problem for
159 * ASCII-based systems),
160 * we use regular invariant-character conversion for everything else
161 * and manually convert U+0040 into a compiler-char-constant '@'.
162 * While this compilation-time constant may not match the runtime
163 * encoding of '@', it should be one of the encodings which ICU
164 * recognizes.
165 *
166 * There should be only at most one '@' in a locale ID.
167 */
168 char buffer[BUFLEN];
169 int32_t prev, i;
170 prev = 0;
171 for(;;) {
172 i = id.indexOf((UChar)0x40, prev);
173 if(i < 0) {
174 // no @ between prev and the rest of the string
175 id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);
176 break; // done
177 } else {
178 // normal invariant-character conversion for text between @s
179 id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);
180 // manually "convert" U+0040 at id[i] into '@' at buffer[i]
181 buffer[i] = '@';
182 prev = i + 1;
183 }
184 }
185 result = Locale::createFromName(buffer);
186 }
187 return result;
188}
189
190UnicodeString&
191LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
192{
193 if (locale.isBogus()) {
194 result.setToBogus();
195 } else {
196 result.append(UnicodeString(locale.getName(), -1, US_INV));
197 }
198 return result;
199}
200
201const Hashtable*
202LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
203{
204 // LocaleUtility_cache is a hash-of-hashes. The top-level keys
205 // are path strings ('bundleID') passed to
206 // ures_openAvailableLocales. The top-level values are
207 // second-level hashes. The second-level keys are result strings
208 // from ures_openAvailableLocales. The second-level values are
209 // garbage ((void*)1 or other random pointer).
210
211 UErrorCode status = U_ZERO_ERROR;
b331163b
A
212 umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status);
213 Hashtable *cache = LocaleUtility_cache;
73c04bcf 214 if (cache == NULL) {
b331163b
A
215 // Catastrophic failure.
216 return NULL;
73c04bcf
A
217 }
218
73c04bcf
A
219 Hashtable* htp;
220 umtx_lock(NULL);
221 htp = (Hashtable*) cache->get(bundleID);
222 umtx_unlock(NULL);
223
224 if (htp == NULL) {
225 htp = new Hashtable(status);
226 if (htp && U_SUCCESS(status)) {
729e4ab9
A
227 CharString cbundleID;
228 cbundleID.appendInvariantChars(bundleID, status);
229 const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();
73c04bcf
A
230 UEnumeration *uenum = ures_openAvailableLocales(path, &status);
231 for (;;) {
232 const UChar* id = uenum_unext(uenum, NULL, &status);
233 if (id == NULL) {
234 break;
235 }
236 htp->put(UnicodeString(id), (void*)htp, status);
237 }
238 uenum_close(uenum);
239 if (U_FAILURE(status)) {
240 delete htp;
241 return NULL;
242 }
243 umtx_lock(NULL);
b331163b
A
244 Hashtable *t = static_cast<Hashtable *>(cache->get(bundleID));
245 if (t != NULL) {
246 // Another thread raced through this code, creating the cache entry first.
247 // Discard ours and return theirs.
248 umtx_unlock(NULL);
249 delete htp;
250 htp = t;
251 } else {
252 cache->put(bundleID, (void*)htp, status);
253 umtx_unlock(NULL);
254 }
73c04bcf
A
255 }
256 }
257 return htp;
258}
259
260UBool
261LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
262{
263 return child.indexOf(root) == 0 &&
264 (child.length() == root.length() ||
265 child.charAt(root.length()) == UNDERSCORE_CHAR);
266}
267
268U_NAMESPACE_END
269
270/* !UCONFIG_NO_SERVICE */
271#endif
272
273