]> git.saurik.com Git - apple/cf.git/blob - CFLocaleIdentifier.c
CF-635.21.tar.gz
[apple/cf.git] / CFLocaleIdentifier.c
1 /*
2 * Copyright (c) 2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 CFLocaleIdentifier.c
26 Copyright (c) 2002-2011, Apple Inc. All rights reserved.
27 Responsibility: David Smith
28
29 CFLocaleIdentifier.c defines
30 - enum value kLocaleIdentifierCStringMax
31 - structs KeyStringToResultString, SpecialCaseUpdates
32 and provides the following data for the functions
33 CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes,
34 CFLocaleCreateCanonicalLocaleIdentifierFromString
35 CFLocaleCreateCanonicalLanguageIdentifierFromString
36
37 1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString;
38 map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string
39
40 2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString;
41 map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string
42
43 3. static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical;
44 map old Apple string oldAppleLocaleToCanonical[n].key
45 to canonical locale string oldAppleLocaleToCanonical[n].result
46 for n = 0..kNumOldAppleLocaleToCanonical-1
47
48 4. static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical;
49 map non-canonical language prefix (3-letter, obsolete) localeStringPrefixToCanonical[n].key
50 to updated replacement localeStringPrefixToCanonical[n].result
51 for n = 0..kNumLocaleStringPrefixToCanonical-1
52
53 5. static const SpecialCaseUpdates specialCases[];
54 various special cases for updating region codes, or for updating language codes based on region codes
55
56 6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults;
57 map locale string region tag localeStringRegionToDefaults[n].key
58 to default substrings to delete localeStringRegionToDefaults[n].result
59 for n = 0..kNumLocaleStringRegionToDefaults-1
60
61 7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults;
62 map locale string initial part localeStringPrefixToDefaults[n].key
63 to default substrings to delete localeStringPrefixToDefaults[n].result
64 for n = 0..kNumLocaleStringPrefixToDefaults-1
65
66 8. static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString;
67 map Apple locale string appleLocaleToLanguageString[n].key
68 to equivalent language string appleLocaleToLanguageString[n].result
69 for n = 0..kNumAppleLocaleToLanguageString-1
70
71 */
72
73 #include <CoreFoundation/CFString.h>
74 #include <CoreFoundation/CFCalendar.h>
75 #include <ctype.h>
76 #include <string.h>
77 #include <stdlib.h>
78 #include <stdio.h>
79 #include <unicode/uloc.h>
80 #include "CFInternal.h"
81 #include "CFLocaleInternal.h"
82
83 // Maximum size in bytes of an ASCII locale identifier stored as a C string, including the terminating null byte
84 enum {
85 kLocaleIdentifierCStringMax = ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY // sum of the two ULOC_* capacities (presumably from <unicode/uloc.h>); noted as 56 + 100 when written, actual values depend on the headers in use
86 };
87
88 // KeyStringToResultString: one { key, result } row of the data tables used for CFLocaleCreateCanonicalLocaleIdentifierFromString
89 struct KeyStringToResultString {
90 const char * key; // string to match (first column of each table)
91 const char * result; // string the key maps to (second column); its exact meaning depends on the table
92 };
93 typedef struct KeyStringToResultString KeyStringToResultString; // allows the type to be named without the struct keyword
94
95 // SpecialCaseUpdates: row type of the specialCases[] data table used for CFLocaleCreateCanonicalLocaleIdentifierFromString
96 struct SpecialCaseUpdates {
97 const char * lang; // NOTE(review): presumably the language code this row applies to - confirm against specialCases[]
98 const char * reg1; // NOTE(review): presumably the first region string to match - confirm
99 const char * update1; // NOTE(review): presumably the replacement used when reg1 matches - confirm
100 const char * reg2; // NOTE(review): presumably an optional second region string to match - confirm
101 const char * update2; // NOTE(review): presumably the replacement used when reg2 matches - confirm
102 };
103 typedef struct SpecialCaseUpdates SpecialCaseUpdates; // allows the type to be named without the struct keyword
104
105
106 static const char * const regionCodeToLocaleString[] = {
107 // Maps a RegionCode (used directly as the array index) to its canonical locale string.
108 // NULL entries are region codes that the column comments mark as unassigned or unused; entry order must not change.
109 // canon. string region code; language code; [comment] [ # __CFBundleLocaleAbbreviationsArray
110 // -------- ------------ ------------------ ------------ -------- string, if different ]
111 "en_US", // 0 verUS; 0 langEnglish;
112 "fr_FR", // 1 verFrance; 1 langFrench;
113 "en_GB", // 2 verBritain; 0 langEnglish;
114 "de_DE", // 3 verGermany; 2 langGerman;
115 "it_IT", // 4 verItaly; 3 langItalian;
116 "nl_NL", // 5 verNetherlands; 4 langDutch;
117 "nl_BE", // 6 verFlemish; 34 langFlemish (redundant, =Dutch);
118 "sv_SE", // 7 verSweden; 5 langSwedish;
119 "es_ES", // 8 verSpain; 6 langSpanish;
120 "da_DK", // 9 verDenmark; 7 langDanish;
121 "pt_PT", // 10 verPortugal; 8 langPortuguese;
122 "fr_CA", // 11 verFrCanada; 1 langFrench;
123 "nb_NO", // 12 verNorway; 9 langNorwegian (Bokmal); # "no_NO"
124 "he_IL", // 13 verIsrael; 10 langHebrew;
125 "ja_JP", // 14 verJapan; 11 langJapanese;
126 "en_AU", // 15 verAustralia; 0 langEnglish;
127 "ar", // 16 verArabic; 12 langArabic;
128 "fi_FI", // 17 verFinland; 13 langFinnish;
129 "fr_CH", // 18 verFrSwiss; 1 langFrench;
130 "de_CH", // 19 verGrSwiss; 2 langGerman;
131 "el_GR", // 20 verGreece; 14 langGreek (modern)-Grek-mono;
132 "is_IS", // 21 verIceland; 15 langIcelandic;
133 "mt_MT", // 22 verMalta; 16 langMaltese;
134 "el_CY", // 23 verCyprus; 14 langGreek?; el or tr? guess el # ""
135 "tr_TR", // 24 verTurkey; 17 langTurkish;
136 "hr_HR", // 25 verYugoCroatian; 18 langCroatian; * one-way mapping -> verCroatia
137 "nl_NL", // 26 KCHR, Netherlands; 4 langDutch; * one-way mapping
138 "nl_BE", // 27 KCHR, verFlemish; 34 langFlemish; * one-way mapping
139 "_CA", // 28 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
140 "_CA", // 29 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
141 "pt_PT", // 30 KCHR, Portugal; 8 langPortuguese; * one-way mapping
142 "nb_NO", // 31 KCHR, Norway; 9 langNorwegian (Bokmal); * one-way mapping # "no_NO"
143 "da_DK", // 32 KCHR, Denmark; 7 langDanish; * one-way mapping
144 "hi_IN", // 33 verIndiaHindi; 21 langHindi;
145 "ur_PK", // 34 verPakistanUrdu; 20 langUrdu;
146 "tr_TR", // 35 verTurkishModified; 17 langTurkish; * one-way mapping
147 "it_CH", // 36 verItalianSwiss; 3 langItalian;
148 "en_001", // 37 verInternational; 0 langEnglish; ASCII only # "en"
149 NULL, // 38 *unassigned; -1 none; * one-way mapping # ""
150 "ro_RO", // 39 verRomania; 37 langRomanian;
151 "grc", // 40 verGreekAncient; 148 langGreekAncient -Grek-poly; # "el_GR"
152 "lt_LT", // 41 verLithuania; 24 langLithuanian;
153 "pl_PL", // 42 verPoland; 25 langPolish;
154 "hu_HU", // 43 verHungary; 26 langHungarian;
155 "et_EE", // 44 verEstonia; 27 langEstonian;
156 "lv_LV", // 45 verLatvia; 28 langLatvian;
157 "se", // 46 verSami; 29 langSami;
158 "fo_FO", // 47 verFaroeIsl; 30 langFaroese;
159 "fa_IR", // 48 verIran; 31 langFarsi/Persian;
160 "ru_RU", // 49 verRussia; 32 langRussian;
161 "ga_IE", // 50 verIreland; 35 langIrishGaelic (no dots);
162 "ko_KR", // 51 verKorea; 23 langKorean;
163 "zh_CN", // 52 verChina; 33 langSimpChinese;
164 "zh_TW", // 53 verTaiwan; 19 langTradChinese;
165 "th_TH", // 54 verThailand; 22 langThai;
166 "und", // 55 verScriptGeneric; -1 none; # "" // <1.9>
167 "cs_CZ", // 56 verCzech; 38 langCzech;
168 "sk_SK", // 57 verSlovak; 39 langSlovak;
169 "und", // 58 verEastAsiaGeneric; -1 none; * one-way mapping # "" // <1.9>
170 "hu_HU", // 59 verMagyar; 26 langHungarian; * one-way mapping -> verHungary
171 "bn", // 60 verBengali; 67 langBengali; _IN or _BD? guess generic
172 "be_BY", // 61 verBelarus; 46 langBelorussian;
173 "uk_UA", // 62 verUkraine; 45 langUkrainian;
174 NULL, // 63 *unused; -1 none; * one-way mapping # ""
175 "el_GR", // 64 verGreeceAlt; 14 langGreek (modern)-Grek-mono; * one-way mapping
176 "sr_RS", // 65 verSerbian; 42 langSerbian -Cyrl; // <1.18>
177 "sl_SI", // 66 verSlovenian; 40 langSlovenian;
178 "mk_MK", // 67 verMacedonian; 43 langMacedonian;
179 "hr_HR", // 68 verCroatia; 18 langCroatian;
180 NULL, // 69 *unused; -1 none; * one-way mapping # ""
181 "de-1996", // 70 verGermanReformed; 2 langGerman; 1996 orthogr. # "de_DE"
182 "pt_BR", // 71 verBrazil; 8 langPortuguese;
183 "bg_BG", // 72 verBulgaria; 44 langBulgarian;
184 "ca_ES", // 73 verCatalonia; 130 langCatalan;
185 "mul", // 74 verMultilingual; -1 none; # ""
186 "gd", // 75 verScottishGaelic; 144 langScottishGaelic;
187 "gv", // 76 verManxGaelic; 145 langManxGaelic;
188 "br", // 77 verBreton; 142 langBreton;
189 "iu_CA", // 78 verNunavut; 143 langInuktitut -Cans;
190 "cy", // 79 verWelsh; 128 langWelsh;
191 "_CA", // 80 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
192 "ga-Latg_IE", // 81 verIrishGaelicScrip; 146 langIrishGaelicScript -dots; # "ga_IE" // <xx>
193 "en_CA", // 82 verEngCanada; 0 langEnglish;
194 "dz_BT", // 83 verBhutan; 137 langDzongkha;
195 "hy_AM", // 84 verArmenian; 51 langArmenian;
196 "ka_GE", // 85 verGeorgian; 52 langGeorgian;
197 "es_419", // 86 verSpLatinAmerica; 6 langSpanish; # "es"
198 "es_ES", // 87 KCHR, Spain; 6 langSpanish; * one-way mapping
199 "to_TO", // 88 verTonga; 147 langTongan;
200 "pl_PL", // 89 KCHR, Poland; 25 langPolish; * one-way mapping
201 "ca_ES", // 90 KCHR, Catalonia; 130 langCatalan; * one-way mapping
202 "fr_001", // 91 verFrenchUniversal; 1 langFrench;
203 "de_AT", // 92 verAustria; 2 langGerman;
204 "es_419", // 93 > verSpLatinAmerica; 6 langSpanish; * one-way mapping # "es"
205 "gu_IN", // 94 verGujarati; 69 langGujarati;
206 "pa", // 95 verPunjabi; 70 langPunjabi; _IN or _PK? guess generic
207 "ur_IN", // 96 verIndiaUrdu; 20 langUrdu;
208 "vi_VN", // 97 verVietnam; 80 langVietnamese;
209 "fr_BE", // 98 verFrBelgium; 1 langFrench;
210 "uz_UZ", // 99 verUzbek; 47 langUzbek;
211 "en_SG", // 100 verSingapore; 0 langEnglish?; en, zh, or ms? guess en # ""
212 "nn_NO", // 101 verNynorsk; 151 langNynorsk; # ""
213 "af_ZA", // 102 verAfrikaans; 141 langAfrikaans;
214 "eo", // 103 verEsperanto; 94 langEsperanto;
215 "mr_IN", // 104 verMarathi; 66 langMarathi;
216 "bo", // 105 verTibetan; 63 langTibetan;
217 "ne_NP", // 106 verNepal; 64 langNepali;
218 "kl", // 107 verGreenland; 149 langGreenlandic;
219 "en_IE", // 108 verIrelandEnglish; 0 langEnglish; # (no entry)
220 };
221 enum {
222 kNumRegionCodeToLocaleString = sizeof(regionCodeToLocaleString)/sizeof(char *) // entry count of regionCodeToLocaleString; valid RegionCode indexes are 0..kNumRegionCodeToLocaleString-1
223 };
224
225 static const char * const langCodeToLocaleString[] = {
226 // Maps a LangCode (used directly as the array index) to its canonical locale string.
227 // NULL entries fill the unassigned LangCode range 95 to 127 so that later codes stay at their own index.
228 // canon. string language code; [ comment] [ # __CFBundleLanguageAbbreviationsArray
229 // -------- -------------- ---------- -------- string, if different ]
230 "en", // 0 langEnglish;
231 "fr", // 1 langFrench;
232 "de", // 2 langGerman;
233 "it", // 3 langItalian;
234 "nl", // 4 langDutch;
235 "sv", // 5 langSwedish;
236 "es", // 6 langSpanish;
237 "da", // 7 langDanish;
238 "pt", // 8 langPortuguese;
239 "nb", // 9 langNorwegian (Bokmal); # "no"
240 "he", // 10 langHebrew -Hebr;
241 "ja", // 11 langJapanese -Jpan;
242 "ar", // 12 langArabic -Arab;
243 "fi", // 13 langFinnish;
244 "el", // 14 langGreek (modern)-Grek-mono;
245 "is", // 15 langIcelandic;
246 "mt", // 16 langMaltese -Latn;
247 "tr", // 17 langTurkish -Latn;
248 "hr", // 18 langCroatian;
249 "zh-Hant", // 19 langTradChinese; # "zh"
250 "ur", // 20 langUrdu -Arab;
251 "hi", // 21 langHindi -Deva;
252 "th", // 22 langThai -Thai;
253 "ko", // 23 langKorean -Hang;
254 "lt", // 24 langLithuanian;
255 "pl", // 25 langPolish;
256 "hu", // 26 langHungarian;
257 "et", // 27 langEstonian;
258 "lv", // 28 langLatvian;
259 "se", // 29 langSami;
260 "fo", // 30 langFaroese;
261 "fa", // 31 langFarsi/Persian -Arab;
262 "ru", // 32 langRussian -Cyrl;
263 "zh-Hans", // 33 langSimpChinese; # "zh"
264 "nl-BE", // 34 langFlemish (redundant, =Dutch); # "nl"
265 "ga", // 35 langIrishGaelic (no dots);
266 "sq", // 36 langAlbanian; no region codes
267 "ro", // 37 langRomanian;
268 "cs", // 38 langCzech;
269 "sk", // 39 langSlovak;
270 "sl", // 40 langSlovenian;
271 "yi", // 41 langYiddish -Hebr; no region codes
272 "sr", // 42 langSerbian -Cyrl;
273 "mk", // 43 langMacedonian -Cyrl;
274 "bg", // 44 langBulgarian -Cyrl;
275 "uk", // 45 langUkrainian -Cyrl;
276 "be", // 46 langBelorussian -Cyrl;
277 "uz-Cyrl", // 47 langUzbek -Cyrl; also -Latn, -Arab
278 "kk", // 48 langKazakh -Cyrl; no region codes; also -Latn, -Arab
279 "az-Cyrl", // 49 langAzerbaijani -Cyrl; no region codes # "az"
280 "az-Arab", // 50 langAzerbaijanAr -Arab; no region codes # "az"
281 "hy", // 51 langArmenian -Armn;
282 "ka", // 52 langGeorgian -Geor;
283 "mo", // 53 langMoldavian -Cyrl; no region codes
284 "ky", // 54 langKirghiz -Cyrl; no region codes; also -Latn, -Arab
285 "tg-Cyrl", // 55 langTajiki -Cyrl; no region codes; also -Latn, -Arab
286 "tk-Cyrl", // 56 langTurkmen -Cyrl; no region codes; also -Latn, -Arab
287 "mn-Mong", // 57 langMongolian -Mong; no region codes # "mn"
288 "mn-Cyrl", // 58 langMongolianCyr -Cyrl; no region codes # "mn"
289 "ps", // 59 langPashto -Arab; no region codes
290 "ku", // 60 langKurdish -Arab; no region codes
291 "ks", // 61 langKashmiri -Arab; no region codes
292 "sd", // 62 langSindhi -Arab; no region codes
293 "bo", // 63 langTibetan -Tibt;
294 "ne", // 64 langNepali -Deva;
295 "sa", // 65 langSanskrit -Deva; no region codes
296 "mr", // 66 langMarathi -Deva;
297 "bn", // 67 langBengali -Beng;
298 "as", // 68 langAssamese -Beng; no region codes
299 "gu", // 69 langGujarati -Gujr;
300 "pa", // 70 langPunjabi -Guru;
301 "or", // 71 langOriya -Orya; no region codes
302 "ml", // 72 langMalayalam -Mlym; no region codes
303 "kn", // 73 langKannada -Knda; no region codes
304 "ta", // 74 langTamil -Taml; no region codes
305 "te", // 75 langTelugu -Telu; no region codes
306 "si", // 76 langSinhalese -Sinh; no region codes
307 "my", // 77 langBurmese -Mymr; no region codes
308 "km", // 78 langKhmer -Khmr; no region codes
309 "lo", // 79 langLao -Laoo; no region codes
310 "vi", // 80 langVietnamese -Latn;
311 "id", // 81 langIndonesian -Latn; no region codes
312 "tl", // 82 langTagalog -Latn; no region codes
313 "ms", // 83 langMalayRoman -Latn; no region codes # "ms"
314 "ms-Arab", // 84 langMalayArabic -Arab; no region codes # "ms"
315 "am", // 85 langAmharic -Ethi; no region codes
316 "ti", // 86 langTigrinya -Ethi; no region codes
317 "om", // 87 langOromo -Ethi; no region codes
318 "so", // 88 langSomali -Latn; no region codes
319 "sw", // 89 langSwahili -Latn; no region codes
320 "rw", // 90 langKinyarwanda -Latn; no region codes
321 "rn", // 91 langRundi -Latn; no region codes
322 "ny", // 92 langNyanja/Chewa -Latn; no region codes # ""
323 "mg", // 93 langMalagasy -Latn; no region codes
324 "eo", // 94 langEsperanto -Latn;
325 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 95 to 105 (gap)
326 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 106 to 116 (gap)
327 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 117 to 127 (gap)
328 "cy", // 128 langWelsh -Latn;
329 "eu", // 129 langBasque -Latn; no region codes
330 "ca", // 130 langCatalan -Latn;
331 "la", // 131 langLatin -Latn; no region codes
332 "qu", // 132 langQuechua -Latn; no region codes
333 "gn", // 133 langGuarani -Latn; no region codes
334 "ay", // 134 langAymara -Latn; no region codes
335 "tt-Cyrl", // 135 langTatar -Cyrl; no region codes
336 "ug", // 136 langUighur -Arab; no region codes
337 "dz", // 137 langDzongkha -Tibt;
338 "jv", // 138 langJavaneseRom -Latn; no region codes
339 "su", // 139 langSundaneseRom -Latn; no region codes
340 "gl", // 140 langGalician -Latn; no region codes
341 "af", // 141 langAfrikaans -Latn;
342 "br", // 142 langBreton -Latn;
343 "iu", // 143 langInuktitut -Cans;
344 "gd", // 144 langScottishGaelic;
345 "gv", // 145 langManxGaelic -Latn;
346 "ga-Latg", // 146 langIrishGaelicScript -Latn-dots; # "ga" // <xx>
347 "to", // 147 langTongan -Latn;
348 "grc", // 148 langGreekAncient -Grek-poly; # "el"
349 "kl", // 149 langGreenlandic -Latn;
350 "az-Latn", // 150 langAzerbaijanRoman -Latn; no region codes # "az"
351 "nn", // 151 langNynorsk -Latn; # (no entry)
352 };
353 enum {
354 kNumLangCodeToLocaleString = sizeof(langCodeToLocaleString)/sizeof(char *) // entry count of langCodeToLocaleString (NULL gap entries included); valid LangCode indexes are 0..kNumLangCodeToLocaleString-1
355 };
356
357 static const KeyStringToResultString oldAppleLocaleToCanonical[] = {
358 // Maps obsolete/old-style Apple locale strings (first column) to their canonical replacements (second column).
359 // Rows must stay sorted by the first column in strcmp order, so capitalized names precede the lowercase codes. NOTE(review): presumably required by a sorted-table lookup - confirm at the lookup site.
360 //
361 // non-canonical canonical [ comment ] # source/reason for non-canonical string
362 // string string
363 // ------------- ---------
364 { "Afrikaans", "af" }, // # __CFBundleLanguageNamesArray
365 { "Albanian", "sq" }, // # __CFBundleLanguageNamesArray
366 { "Amharic", "am" }, // # __CFBundleLanguageNamesArray
367 { "Arabic", "ar" }, // # __CFBundleLanguageNamesArray
368 { "Armenian", "hy" }, // # __CFBundleLanguageNamesArray
369 { "Assamese", "as" }, // # __CFBundleLanguageNamesArray
370 { "Aymara", "ay" }, // # __CFBundleLanguageNamesArray
371 { "Azerbaijani", "az" }, // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn")
372 { "Basque", "eu" }, // # __CFBundleLanguageNamesArray
373 { "Belarusian", "be" }, // # handle other names
374 { "Belorussian", "be" }, // # handle other names
375 { "Bengali", "bn" }, // # __CFBundleLanguageNamesArray
376 { "Brazilian Portugese", "pt-BR" }, // # from Installer.app Info.plist IFLanguages key, misspelled
377 { "Brazilian Portuguese", "pt-BR" }, // # correct spelling for above
378 { "Breton", "br" }, // # __CFBundleLanguageNamesArray
379 { "Bulgarian", "bg" }, // # __CFBundleLanguageNamesArray
380 { "Burmese", "my" }, // # __CFBundleLanguageNamesArray
381 { "Byelorussian", "be" }, // # __CFBundleLanguageNamesArray
382 { "Catalan", "ca" }, // # __CFBundleLanguageNamesArray
383 { "Chewa", "ny" }, // # handle other names
384 { "Chichewa", "ny" }, // # handle other names
385 { "Chinese", "zh" }, // -Hans,-Hant? # __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans")
386 { "Chinese, Simplified", "zh-Hans" }, // # from Installer.app Info.plist IFLanguages key
387 { "Chinese, Traditional", "zh-Hant" }, // # correct spelling for below
388 { "Chinese, Tradtional", "zh-Hant" }, // # from Installer.app Info.plist IFLanguages key, misspelled
389 { "Croatian", "hr" }, // # __CFBundleLanguageNamesArray
390 { "Czech", "cs" }, // # __CFBundleLanguageNamesArray
391 { "Danish", "da" }, // # __CFBundleLanguageNamesArray
392 { "Dutch", "nl" }, // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE")
393 { "Dzongkha", "dz" }, // # __CFBundleLanguageNamesArray
394 { "English", "en" }, // # __CFBundleLanguageNamesArray
395 { "Esperanto", "eo" }, // # __CFBundleLanguageNamesArray
396 { "Estonian", "et" }, // # __CFBundleLanguageNamesArray
397 { "Faroese", "fo" }, // # __CFBundleLanguageNamesArray
398 { "Farsi", "fa" }, // # __CFBundleLanguageNamesArray
399 { "Finnish", "fi" }, // # __CFBundleLanguageNamesArray
400 { "Flemish", "nl-BE" }, // # handle other names
401 { "French", "fr" }, // # __CFBundleLanguageNamesArray
402 { "Galician", "gl" }, // # __CFBundleLanguageNamesArray
403 { "Gallegan", "gl" }, // # handle other names
404 { "Georgian", "ka" }, // # __CFBundleLanguageNamesArray
405 { "German", "de" }, // # __CFBundleLanguageNamesArray
406 { "Greek", "el" }, // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc")
407 { "Greenlandic", "kl" }, // # __CFBundleLanguageNamesArray
408 { "Guarani", "gn" }, // # __CFBundleLanguageNamesArray
409 { "Gujarati", "gu" }, // # __CFBundleLanguageNamesArray
410 { "Hawaiian", "haw" }, // # handle new languages
411 { "Hebrew", "he" }, // # __CFBundleLanguageNamesArray
412 { "Hindi", "hi" }, // # __CFBundleLanguageNamesArray
413 { "Hungarian", "hu" }, // # __CFBundleLanguageNamesArray
414 { "Icelandic", "is" }, // # __CFBundleLanguageNamesArray
415 { "Indonesian", "id" }, // # __CFBundleLanguageNamesArray
416 { "Inuktitut", "iu" }, // # __CFBundleLanguageNamesArray
417 { "Irish", "ga" }, // # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots")
418 { "Italian", "it" }, // # __CFBundleLanguageNamesArray
419 { "Japanese", "ja" }, // # __CFBundleLanguageNamesArray
420 { "Javanese", "jv" }, // # __CFBundleLanguageNamesArray
421 { "Kalaallisut", "kl" }, // # handle other names
422 { "Kannada", "kn" }, // # __CFBundleLanguageNamesArray
423 { "Kashmiri", "ks" }, // # __CFBundleLanguageNamesArray
424 { "Kazakh", "kk" }, // # __CFBundleLanguageNamesArray
425 { "Khmer", "km" }, // # __CFBundleLanguageNamesArray
426 { "Kinyarwanda", "rw" }, // # __CFBundleLanguageNamesArray
427 { "Kirghiz", "ky" }, // # __CFBundleLanguageNamesArray
428 { "Korean", "ko" }, // # __CFBundleLanguageNamesArray
429 { "Kurdish", "ku" }, // # __CFBundleLanguageNamesArray
430 { "Lao", "lo" }, // # __CFBundleLanguageNamesArray
431 { "Latin", "la" }, // # __CFBundleLanguageNamesArray
432 { "Latvian", "lv" }, // # __CFBundleLanguageNamesArray
433 { "Lithuanian", "lt" }, // # __CFBundleLanguageNamesArray
434 { "Macedonian", "mk" }, // # __CFBundleLanguageNamesArray
435 { "Malagasy", "mg" }, // # __CFBundleLanguageNamesArray
436 { "Malay", "ms" }, // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab")
437 { "Malayalam", "ml" }, // # __CFBundleLanguageNamesArray
438 { "Maltese", "mt" }, // # __CFBundleLanguageNamesArray
439 { "Manx", "gv" }, // # __CFBundleLanguageNamesArray
440 { "Marathi", "mr" }, // # __CFBundleLanguageNamesArray
441 { "Moldavian", "mo" }, // # __CFBundleLanguageNamesArray
442 { "Mongolian", "mn" }, // -Mong,-Cyrl? # __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl")
443 { "Nepali", "ne" }, // # __CFBundleLanguageNamesArray
444 { "Norwegian", "nb" }, // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no")
445 { "Nyanja", "ny" }, // # __CFBundleLanguageNamesArray
446 { "Nynorsk", "nn" }, // # handle other names (no entry in __CFBundleLanguageNamesArray)
447 { "Oriya", "or" }, // # __CFBundleLanguageNamesArray
448 { "Oromo", "om" }, // # __CFBundleLanguageNamesArray
449 { "Panjabi", "pa" }, // # handle other names
450 { "Pashto", "ps" }, // # __CFBundleLanguageNamesArray
451 { "Persian", "fa" }, // # handle other names
452 { "Polish", "pl" }, // # __CFBundleLanguageNamesArray
453 { "Portuguese", "pt" }, // # __CFBundleLanguageNamesArray
454 { "Portuguese, Brazilian", "pt-BR" }, // # handle other names
455 { "Punjabi", "pa" }, // # __CFBundleLanguageNamesArray
456 { "Pushto", "ps" }, // # handle other names
457 { "Quechua", "qu" }, // # __CFBundleLanguageNamesArray
458 { "Romanian", "ro" }, // # __CFBundleLanguageNamesArray
459 { "Ruanda", "rw" }, // # handle other names
460 { "Rundi", "rn" }, // # __CFBundleLanguageNamesArray
461 { "Russian", "ru" }, // # __CFBundleLanguageNamesArray
462 { "Sami", "se" }, // # __CFBundleLanguageNamesArray
463 { "Sanskrit", "sa" }, // # __CFBundleLanguageNamesArray
464 { "Scottish", "gd" }, // # __CFBundleLanguageNamesArray
465 { "Serbian", "sr" }, // # __CFBundleLanguageNamesArray
466 { "Simplified Chinese", "zh-Hans" }, // # handle other names
467 { "Sindhi", "sd" }, // # __CFBundleLanguageNamesArray
468 { "Sinhalese", "si" }, // # __CFBundleLanguageNamesArray
469 { "Slovak", "sk" }, // # __CFBundleLanguageNamesArray
470 { "Slovenian", "sl" }, // # __CFBundleLanguageNamesArray
471 { "Somali", "so" }, // # __CFBundleLanguageNamesArray
472 { "Spanish", "es" }, // # __CFBundleLanguageNamesArray
473 { "Sundanese", "su" }, // # __CFBundleLanguageNamesArray
474 { "Swahili", "sw" }, // # __CFBundleLanguageNamesArray
475 { "Swedish", "sv" }, // # __CFBundleLanguageNamesArray
476 { "Tagalog", "tl" }, // # __CFBundleLanguageNamesArray
477 { "Tajik", "tg" }, // # handle other names
478 { "Tajiki", "tg" }, // # __CFBundleLanguageNamesArray
479 { "Tamil", "ta" }, // # __CFBundleLanguageNamesArray
480 { "Tatar", "tt" }, // # __CFBundleLanguageNamesArray
481 { "Telugu", "te" }, // # __CFBundleLanguageNamesArray
482 { "Thai", "th" }, // # __CFBundleLanguageNamesArray
483 { "Tibetan", "bo" }, // # __CFBundleLanguageNamesArray
484 { "Tigrinya", "ti" }, // # __CFBundleLanguageNamesArray
485 { "Tongan", "to" }, // # __CFBundleLanguageNamesArray
486 { "Traditional Chinese", "zh-Hant" }, // # handle other names
487 { "Turkish", "tr" }, // # __CFBundleLanguageNamesArray
488 { "Turkmen", "tk" }, // # __CFBundleLanguageNamesArray
489 { "Uighur", "ug" }, // # __CFBundleLanguageNamesArray
490 { "Ukrainian", "uk" }, // # __CFBundleLanguageNamesArray
491 { "Urdu", "ur" }, // # __CFBundleLanguageNamesArray
492 { "Uzbek", "uz" }, // # __CFBundleLanguageNamesArray
493 { "Vietnamese", "vi" }, // # __CFBundleLanguageNamesArray
494 { "Welsh", "cy" }, // # __CFBundleLanguageNamesArray
495 { "Yiddish", "yi" }, // # __CFBundleLanguageNamesArray
496 { "ar_??", "ar" }, // # from old MapScriptInfoAndISOCodes
497 { "az.Ar", "az-Arab" }, // # from old LocaleRefGetPartString
498 { "az.Cy", "az-Cyrl" }, // # from old LocaleRefGetPartString
499 { "az.La", "az-Latn" }, // # from old LocaleRefGetPartString
500 { "be_??", "be_BY" }, // # from old MapScriptInfoAndISOCodes
501 { "bn_??", "bn" }, // # from old LocaleRefGetPartString
502 { "bo_??", "bo" }, // # from old MapScriptInfoAndISOCodes
503 { "br_??", "br" }, // # from old MapScriptInfoAndISOCodes
504 { "cy_??", "cy" }, // # from old MapScriptInfoAndISOCodes
505 { "de-96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9>
506 { "de_96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9>
507 { "de_??", "de-1996" }, // # from old MapScriptInfoAndISOCodes
508 { "el.El-P", "grc" }, // # from old LocaleRefGetPartString
509 { "en-ascii", "en_001" }, // # from earlier version of tables in this file!
510 { "en_??", "en_001" }, // # from old MapScriptInfoAndISOCodes
511 { "eo_??", "eo" }, // # from old MapScriptInfoAndISOCodes
512 { "es_??", "es_419" }, // # from old MapScriptInfoAndISOCodes
513 { "es_XL", "es_419" }, // # from earlier version of tables in this file!
514 { "fr_??", "fr_001" }, // # from old MapScriptInfoAndISOCodes
515 { "ga-dots", "ga-Latg" }, // # from earlier version of tables in this file! // <1.8>
516 { "ga-dots_IE", "ga-Latg_IE" }, // # from earlier version of tables in this file! // <1.8>
517 { "ga.Lg", "ga-Latg" }, // # from old LocaleRefGetPartString // <1.8>
518 { "ga.Lg_IE", "ga-Latg_IE" }, // # from old LocaleRefGetPartString // <1.8>
519 { "gd_??", "gd" }, // # from old MapScriptInfoAndISOCodes
520 { "gv_??", "gv" }, // # from old MapScriptInfoAndISOCodes
521 { "jv.La", "jv" }, // # logical extension // <1.9>
522 { "jw.La", "jv" }, // # from old LocaleRefGetPartString
523 { "kk.Cy", "kk" }, // # from old LocaleRefGetPartString
524 { "kl.La", "kl" }, // # from old LocaleRefGetPartString
525 { "kl.La_GL", "kl_GL" }, // # from old LocaleRefGetPartString // <1.9>
526 { "lp_??", "se" }, // # from old MapScriptInfoAndISOCodes
527 { "mk_??", "mk_MK" }, // # from old MapScriptInfoAndISOCodes
528 { "mn.Cy", "mn-Cyrl" }, // # from old LocaleRefGetPartString
529 { "mn.Mn", "mn-Mong" }, // # from old LocaleRefGetPartString
530 { "ms.Ar", "ms-Arab" }, // # from old LocaleRefGetPartString
531 { "ms.La", "ms" }, // # from old LocaleRefGetPartString
532 { "nl-be", "nl-BE" }, // # from old LocaleRefGetPartString
533 { "nl-be_BE", "nl_BE" }, // # from old LocaleRefGetPartString
534 { "no-NO", "nb-NO" }, // # not handled by localeStringPrefixToCanonical
535 { "no-NO_NO", "nb-NO_NO" }, // # not handled by localeStringPrefixToCanonical
536 // { "no-bok_NO", "nb_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
537 // { "no-nyn_NO", "nn_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
538 // { "nya", "ny" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
539 { "pa_??", "pa" }, // # from old LocaleRefGetPartString
540 { "sa.Dv", "sa" }, // # from old LocaleRefGetPartString
541 { "sl_??", "sl_SI" }, // # from old MapScriptInfoAndISOCodes
542 { "sr_??", "sr_RS" }, // # from old MapScriptInfoAndISOCodes // <1.18>
543 { "su.La", "su" }, // # from old LocaleRefGetPartString
544 { "yi.He", "yi" }, // # from old LocaleRefGetPartString
545 { "zh-simp", "zh-Hans" }, // # from earlier version of tables in this file!
546 { "zh-trad", "zh-Hant" }, // # from earlier version of tables in this file!
547 { "zh.Ha-S", "zh-Hans" }, // # from old LocaleRefGetPartString
548 { "zh.Ha-S_CN", "zh_CN" }, // # from old LocaleRefGetPartString
549 { "zh.Ha-T", "zh-Hant" }, // # from old LocaleRefGetPartString
550 { "zh.Ha-T_TW", "zh_TW" }, // # from old LocaleRefGetPartString
551 };
552 enum {
553 kNumOldAppleLocaleToCanonical = sizeof(oldAppleLocaleToCanonical)/sizeof(KeyStringToResultString) // row count of oldAppleLocaleToCanonical; valid row indexes are 0..kNumOldAppleLocaleToCanonical-1
554 };
555
556 static const KeyStringToResultString localeStringPrefixToCanonical[] = {
557 // Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to 2-letter ISO 639 code.
558 // (special cases for 'sh' handled separately)
559 // First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column.
560 //
561 // non-canonical canonical [ comment ] # source/reason for non-canonical string
562 // prefix prefix
563 // ------------- ---------
564
565 { "afr", "af" }, // Afrikaans
566 { "alb", "sq" }, // Albanian
567 { "amh", "am" }, // Amharic
568 { "ara", "ar" }, // Arabic
569 { "arm", "hy" }, // Armenian
570 { "asm", "as" }, // Assamese
571 { "aym", "ay" }, // Aymara
572 { "aze", "az" }, // Azerbaijani
573 { "baq", "eu" }, // Basque
574 { "bel", "be" }, // Belarusian
575 { "ben", "bn" }, // Bengali
576 { "bih", "bh" }, // Bihari
577 { "bod", "bo" }, // Tibetan
578 { "bos", "bs" }, // Bosnian
579 { "bre", "br" }, // Breton
580 { "bul", "bg" }, // Bulgarian
581 { "bur", "my" }, // Burmese
582 { "cat", "ca" }, // Catalan
583 { "ces", "cs" }, // Czech
584 { "che", "ce" }, // Chechen
585 { "chi", "zh" }, // Chinese
586 { "cor", "kw" }, // Cornish
587 { "cos", "co" }, // Corsican
588 { "cym", "cy" }, // Welsh
589 { "cze", "cs" }, // Czech
590 { "dan", "da" }, // Danish
591 { "deu", "de" }, // German
592 { "dut", "nl" }, // Dutch
593 { "dzo", "dz" }, // Dzongkha
594 { "ell", "el" }, // Greek, Modern (1453-)
595 { "eng", "en" }, // English
596 { "epo", "eo" }, // Esperanto
597 { "est", "et" }, // Estonian
598 { "eus", "eu" }, // Basque
599 { "fao", "fo" }, // Faroese
600 { "fas", "fa" }, // Persian
601 { "fin", "fi" }, // Finnish
602 { "fra", "fr" }, // French
603 { "fre", "fr" }, // French
604 { "geo", "ka" }, // Georgian
605 { "ger", "de" }, // German
606 { "gla", "gd" }, // Gaelic,Scottish
607 { "gle", "ga" }, // Irish
608 { "glg", "gl" }, // Gallegan
609 { "glv", "gv" }, // Manx
610 { "gre", "el" }, // Greek, Modern (1453-)
611 { "grn", "gn" }, // Guarani
612 { "guj", "gu" }, // Gujarati
613 { "heb", "he" }, // Hebrew
614 { "hin", "hi" }, // Hindi
615 { "hrv", "hr" }, // Croatian
616 { "hun", "hu" }, // Hungarian
617 { "hye", "hy" }, // Armenian
618 { "i-hak", "zh-hakka" }, // Hakka # deprecated RFC 3066
619 { "i-lux", "lb" }, // Luxembourgish # deprecated RFC 3066
620 { "i-navajo", "nv" }, // Navajo # deprecated RFC 3066
621 { "ice", "is" }, // Icelandic
622 { "iku", "iu" }, // Inuktitut
623 { "ile", "ie" }, // Interlingue
624 { "in", "id" }, // Indonesian # deprecated 639 code in -> id (1989)
625 { "ina", "ia" }, // Interlingua
626 { "ind", "id" }, // Indonesian
627 { "isl", "is" }, // Icelandic
628 { "ita", "it" }, // Italian
629 { "iw", "he" }, // Hebrew # deprecated 639 code iw -> he (1989)
630 { "jav", "jv" }, // Javanese
631 { "jaw", "jv" }, // Javanese # deprecated 639 code jaw -> jv (2001)
632 { "ji", "yi" }, // Yiddish # deprecated 639 code ji -> yi (1989)
633 { "jpn", "ja" }, // Japanese
634 { "kal", "kl" }, // Kalaallisut
635 { "kan", "kn" }, // Kannada
636 { "kas", "ks" }, // Kashmiri
637 { "kat", "ka" }, // Georgian
638 { "kaz", "kk" }, // Kazakh
639 { "khm", "km" }, // Khmer
640 { "kin", "rw" }, // Kinyarwanda
641 { "kir", "ky" }, // Kirghiz
642 { "kor", "ko" }, // Korean
643 { "kur", "ku" }, // Kurdish
644 { "lao", "lo" }, // Lao
645 { "lat", "la" }, // Latin
646 { "lav", "lv" }, // Latvian
647 { "lit", "lt" }, // Lithuanian
648 { "ltz", "lb" }, // Letzeburgesch
649 { "mac", "mk" }, // Macedonian
650 { "mal", "ml" }, // Malayalam
651 { "mar", "mr" }, // Marathi
652 { "may", "ms" }, // Malay
653 { "mkd", "mk" }, // Macedonian
654 { "mlg", "mg" }, // Malagasy
655 { "mlt", "mt" }, // Maltese
656 { "mol", "mo" }, // Moldavian
657 { "mon", "mn" }, // Mongolian
658 { "msa", "ms" }, // Malay
659 { "mya", "my" }, // Burmese
660 { "nep", "ne" }, // Nepali
661 { "nld", "nl" }, // Dutch
662 { "nno", "nn" }, // Norwegian Nynorsk
663 { "no", "nb" }, // Norwegian generic # ambiguous 639 code no -> nb
664 { "no-bok", "nb" }, // Norwegian Bokmal # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
665 { "no-nyn", "nn" }, // Norwegian Nynorsk # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
666 { "nob", "nb" }, // Norwegian Bokmal
667 { "nor", "nb" }, // Norwegian generic # ambiguous 639 code nor -> nb
668 { "nya", "ny" }, // Nyanja/Chewa/Chichewa # 3-letter code used in old LocaleRefGetPartString
669 { "oci", "oc" }, // Occitan/Provencal
670 { "ori", "or" }, // Oriya
671 { "orm", "om" }, // Oromo,Galla
672 { "pan", "pa" }, // Panjabi
673 { "per", "fa" }, // Persian
674 { "pol", "pl" }, // Polish
675 { "por", "pt" }, // Portuguese
676 { "pus", "ps" }, // Pushto
677 { "que", "qu" }, // Quechua
678 { "roh", "rm" }, // Raeto-Romance
679 { "ron", "ro" }, // Romanian
680 { "rum", "ro" }, // Romanian
681 { "run", "rn" }, // Rundi
682 { "rus", "ru" }, // Russian
683 { "san", "sa" }, // Sanskrit
684 { "scc", "sr" }, // Serbian
685 { "scr", "hr" }, // Croatian
686 { "sin", "si" }, // Sinhalese
687 { "slk", "sk" }, // Slovak
688 { "slo", "sk" }, // Slovak
689 { "slv", "sl" }, // Slovenian
690 { "sme", "se" }, // Sami,Northern
691 { "snd", "sd" }, // Sindhi
692 { "som", "so" }, // Somali
693 { "spa", "es" }, // Spanish
694 { "sqi", "sq" }, // Albanian
695 { "srp", "sr" }, // Serbian
696 { "sun", "su" }, // Sundanese
697 { "swa", "sw" }, // Swahili
698 { "swe", "sv" }, // Swedish
699 { "tam", "ta" }, // Tamil
700 { "tat", "tt" }, // Tatar
701 { "tel", "te" }, // Telugu
702 { "tgk", "tg" }, // Tajik
703 { "tgl", "tl" }, // Tagalog
704 { "tha", "th" }, // Thai
705 { "tib", "bo" }, // Tibetan
706 { "tir", "ti" }, // Tigrinya
707 { "ton", "to" }, // Tongan
708 { "tuk", "tk" }, // Turkmen
709 { "tur", "tr" }, // Turkish
710 { "uig", "ug" }, // Uighur
711 { "ukr", "uk" }, // Ukrainian
712 { "urd", "ur" }, // Urdu
713 { "uzb", "uz" }, // Uzbek
714 { "vie", "vi" }, // Vietnamese
715 { "wel", "cy" }, // Welsh
716 { "yid", "yi" }, // Yiddish
717 { "zho", "zh" }, // Chinese
718 };
// Number of entries in localeStringPrefixToCanonical, computed at compile time as the
// size of the whole array divided by the size of one KeyStringToResultString.
719 enum {
720 kNumLocaleStringPrefixToCanonical = sizeof(localeStringPrefixToCanonical)/sizeof(KeyStringToResultString)
721 };
722
723
// specialCases: list of region-code / language-code updates that need extra context to apply.
// Entries are order-sensitive (see the note on the 'sh' entry) and the list ends with an all-NULL entry.
// NOTE(review): the SpecialCaseUpdates field layout is declared outside this section; judging from the
// entries and their comments it is (language or NULL for any, region tag to find, replacement,
// second region tag to find, second replacement) - confirm against the struct definition.
724 static const SpecialCaseUpdates specialCases[] = {
725 // Data for special cases
726 // a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was
727 // replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after
728 // the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! Then after
729 // Serbia and Montenegro split, the code CS was replaced in 2006-09 with separate codes RS and ME. If we
730 // see CS but a language of cs or sk, we change CS to CZ or SK. Otherwise, we change CS (and old YU) to RS.
731 // b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and
732 // deprecated in 2000. We guess which one to map it to as follows: If there is a region tag of HR we use
733 // hr; if there is a region tag of (now) RS we use sr; else we do not change it (not enough info).
734 // c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the
735 // "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB).
736 { NULL, "-UK", "GB", NULL, NULL }, // always change UK to GB (UK is "exceptionally reserved" to mean GB)
737 { NULL, "-TP", "TL", NULL, NULL }, // always change TP to TL (East Timor, code changed 2002-05)
738 { "cs", "-CS", "CZ", NULL, NULL }, // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic)
739 { "sk", "-CS", "SK", NULL, NULL }, // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia)
740 { NULL, "-CS", "RS", NULL, NULL }, // otherwise map CS (assume Serbia+Montenegro) to RS (Serbia)
741 { NULL, "-YU", "RS", NULL, NULL }, // also map old YU (assume Serbia+Montenegro) to RS (Serbia)
742 { "sh", "-HR", "hr", "-RS", "sr" }, // then if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian)
743 // if we find HR (Croatia) or to 'sr' (Serbian) if we find RS (Serbia).
744 // Note: Do this after changing YU/CS to RS as above.
745 { NULL, NULL, NULL, NULL, NULL } // terminator
746 };
747
748
// localeStringRegionToDefaults: for each listed region suffix (key), the default writing-system
// tag (result) that is redundant with that region and is stripped to form the canonical string.
749 static const KeyStringToResultString localeStringRegionToDefaults[] = {
750 // For some region-code suffixes, there are default substrings to strip off for canonical string.
751 // Must be sorted according to how strcmp compares the strings in the first column
752 //
753 // region default writing
754 // suffix system tags, strip comment
755 // -------- ------------- ---------
756 { "_CN", "-Hans" }, // mainland China, default is simplified
757 { "_HK", "-Hant" }, // Hong Kong, default is traditional
758 { "_MO", "-Hant" }, // Macao, default is traditional
759 { "_SG", "-Hans" }, // Singapore, default is simplified
760 { "_TW", "-Hant" }, // Taiwan, default is traditional
761 };
// Number of entries in localeStringRegionToDefaults, computed at compile time.
762 enum {
763 kNumLocaleStringRegionToDefaults = sizeof(localeStringRegionToDefaults)/sizeof(KeyStringToResultString)
764 };
765
// localeStringPrefixToDefaults: for each listed language-tag prefix (key, including its trailing '-'),
// the default tag(s) (result) that are redundant with that language and are stripped to form the
// canonical string. A result may hold more than one tag separated by a space (see the "de-" entry).
766 static const KeyStringToResultString localeStringPrefixToDefaults[] = {
767 // For some initial portions of language tag, there are default substrings to strip off for canonical string.
768 // Must be sorted according to how strcmp compares the strings in the first column
769 //
770 // language default writing
771 // tag prefix system tags, strip comment
772 // -------- ------------- ---------
773 { "ab-", "-Cyrl" }, // Abkhazian
774 { "af-", "-Latn" }, // Afrikaans
775 { "am-", "-Ethi" }, // Amharic
776 { "ar-", "-Arab" }, // Arabic
777 { "as-", "-Beng" }, // Assamese
778 { "ay-", "-Latn" }, // Aymara
779 { "be-", "-Cyrl" }, // Belarusian
780 { "bg-", "-Cyrl" }, // Bulgarian
781 { "bn-", "-Beng" }, // Bengali
782 { "bo-", "-Tibt" }, // Tibetan (? not Suppress-Script)
783 { "br-", "-Latn" }, // Breton (? not Suppress-Script)
784 { "bs-", "-Latn" }, // Bosnian
785 { "ca-", "-Latn" }, // Catalan
786 { "cs-", "-Latn" }, // Czech
787 { "cy-", "-Latn" }, // Welsh
788 { "da-", "-Latn" }, // Danish
789 { "de-", "-Latn -1901" }, // German, traditional orthography
790 { "dv-", "-Thaa" }, // Divehi/Maldivian
791 { "dz-", "-Tibt" }, // Dzongkha
792 { "el-", "-Grek" }, // Greek (modern, monotonic)
793 { "en-", "-Latn" }, // English
794 { "eo-", "-Latn" }, // Esperanto
795 { "es-", "-Latn" }, // Spanish
796 { "et-", "-Latn" }, // Estonian
797 { "eu-", "-Latn" }, // Basque
798 { "fa-", "-Arab" }, // Farsi
799 { "fi-", "-Latn" }, // Finnish
800 { "fo-", "-Latn" }, // Faroese
801 { "fr-", "-Latn" }, // French
802 { "ga-", "-Latn" }, // Irish
803 { "gd-", "-Latn" }, // Scottish Gaelic (? not Suppress-Script)
804 { "gl-", "-Latn" }, // Galician
805 { "gn-", "-Latn" }, // Guarani
806 { "gu-", "-Gujr" }, // Gujarati
807 { "gv-", "-Latn" }, // Manx
808 { "haw-", "-Latn" }, // Hawaiian (? not Suppress-Script)
809 { "he-", "-Hebr" }, // Hebrew
810 { "hi-", "-Deva" }, // Hindi
811 { "hr-", "-Latn" }, // Croatian
812 { "hu-", "-Latn" }, // Hungarian
813 { "hy-", "-Armn" }, // Armenian
814 { "id-", "-Latn" }, // Indonesian
815 { "is-", "-Latn" }, // Icelandic
816 { "it-", "-Latn" }, // Italian
817 { "ja-", "-Jpan" }, // Japanese
818 { "ka-", "-Geor" }, // Georgian
819 { "kk-", "-Cyrl" }, // Kazakh
820 { "kl-", "-Latn" }, // Kalaallisut/Greenlandic
821 { "km-", "-Khmr" }, // Central Khmer
822 { "kn-", "-Knda" }, // Kannada
823 { "ko-", "-Hang" }, // Korean (? not Suppress-Script)
824 { "kok-", "-Deva" }, // Konkani
825 { "la-", "-Latn" }, // Latin
826 { "lb-", "-Latn" }, // Luxembourgish
827 { "lo-", "-Laoo" }, // Lao
828 { "lt-", "-Latn" }, // Lithuanian
829 { "lv-", "-Latn" }, // Latvian
830 { "mg-", "-Latn" }, // Malagasy
831 { "mk-", "-Cyrl" }, // Macedonian
832 { "ml-", "-Mlym" }, // Malayalam
833 { "mo-", "-Latn" }, // Moldavian
834 { "mr-", "-Deva" }, // Marathi
835 { "ms-", "-Latn" }, // Malay
836 { "mt-", "-Latn" }, // Maltese
837 { "my-", "-Mymr" }, // Burmese/Myanmar
838 { "nb-", "-Latn" }, // Norwegian Bokmal
839 { "ne-", "-Deva" }, // Nepali
840 { "nl-", "-Latn" }, // Dutch
841 { "nn-", "-Latn" }, // Norwegian Nynorsk
842 { "ny-", "-Latn" }, // Chichewa/Nyanja
843 { "om-", "-Latn" }, // Oromo
844 { "or-", "-Orya" }, // Oriya
845 { "pa-", "-Guru" }, // Punjabi
846 { "pl-", "-Latn" }, // Polish
847 { "ps-", "-Arab" }, // Pushto
848 { "pt-", "-Latn" }, // Portuguese
849 { "qu-", "-Latn" }, // Quechua
850 { "rn-", "-Latn" }, // Rundi
851 { "ro-", "-Latn" }, // Romanian
852 { "ru-", "-Cyrl" }, // Russian
853 { "rw-", "-Latn" }, // Kinyarwanda
854 { "sa-", "-Deva" }, // Sanskrit (? not Suppress-Script)
855 { "se-", "-Latn" }, // Sami (? not Suppress-Script)
856 { "si-", "-Sinh" }, // Sinhala
857 { "sk-", "-Latn" }, // Slovak
858 { "sl-", "-Latn" }, // Slovenian
859 { "so-", "-Latn" }, // Somali
860 { "sq-", "-Latn" }, // Albanian
861 { "sv-", "-Latn" }, // Swedish
862 { "sw-", "-Latn" }, // Swahili
863 { "ta-", "-Taml" }, // Tamil
864 { "te-", "-Telu" }, // Telugu
865 { "th-", "-Thai" }, // Thai
866 { "ti-", "-Ethi" }, // Tigrinya
867 { "tl-", "-Latn" }, // Tagalog
868 { "tn-", "-Latn" }, // Tswana
869 { "to-", "-Latn" }, // Tonga of Tonga Islands
870 { "tr-", "-Latn" }, // Turkish
871 { "uk-", "-Cyrl" }, // Ukrainian
872 { "ur-", "-Arab" }, // Urdu
873 { "vi-", "-Latn" }, // Vietnamese
874 { "wo-", "-Latn" }, // Wolof
875 { "xh-", "-Latn" }, // Xhosa
876 { "yi-", "-Hebr" }, // Yiddish
877 { "zh-", "-Hani" }, // Chinese (? not Suppress-Script)
878 { "zu-", "-Latn" }, // Zulu
879 };
// Number of entries in localeStringPrefixToDefaults, computed at compile time.
880 enum {
881 kNumLocaleStringPrefixToDefaults = sizeof(localeStringPrefixToDefaults)/sizeof(KeyStringToResultString)
882 };
883
// appleLocaleToLanguageString: exceptions to the generic ll_RR -> ll-RR conversion, i.e. locale
// strings (key) whose language string (result) is not obtained by simply replacing '_' with '-'
// (Chinese regions map to a script tag), plus the en_US_POSIX entry.
884 static const KeyStringToResultString appleLocaleToLanguageString[] = {
885 // Map locale strings that Apple uses as language IDs to real language strings.
886 // Must be sorted according to how strcmp compares the strings in the first column.
887 // Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now
888 // handled in the code. <1.19>
889 //
890 // locale lang [ comment ]
891 // string string
892 // ------- -------
893 { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752]
894 { "zh_CN", "zh-Hans" }, // mainland China => simplified
895 { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used
896 { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used
897 { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used
898 { "zh_TW", "zh-Hant" }, // Taiwan => traditional
899 };
// Number of entries in appleLocaleToLanguageString, computed at compile time.
900 enum {
901 kNumAppleLocaleToLanguageString = sizeof(appleLocaleToLanguageString)/sizeof(KeyStringToResultString)
902 };
903
// appleLocaleToLanguageStringForCFBundle: variant of the locale-string -> language-string map that
// lists the ll_RR -> ll-RR pairs explicitly instead of leaving them to code.
// NOTE(review): the name suggests this table is the one consulted on behalf of CFBundle - confirm at the call site.
// Entries that are commented out are kept for reference and are not part of the table.
904 static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle[] = {
905 // Map locale strings that Apple uses as language IDs to real language strings.
906 // Must be sorted according to how strcmp compares the strings in the first column.
907 //
908 // locale lang [ comment ]
909 // string string
910 // ------- -------
911 { "de_AT", "de-AT" }, // Austrian German
912 { "de_CH", "de-CH" }, // Swiss German
913 // { "de_DE", "de-DE" }, // German for Germany (default), not currently used
914 { "en_AU", "en-AU" }, // Australian English
915 { "en_CA", "en-CA" }, // Canadian English
916 { "en_GB", "en-GB" }, // British English
917 // { "en_IE", "en-IE" }, // Irish English, not currently used
918 { "en_US", "en-US" }, // U.S. English
919 { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752]
920 // { "fr_BE", "fr-BE" }, // Belgian French, not currently used
921 { "fr_CA", "fr-CA" }, // Canadian French
922 { "fr_CH", "fr-CH" }, // Swiss French
923 // { "fr_FR", "fr-FR" }, // French for France (default), not currently used
924 { "nl_BE", "nl-BE" }, // Flemish = Vlaams, Dutch for Belgium
925 // { "nl_NL", "nl-NL" }, // Dutch for Netherlands (default), not currently used
926 { "pt_BR", "pt-BR" }, // Brazilian Portuguese
927 { "pt_PT", "pt-PT" }, // Portuguese for Portugal
928 { "zh_CN", "zh-Hans" }, // mainland China => simplified
929 { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used
930 { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used
931 { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used
932 { "zh_TW", "zh-Hant" }, // Taiwan => traditional
933 };
// Number of entries in appleLocaleToLanguageStringForCFBundle, computed at compile time.
934 enum {
935 kNumAppleLocaleToLanguageStringForCFBundle = sizeof(appleLocaleToLanguageStringForCFBundle)/sizeof(KeyStringToResultString)
936 };
937
938
// LocaleToLegacyCodes: one row of the localeToLegacyCodes table, pairing a reduced locale
// identifier with the legacy region code, language code and string encoding used for it.
// The table initializers are positional, so the field order here must not change.
939 struct LocaleToLegacyCodes {
940 const char * locale; // reduced to language plus one other component (script, region, variant), separators normalized to '_'
941 RegionCode regCode; // legacy region code; the table uses -1 where it lists no region code
942 LangCode langCode; // legacy language code; the table uses -1 where it lists no language code
943 CFStringEncoding encoding; // legacy string encoding listed for the locale
944 };
945 typedef struct LocaleToLegacyCodes LocaleToLegacyCodes;
946
// localeToLegacyCodes: maps a reduced locale identifier (language, optionally followed by '_' and one
// script, region or variant component) to its legacy RegionCode, LangCode and CFStringEncoding.
// The inline /*...*/ comments give the symbolic name of each numeric code; a region or language
// column of -1 marks an entry for which the table lists no such code.
// NOTE(review): the rows appear in strcmp order of the locale column, presumably because the table
// is binary-searched - confirm against the lookup code before inserting or reordering entries.
947 static const LocaleToLegacyCodes localeToLegacyCodes[] = {
948 // locale RegionCode LangCode CFStringEncoding
949 { "af"/*ZA*/, 102/*verAfrikaans*/, 141/*langAfrikaans*/, 0/*Roman*/ }, // Latn
950 { "am", -1, 85/*langAmharic*/, 28/*Ethiopic*/ }, // Ethi
951 { "ar", 16/*verArabic*/, 12/*langArabic*/, 4/*Arabic*/ }, // Arab;
952 { "as", -1, 68/*langAssamese*/, 13/*Bengali*/ }, // Beng;
953 { "ay", -1, 134/*langAymara*/, 0/*Roman*/ }, // Latn;
954 { "az", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // assume "az" defaults to -Cyrl
955 { "az_Arab", -1, 50/*langAzerbaijanAr*/, 4/*Arabic*/ }, // Arab;
956 { "az_Cyrl", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // Cyrl;
957 { "az_Latn", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // Latn;
958 { "be"/*BY*/, 61/*verBelarus*/, 46/*langBelorussian*/, 7/*Cyrillic*/ }, // Cyrl;
959 { "bg"/*BG*/, 72/*verBulgaria*/, 44/*langBulgarian*/, 7/*Cyrillic*/ }, // Cyrl;
960 { "bn", 60/*verBengali*/, 67/*langBengali*/, 13/*Bengali*/ }, // Beng;
961 { "bo", 105/*verTibetan*/, 63/*langTibetan*/, 26/*Tibetan*/ }, // Tibt;
962 { "br", 77/*verBreton*/, 142/*langBreton*/, 39/*Celtic*/ }, // Latn;
963 { "ca"/*ES*/, 73/*verCatalonia*/, 130/*langCatalan*/, 0/*Roman*/ }, // Latn;
964 { "cs"/*CZ*/, 56/*verCzech*/, 38/*langCzech*/, 29/*CentralEurRoman*/ }, // Latn;
965 { "cy", 79/*verWelsh*/, 128/*langWelsh*/, 39/*Celtic*/ }, // Latn;
966 { "da"/*DK*/, 9/*verDenmark*/, 7/*langDanish*/, 0/*Roman*/ }, // Latn;
967 { "de", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, // assume "de" defaults to verGermany
968 { "de_1996", 70/*verGermanReformed*/, 2/*langGerman*/, 0/*Roman*/ },
969 { "de_AT", 92/*verAustria*/, 2/*langGerman*/, 0/*Roman*/ },
970 { "de_CH", 19/*verGrSwiss*/, 2/*langGerman*/, 0/*Roman*/ },
971 { "de_DE", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ },
972 { "dz"/*BT*/, 83/*verBhutan*/, 137/*langDzongkha*/, 26/*Tibetan*/ }, // Tibt;
973 { "el", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // assume "el" defaults to verGreece
974 { "el_CY", 23/*verCyprus*/, 14/*langGreek*/, 6/*Greek*/ },
975 { "el_GR", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // modern monotonic
976 { "en", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, // "en" defaults to verUS (per Chris Hansten)
977 { "en_001", 37/*verInternational*/, 0/*langEnglish*/, 0/*Roman*/ },
978 { "en_AU", 15/*verAustralia*/, 0/*langEnglish*/, 0/*Roman*/ },
979 { "en_CA", 82/*verEngCanada*/, 0/*langEnglish*/, 0/*Roman*/ },
980 { "en_GB", 2/*verBritain*/, 0/*langEnglish*/, 0/*Roman*/ },
981 { "en_IE", 108/*verIrelandEnglish*/, 0/*langEnglish*/, 0/*Roman*/ },
982 { "en_SG", 100/*verSingapore*/, 0/*langEnglish*/, 0/*Roman*/ },
983 { "en_US", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ },
984 { "eo", 103/*verEsperanto*/, 94/*langEsperanto*/, 0/*Roman*/ }, // Latn;
985 { "es", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, // "es" defaults to verSpain (per Chris Hansten)
986 { "es_419", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, // new BCP 47 tag
987 { "es_ES", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ },
988 { "es_MX", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ },
989 { "es_US", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ },
990 { "et"/*EE*/, 44/*verEstonia*/, 27/*langEstonian*/, 29/*CentralEurRoman*/ },
991 { "eu", -1, 129/*langBasque*/, 0/*Roman*/ }, // Latn;
992 { "fa"/*IR*/, 48/*verIran*/, 31/*langFarsi/Persian*/, 0x8C/*Farsi*/ }, // Arab;
993 { "fi"/*FI*/, 17/*verFinland*/, 13/*langFinnish*/, 0/*Roman*/ },
994 { "fo"/*FO*/, 47/*verFaroeIsl*/, 30/*langFaroese*/, 37/*Icelandic*/ },
995 { "fr", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, // "fr" defaults to verFrance (per Chris Hansten)
996 { "fr_001", 91/*verFrenchUniversal*/, 1/*langFrench*/, 0/*Roman*/ },
997 { "fr_BE", 98/*verFrBelgium*/, 1/*langFrench*/, 0/*Roman*/ },
998 { "fr_CA", 11/*verFrCanada*/, 1/*langFrench*/, 0/*Roman*/ },
999 { "fr_CH", 18/*verFrSwiss*/, 1/*langFrench*/, 0/*Roman*/ },
1000 { "fr_FR", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ },
1001 { "ga"/*IE*/, 50/*verIreland*/, 35/*langIrishGaelic*/, 0/*Roman*/ }, // no dots (h after)
1002 { "ga_Latg"/*IE*/, 81/*verIrishGaelicScrip*/, 146/*langIrishGaelicScript*/, 40/*Gaelic*/ }, // using dots
1003 { "gd", 75/*verScottishGaelic*/, 144/*langScottishGaelic*/, 39/*Celtic*/ },
1004 { "gl", -1, 140/*langGalician*/, 0/*Roman*/ }, // Latn;
1005 { "gn", -1, 133/*langGuarani*/, 0/*Roman*/ }, // Latn;
1006 { "grc", 40/*verGreekAncient*/, 148/*langGreekAncient*/, 6/*Greek*/ }, // polytonic (MacGreek doesn't actually support it)
1007 { "gu"/*IN*/, 94/*verGujarati*/, 69/*langGujarati*/, 11/*Gujarati*/ }, // Gujr;
1008 { "gv", 76/*verManxGaelic*/, 145/*langManxGaelic*/, 39/*Celtic*/ }, // Latn;
1009 { "he"/*IL*/, 13/*verIsrael*/, 10/*langHebrew*/, 5/*Hebrew*/ }, // Hebr;
1010 { "hi"/*IN*/, 33/*verIndiaHindi*/, 21/*langHindi*/, 9/*Devanagari*/ }, // Deva;
1011 { "hr"/*HR*/, 68/*verCroatia*/, 18/*langCroatian*/, 36/*Croatian*/ },
1012 { "hu"/*HU*/, 43/*verHungary*/, 26/*langHungarian*/, 29/*CentralEurRoman*/ },
1013 { "hy"/*AM*/, 84/*verArmenian*/, 51/*langArmenian*/, 24/*Armenian*/ }, // Armn;
1014 { "id", -1, 81/*langIndonesian*/, 0/*Roman*/ }, // Latn;
1015 { "is"/*IS*/, 21/*verIceland*/, 15/*langIcelandic*/, 37/*Icelandic*/ },
1016 { "it", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, // "it" defaults to verItaly
1017 { "it_CH", 36/*verItalianSwiss*/, 3/*langItalian*/, 0/*Roman*/ },
1018 { "it_IT", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ },
1019 { "iu"/*CA*/, 78/*verNunavut*/, 143/*langInuktitut*/, 0xEC/*Inuit*/ }, // Cans;
1020 { "ja"/*JP*/, 14/*verJapan*/, 11/*langJapanese*/, 1/*Japanese*/ }, // Jpan;
1021 { "jv", -1, 138/*langJavaneseRom*/, 0/*Roman*/ }, // Latn;
1022 { "ka"/*GE*/, 85/*verGeorgian*/, 52/*langGeorgian*/, 23/*Georgian*/ }, // Geor;
1023 { "kk", -1, 48/*langKazakh*/, 7/*Cyrillic*/ }, // "kk" defaults to -Cyrl; also have -Latn, -Arab
1024 { "kl", 107/*verGreenland*/, 149/*langGreenlandic*/, 0/*Roman*/ }, // Latn;
1025 { "km", -1, 78/*langKhmer*/, 20/*Khmer*/ }, // Khmr;
1026 { "kn", -1, 73/*langKannada*/, 16/*Kannada*/ }, // Knda;
1027 { "ko"/*KR*/, 51/*verKorea*/, 23/*langKorean*/, 3/*Korean*/ }, // Hang;
1028 { "ks", -1, 61/*langKashmiri*/, 4/*Arabic*/ }, // Arab;
1029 { "ku", -1, 60/*langKurdish*/, 4/*Arabic*/ }, // Arab;
1030 { "ky", -1, 54/*langKirghiz*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1031 { "la", -1, 131/*langLatin*/, 0/*Roman*/ }, // Latn;
1032 { "lo", -1, 79/*langLao*/, 22/*Laotian*/ }, // Laoo;
1033 { "lt"/*LT*/, 41/*verLithuania*/, 24/*langLithuanian*/, 29/*CentralEurRoman*/ },
1034 { "lv"/*LV*/, 45/*verLatvia*/, 28/*langLatvian*/, 29/*CentralEurRoman*/ },
1035 { "mg", -1, 93/*langMalagasy*/, 0/*Roman*/ }, // Latn;
1036 { "mk"/*MK*/, 67/*verMacedonian*/, 43/*langMacedonian*/, 7/*Cyrillic*/ }, // Cyrl;
1037 { "ml", -1, 72/*langMalayalam*/, 17/*Malayalam*/ }, // Mlym;
1038 { "mn", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // "mn" defaults to -Mong
1039 { "mn_Cyrl", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // Cyrl;
1040 { "mn_Mong", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // Mong;
1041 { "mo", -1, 53/*langMoldavian*/, 7/*Cyrillic*/ }, // Cyrl;
1042 { "mr"/*IN*/, 104/*verMarathi*/, 66/*langMarathi*/, 9/*Devanagari*/ }, // Deva;
1043 { "ms", -1, 83/*langMalayRoman*/, 0/*Roman*/ }, // "ms" defaults to -Latn;
1044 { "ms_Arab", -1, 84/*langMalayArabic*/, 4/*Arabic*/ }, // Arab;
1045 { "mt"/*MT*/, 22/*verMalta*/, 16/*langMaltese*/, 0/*Roman*/ }, // Latn;
1046 { "mul", 74/*verMultilingual*/, -1, 0 },
1047 { "my", -1, 77/*langBurmese*/, 19/*Burmese*/ }, // Mymr;
1048 { "nb"/*NO*/, 12/*verNorway*/, 9/*langNorwegian*/, 0/*Roman*/ },
1049 { "ne"/*NP*/, 106/*verNepal*/, 64/*langNepali*/, 9/*Devanagari*/ }, // Deva;
1050 { "nl", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, // "nl" defaults to verNetherlands
1051 { "nl_BE", 6/*verFlemish*/, 34/*langFlemish*/, 0/*Roman*/ },
1052 { "nl_NL", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ },
1053 { "nn"/*NO*/, 101/*verNynorsk*/, 151/*langNynorsk*/, 0/*Roman*/ },
1054 { "ny", -1, 92/*langNyanja/Chewa*/, 0/*Roman*/ }, // Latn;
1055 { "om", -1, 87/*langOromo*/, 28/*Ethiopic*/ }, // Ethi;
1056 { "or", -1, 71/*langOriya*/, 12/*Oriya*/ }, // Orya;
1057 { "pa", 95/*verPunjabi*/, 70/*langPunjabi*/, 10/*Gurmukhi*/ }, // Guru;
1058 { "pl"/*PL*/, 42/*verPoland*/, 25/*langPolish*/, 29/*CentralEurRoman*/ },
1059 { "ps", -1, 59/*langPashto*/, 0x8C/*Farsi*/ }, // Arab;
1060 { "pt", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, // "pt" defaults to verBrazil (per Chris Hansten)
1061 { "pt_BR", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ },
1062 { "pt_PT", 10/*verPortugal*/, 8/*langPortuguese*/, 0/*Roman*/ },
1063 { "qu", -1, 132/*langQuechua*/, 0/*Roman*/ }, // Latn;
1064 { "rn", -1, 91/*langRundi*/, 0/*Roman*/ }, // Latn;
1065 { "ro"/*RO*/, 39/*verRomania*/, 37/*langRomanian*/, 38/*Romanian*/ },
1066 { "ru"/*RU*/, 49/*verRussia*/, 32/*langRussian*/, 7/*Cyrillic*/ }, // Cyrl;
1067 { "rw", -1, 90/*langKinyarwanda*/, 0/*Roman*/ }, // Latn;
1068 { "sa", -1, 65/*langSanskrit*/, 9/*Devanagari*/ }, // Deva;
1069 { "sd", -1, 62/*langSindhi*/, 0x8C/*Farsi*/ }, // Arab;
1070 { "se", 46/*verSami*/, 29/*langSami*/, 0/*Roman*/ },
1071 { "si", -1, 76/*langSinhalese*/, 18/*Sinhalese*/ }, // Sinh;
1072 { "sk"/*SK*/, 57/*verSlovak*/, 39/*langSlovak*/, 29/*CentralEurRoman*/ },
1073 { "sl"/*SI*/, 66/*verSlovenian*/, 40/*langSlovenian*/, 36/*Croatian*/ },
1074 { "so", -1, 88/*langSomali*/, 0/*Roman*/ }, // Latn;
1075 { "sq", -1, 36/*langAlbanian*/, 0/*Roman*/ },
1076 { "sr"/*CS,RS*/, 65/*verSerbian*/, 42/*langSerbian*/, 7/*Cyrillic*/ }, // Cyrl;
1077 { "su", -1, 139/*langSundaneseRom*/, 0/*Roman*/ }, // Latn;
1078 { "sv"/*SE*/, 7/*verSweden*/, 5/*langSwedish*/, 0/*Roman*/ },
1079 { "sw", -1, 89/*langSwahili*/, 0/*Roman*/ }, // Latn;
1080 { "ta", -1, 74/*langTamil*/, 14/*Tamil*/ }, // Taml;
1081 { "te", -1, 75/*langTelugu*/, 15/*Telugu*/ }, // Telu
1082 { "tg", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // "tg" defaults to "Cyrl"
1083 { "tg_Cyrl", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1084 { "th"/*TH*/, 54/*verThailand*/, 22/*langThai*/, 21/*Thai*/ }, // Thai;
1085 { "ti", -1, 86/*langTigrinya*/, 28/*Ethiopic*/ }, // Ethi;
1086 { "tk", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // "tk" defaults to Cyrl
1087 { "tk_Cyrl", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1088 { "tl", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn;
1089 { "to"/*TO*/, 88/*verTonga*/, 147/*langTongan*/, 0/*Roman*/ }, // Latn;
1090 { "tr"/*TR*/, 24/*verTurkey*/, 17/*langTurkish*/, 35/*Turkish*/ }, // Latn;
1091 { "tt", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl;
1092 { "tt_Cyrl", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl;
1093 { "ug", -1, 136/*langUighur*/, 4/*Arabic*/ }, // Arab;
1094 { "uk"/*UA*/, 62/*verUkraine*/, 45/*langUkrainian*/, 7/*Cyrillic*/ }, // Cyrl;
1095 { "und", 55/*verScriptGeneric*/, -1, 0 },
1096 { "ur", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // "ur" defaults to verPakistanUrdu
1097 { "ur_IN", 96/*verIndiaUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab
1098 { "ur_PK", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab
1099 { "uz"/*UZ*/, 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1100 { "uz_Cyrl", 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ },
1101 { "vi"/*VN*/, 97/*verVietnam*/, 80/*langVietnamese*/, 30/*Vietnamese*/ }, // Latn
1102 { "yi", -1, 41/*langYiddish*/, 5/*Hebrew*/ }, // Hebr;
1103 { "zh", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, // "zh" defaults to verChina, langSimpChinese
1104 { "zh_CN", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1105 { "zh_HK", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1106 { "zh_Hans", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1107 { "zh_Hant", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1108 { "zh_MO", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1109 { "zh_SG", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1110 { "zh_TW", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1111 };
// Number of entries in localeToLegacyCodes, computed at compile time.
1112 enum {
1113 kNumLocaleToLegacyCodes = sizeof(localeToLegacyCodes)/sizeof(localeToLegacyCodes[0])
1114 };
1115
1116 /*
1117 For reference here is a list of ICU locales with variants and how some
1118 of them are canonicalized with the ICU function uloc_canonicalize:
1119
1120 ICU 3.0 has:
1121 en_US_POSIX x no change
1122 hy_AM_REVISED x no change
1123 ja_JP_TRADITIONAL -> ja_JP@calendar=japanese
1124 th_TH_TRADITIONAL -> th_TH@calendar=buddhist
1125
1126 ICU 2.8 also had the following (now obsolete):
1127 ca_ES_PREEURO
1128 de__PHONEBOOK -> de@collation=phonebook
1129 de_AT_PREEURO
1130 de_DE_PREEURO
1131 de_LU_PREEURO
1132 el_GR_PREEURO
1133 en_BE_PREEURO
1134 en_GB_EURO -> en_GB@currency=EUR
1135 en_IE_PREEURO -> en_IE@currency=IEP
1136 es__TRADITIONAL -> es@collation=traditional
1137 es_ES_PREEURO
1138 eu_ES_PREEURO
1139 fi_FI_PREEURO
1140 fr_BE_PREEURO
1141 fr_FR_PREEURO -> fr_FR@currency=FRF
1142 fr_LU_PREEURO
1143 ga_IE_PREEURO
1144 gl_ES_PREEURO
1145 hi__DIRECT -> hi@collation=direct
1146 it_IT_PREEURO
1147 nl_BE_PREEURO
1148 nl_NL_PREEURO
1149 pt_PT_PREEURO
1150 zh__PINYIN -> zh@collation=pinyin
1151 zh_TW_STROKE -> zh_TW@collation=stroke
1152
1153 */
1154
1155 // _CompareTestEntryToTableEntryKey
1156 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1157 // comparison function for bsearch
1158 static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1159 return strcmp( ((const KeyStringToResultString *)testEntryPtr)->key, ((const KeyStringToResultString *)tableEntryKeyPtr)->key );
1160 }
1161
1162 // _CompareTestEntryPrefixToTableEntryKey
1163 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1164 // Comparison function for bsearch. Assumes prefix IS terminated with '-' or '_'.
1165 // Do the following instead of strlen & strncmp so we don't walk tableEntry key twice.
1166 static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1167 const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key;
1168 const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key;
1169
1170 while ( *testPtr == *tablePtr && *tablePtr != 0 ) {
1171 testPtr++; tablePtr++;
1172 }
1173 if ( *tablePtr != 0 ) {
1174 // strings are different, and the string in the table has not run out;
1175 // i.e. the table entry is not a prefix of the text string.
1176 return ( *testPtr < *tablePtr )? -1: 1;
1177 }
1178 return 0;
1179 }
1180
1181 // _CompareLowerTestEntryPrefixToTableEntryKey
1182 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1183 // Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'.
1184 // Lowercases the test string before comparison (the table should already have lowercased entries).
1185 static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1186 const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key;
1187 const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key;
1188 char lowerTestChar;
1189
1190 while ( (lowerTestChar = tolower(*testPtr)) == *tablePtr && *tablePtr != 0 && lowerTestChar != '_' ) { // <1.9>
1191 testPtr++; tablePtr++;
1192 }
1193 if ( *tablePtr != 0 ) {
1194 // strings are different, and the string in the table has not run out;
1195 // i.e. the table entry is not a prefix of the text string.
1196 if (lowerTestChar == '_') // <1.9>
1197 return -1; // <1.9>
1198 return ( lowerTestChar < *tablePtr )? -1: 1;
1199 }
1200 // The string in the table has run out. If the test string char is not alnum,
1201 // then the string matches, else the test string sorts after.
1202 return ( !isalnum(lowerTestChar) )? 0: 1;
1203 }
1204
// _DeleteCharsAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Delete the first _length_ characters of the NUL-terminated string at _stringPtr_
// by sliding the rest of the string, terminator included, down over them.
// The caller guarantees that the string holds at least _length_ characters.
static void _DeleteCharsAtPointer(char *stringPtr, int length) {
    const char *src = stringPtr + length;
    char *dst = stringPtr;
    char moved;

    for (;;) {
        moved = *src++;
        *dst++ = moved;
        if (moved == 0) {
            break;      // terminator has been copied, string is complete
        }
    }
}
1214
// _CopyReplacementAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Overwrite the characters starting at stringPtr with the characters of replacementPtr.
// The replacement's terminating NUL byte is *not* copied, so whatever follows the
// overwritten span in the destination is left untouched.
static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) {
    size_t i;

    for (i = 0; replacementPtr[i] != 0; i++) {
        stringPtr[i] = replacementPtr[i];
    }
}
1223
1224 // _CheckForTag
1225 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1226 static Boolean _CheckForTag(const char *localeStringPtr, const char *tagPtr, int tagLen) {
1227 return ( strncmp(localeStringPtr, tagPtr, tagLen) == 0 && !isalnum(localeStringPtr[tagLen]) );
1228 }
1229
// _ReplacePrefix
// Move this code from _UpdateFullLocaleString into separate function		// <1.10>
// Replace the first oldPrefixLen characters of the NUL-terminated string in locString
// with newPrefix, shifting the rest of the string (terminator included) left or right
// as needed.
//   locString        buffer holding the string, modified in place
//   locStringMaxLen  size of that buffer in bytes, including room for the terminating NUL
//   oldPrefixLen     number of leading characters to replace
//   newPrefix        NUL-terminated replacement text
// The string is left unchanged when the longer result would not fit in locStringMaxLen
// bytes, or when oldPrefixLen is negative or exceeds the current string length.
static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) {
    int newPrefixLen = (int)strlen(newPrefix);
    int stringLen = (int)strlen(locString);
    int lengthDelta = newPrefixLen - oldPrefixLen;

    if (oldPrefixLen < 0 || oldPrefixLen > stringLen) {
        // the string does not contain oldPrefixLen characters to replace
        return;
    }
    if (lengthDelta > 0 && stringLen + lengthDelta >= locStringMaxLen) {
        // replacement is longer and there is no room, can't do substitution
        return;
    }
    if (lengthDelta != 0) {
        // Shift the tail of the string, including its terminating NUL, to sit right after
        // the new prefix. memmove handles the overlap in either direction and never forms
        // a pointer before the start of the buffer (even when oldPrefixLen is 0).
        memmove(locString + newPrefixLen, locString + oldPrefixLen, (size_t)(stringLen - oldPrefixLen) + 1);
    }
    // do the substitution (the replacement's NUL is not copied)
    memmove(locString, newPrefix, (size_t)newPrefixLen);
}
1263
1264 // _UpdateFullLocaleString
1265 // Given a locale string that uses standard codes (not a special old-style Apple string),
1266 // update all the language codes and region codes to latest versions, map 3-letter
1267 // language codes to 2-letter codes if possible, and normalize casing. If requested, return
1268 // pointers to a language-region variant subtag (if present) and a region tag (if present).
1269 // (add locStringMaxLen parameter) // <1.10>
1270 static void _UpdateFullLocaleString(char inLocaleString[], int locStringMaxLen,
1271 char **langRegSubtagRef, char **regionTagRef,
1272 char varKeyValueString[]) // <1.17>
1273 {
1274 KeyStringToResultString testEntry;
1275 KeyStringToResultString * foundEntry;
1276 const SpecialCaseUpdates * specialCasePtr;
1277 char * inLocalePtr;
1278 char * subtagPtr;
1279 char * langRegSubtag = NULL;
1280 char * regionTag = NULL;
1281 char * variantTag = NULL;
1282 Boolean subtagHasDigits, pastPrimarySubtag, hadRegion;
1283
1284 // 1. First replace any non-canonical prefix (case insensitive) with canonical
1285 // (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.)
1286
1287 testEntry.key = inLocaleString;
1288 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToCanonical, kNumLocaleStringPrefixToCanonical,
1289 sizeof(KeyStringToResultString), _CompareLowerTestEntryPrefixToTableEntryKey );
1290 if (foundEntry) {
1291 // replace key (at beginning of string) with result
1292 _ReplacePrefix(inLocaleString, locStringMaxLen, strlen(foundEntry->key), foundEntry->result); // <1.10>
1293 }
1294
1295 // 2. Walk through input string, normalizing case & marking use of ISO 3166 codes
1296
1297 inLocalePtr = inLocaleString;
1298 subtagPtr = inLocaleString;
1299 subtagHasDigits = false;
1300 pastPrimarySubtag = false;
1301 hadRegion = false;
1302
1303 while ( true ) {
1304 if ( isalpha(*inLocalePtr) ) {
1305 // if not past a region tag, then lowercase, else uppercase
1306 *inLocalePtr = (!hadRegion)? tolower(*inLocalePtr): toupper(*inLocalePtr);
1307 } else if ( isdigit(*inLocalePtr) ) {
1308 subtagHasDigits = true;
1309 } else {
1310
1311 if (!pastPrimarySubtag) {
1312 // may have a NULL primary subtag
1313 if (subtagHasDigits) {
1314 break;
1315 }
1316 pastPrimarySubtag = true;
1317 } else if (!hadRegion) {
1318 // We are after any primary language subtag, but not past any region tag.
1319 // This subtag is preceded by '-' or '_'.
1320 int subtagLength = inLocalePtr - subtagPtr; // includes leading '-' or '_'
1321
1322 if (subtagLength == 3 && !subtagHasDigits) {
1323 // potential ISO 3166 code for region or language variant; if so, needs uppercasing
1324 if (*subtagPtr == '_') {
1325 regionTag = subtagPtr;
1326 hadRegion = true;
1327 subtagPtr[1] = toupper(subtagPtr[1]);
1328 subtagPtr[2] = toupper(subtagPtr[2]);
1329 } else if (langRegSubtag == NULL) {
1330 langRegSubtag = subtagPtr;
1331 subtagPtr[1] = toupper(subtagPtr[1]);
1332 subtagPtr[2] = toupper(subtagPtr[2]);
1333 }
1334 } else if (subtagLength == 4 && subtagHasDigits) {
1335 // potential UN M.49 region code
1336 if (*subtagPtr == '_') {
1337 regionTag = subtagPtr;
1338 hadRegion = true;
1339 } else if (langRegSubtag == NULL) {
1340 langRegSubtag = subtagPtr;
1341 }
1342 } else if (subtagLength == 5 && !subtagHasDigits) {
1343 // ISO 15924 script code, uppercase just the first letter
1344 subtagPtr[1] = toupper(subtagPtr[1]);
1345 } else if (subtagLength == 1 && *subtagPtr == '_') { // <1.17>
1346 hadRegion = true;
1347 }
1348
1349 if (!hadRegion) {
1350 // convert improper '_' to '-'
1351 *subtagPtr = '-';
1352 }
1353 } else {
1354 variantTag = subtagPtr; // <1.17>
1355 }
1356
1357 if (*inLocalePtr == '-' || *inLocalePtr == '_') {
1358 subtagPtr = inLocalePtr;
1359 subtagHasDigits = false;
1360 } else {
1361 break;
1362 }
1363 }
1364
1365 inLocalePtr++;
1366 }
1367
1368 // 3 If there is a variant tag, see if ICU canonicalizes it to keywords. // <1.17> [3577669]
1369 // If so, copy the keywords to varKeyValueString and delete the variant tag
1370 // from the original string (but don't otherwise use the ICU canonicalization).
1371 varKeyValueString[0] = 0;
1372 if (variantTag) {
1373 UErrorCode icuStatus;
1374 int icuCanonStringLen;
1375 char * varKeyValueStringPtr = varKeyValueString;
1376
1377 icuStatus = U_ZERO_ERROR;
1378 icuCanonStringLen = uloc_canonicalize( inLocaleString, varKeyValueString, locStringMaxLen, &icuStatus );
1379 if ( U_SUCCESS(icuStatus) ) {
1380 char * icuCanonStringPtr = varKeyValueString;
1381
1382 if (icuCanonStringLen >= locStringMaxLen)
1383 icuCanonStringLen = locStringMaxLen - 1;
1384 varKeyValueString[icuCanonStringLen] = 0;
1385 while (*icuCanonStringPtr != 0 && *icuCanonStringPtr != ULOC_KEYWORD_SEPARATOR)
1386 ++icuCanonStringPtr;
1387 if (*icuCanonStringPtr != 0) {
1388 // the canonicalized string has keywords
1389 // delete the variant tag in the original string (and other trailing '_' or '-')
1390 *variantTag-- = 0;
1391 while (*variantTag == '_')
1392 *variantTag-- = 0;
1393 // delete all of the canonicalized string except the keywords
1394 while (*icuCanonStringPtr != 0)
1395 *varKeyValueStringPtr++ = *icuCanonStringPtr++;
1396 }
1397 *varKeyValueStringPtr = 0;
1398 }
1399 }
1400
1401 // 4. Handle special cases of updating region codes, or updating language codes based on
1402 // region code.
1403 for (specialCasePtr = specialCases; specialCasePtr->reg1 != NULL; specialCasePtr++) {
1404 if ( specialCasePtr->lang == NULL || _CheckForTag(inLocaleString, specialCasePtr->lang, 2) ) {
1405 // OK, we matched any language specified. Now what needs updating?
1406 char * foundTag;
1407
1408 if ( isupper(specialCasePtr->update1[0]) ) {
1409 // updating a region code
1410 if ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) {
1411 _CopyReplacementAtPointer(foundTag+1, specialCasePtr->update1);
1412 }
1413 if ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) {
1414 _CopyReplacementAtPointer(regionTag+1, specialCasePtr->update1);
1415 }
1416
1417 } else {
1418 // updating the language, there will be two choices based on region
1419 if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) ||
1420 ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) ) {
1421 _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update1);
1422 } else if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg2 + 1, 2) ) ||
1423 ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg2) ) && !isalnum(foundTag[3]) ) ) {
1424 _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update2);
1425 }
1426 }
1427 }
1428 }
1429
1430 // 5. return pointers if requested.
1431 if (langRegSubtagRef != NULL) {
1432 *langRegSubtagRef = langRegSubtag;
1433 }
1434 if (regionTagRef != NULL) {
1435 *regionTagRef = regionTag;
1436 }
1437 }
1438
1439
1440 // _RemoveSubstringsIfPresent
1441 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1442 // substringList is a list of space-separated substrings to strip if found in localeString
1443 static void _RemoveSubstringsIfPresent(char *localeString, const char *substringList) {
1444 while (*substringList != 0) {
1445 char currentSubstring[kLocaleIdentifierCStringMax];
1446 int substringLength = 0;
1447 char * foundSubstring;
1448
1449 // copy current substring & get its length
1450 while ( isgraph(*substringList) ) {
1451 currentSubstring[substringLength++] = *substringList++;
1452 }
1453 // move to next substring
1454 while ( isspace(*substringList) ) {
1455 substringList++;
1456 }
1457
1458 // search for current substring in locale string
1459 if (substringLength == 0)
1460 continue;
1461 currentSubstring[substringLength] = 0;
1462 foundSubstring = strstr(localeString, currentSubstring);
1463
1464 // if substring is found, delete it
1465 if (foundSubstring) {
1466 _DeleteCharsAtPointer(foundSubstring, substringLength);
1467 }
1468 }
1469 }
1470
1471
1472 // _GetKeyValueString // <1.10>
1473 // Removes any key-value string from inLocaleString, puts canonized version in keyValueString
1474
1475 static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) {
1476 char * inLocalePtr = inLocaleString;
1477
1478 while (*inLocalePtr != 0 && *inLocalePtr != ULOC_KEYWORD_SEPARATOR) {
1479 inLocalePtr++;
1480 }
1481 if (*inLocalePtr != 0) { // we found a key-value section
1482 char * keyValuePtr = keyValueString;
1483
1484 *keyValuePtr = *inLocalePtr;
1485 *inLocalePtr = 0;
1486 do {
1487 if ( *(++inLocalePtr) != ' ' ) {
1488 *(++keyValuePtr) = *inLocalePtr; // remove "tolower() for *inLocalePtr" // <1.11>
1489 }
1490 } while (*inLocalePtr != 0);
1491 } else {
1492 keyValueString[0] = 0;
1493 }
1494 }
1495
1496 static void _AppendKeyValueString(char inLocaleString[], int locStringMaxLen, char keyValueString[]) {
1497 if (keyValueString[0] != 0) {
1498 UErrorCode uerr = U_ZERO_ERROR;
1499 UEnumeration * uenum = uloc_openKeywords(keyValueString, &uerr);
1500 if ( uenum != NULL ) {
1501 const char * keyword;
1502 int32_t length;
1503 char value[ULOC_KEYWORDS_CAPACITY]; // use as max for keyword value
1504 while ( U_SUCCESS(uerr) ) {
1505 keyword = uenum_next(uenum, &length, &uerr);
1506 if ( keyword == NULL ) {
1507 break;
1508 }
1509 length = uloc_getKeywordValue( keyValueString, keyword, value, sizeof(value), &uerr );
1510 length = uloc_setKeywordValue( keyword, value, inLocaleString, locStringMaxLen, &uerr );
1511 }
1512 uenum_close(uenum);
1513 }
1514 }
1515 }
1516
1517 // __private_extern__ CFStringRef _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator, CFStringRef localeIdentifier) {}
1518
1519 CFStringRef CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) {
1520 char inLocaleString[kLocaleIdentifierCStringMax];
1521 CFStringRef outStringRef = NULL;
1522
1523 if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) {
1524 KeyStringToResultString testEntry;
1525 KeyStringToResultString * foundEntry;
1526 char keyValueString[sizeof(inLocaleString)]; // <1.10>
1527 char varKeyValueString[sizeof(inLocaleString)]; // <1.17>
1528
1529 _GetKeyValueString(inLocaleString, keyValueString); // <1.10>
1530 testEntry.result = NULL;
1531
1532 // A. First check if input string matches an old-style string that has a replacement
1533 // (do this before case normalization)
1534 testEntry.key = inLocaleString;
1535 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical,
1536 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1537 if (foundEntry) {
1538 // It does match, so replace old string with new
1539 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1540 varKeyValueString[0] = 0;
1541 } else {
1542 char * langRegSubtag = NULL;
1543 char * regionTag = NULL;
1544
1545 // B. No match with an old-style string, use input string but update codes, normalize case, etc.
1546 _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, &regionTag, varKeyValueString); // <1.10><1.17><1.19>
1547
1548 // if the language part already includes a regional variant, then delete any region tag. <1.19>
1549 if (langRegSubtag && regionTag)
1550 *regionTag = 0;
1551 }
1552
1553 // C. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string
1554
1555 // 1. Strip defaults in input string based on initial part of locale string
1556 // (mainly to strip default script tag for a language)
1557 testEntry.key = inLocaleString;
1558 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults,
1559 sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey );
1560 if (foundEntry) {
1561 // The input string begins with a character sequence for which
1562 // there are default substrings which should be stripped if present
1563 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1564 }
1565
1566 // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string
1567 testEntry.key = inLocaleString;
1568 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageString, kNumAppleLocaleToLanguageString,
1569 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1570 if (foundEntry) {
1571 // it does match
1572 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1573 } else {
1574 // skip to any region tag or java-type variant
1575 char * inLocalePtr = inLocaleString;
1576 while (*inLocalePtr != 0 && *inLocalePtr != '_') {
1577 inLocalePtr++;
1578 }
1579 // if there is still a region tag, turn it into a language variant <1.19>
1580 if (*inLocalePtr == '_') {
1581 // handle 3-digit regions in addition to 2-letter ones
1582 char * regionTag = inLocalePtr++;
1583 long expectedLength = 0;
1584 if ( isalpha(*inLocalePtr) ) {
1585 while ( isalpha(*(++inLocalePtr)) )
1586 ;
1587 expectedLength = 3;
1588 } else if ( isdigit(*inLocalePtr) ) {
1589 while ( isdigit(*(++inLocalePtr)) )
1590 ;
1591 expectedLength = 4;
1592 }
1593 *regionTag = (inLocalePtr - regionTag == expectedLength)? '-': 0;
1594 }
1595 // anything else at/after '_' just gets deleted
1596 *inLocalePtr = 0;
1597 }
1598
1599 // D. Re-append any key-value strings, now canonical // <1.10><1.17>
1600 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString );
1601 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString );
1602
1603 // All done, return what we came up with.
1604 outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII);
1605 }
1606
1607 return outStringRef;
1608 }
1609
1610
1611 CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) {
1612 char inLocaleString[kLocaleIdentifierCStringMax];
1613 CFStringRef outStringRef = NULL;
1614
1615 if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) {
1616 KeyStringToResultString testEntry;
1617 KeyStringToResultString * foundEntry;
1618 char keyValueString[sizeof(inLocaleString)]; // <1.10>
1619 char varKeyValueString[sizeof(inLocaleString)]; // <1.17>
1620
1621 _GetKeyValueString(inLocaleString, keyValueString); // <1.10>
1622 testEntry.result = NULL;
1623
1624 // A. First check if input string matches an old-style Apple string that has a replacement
1625 // (do this before case normalization)
1626 testEntry.key = inLocaleString;
1627 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical,
1628 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1629 if (foundEntry) {
1630 // It does match, so replace old string with new // <1.10>
1631 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1632 varKeyValueString[0] = 0;
1633 } else {
1634 char * langRegSubtag = NULL;
1635 char * regionTag = NULL;
1636
1637 // B. No match with an old-style string, use input string but update codes, normalize case, etc.
1638 _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, &regionTag, varKeyValueString); // <1.10><1.17>
1639
1640
1641 // C. Now strip defaults that are implied by other fields.
1642
1643 // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter.
1644 if ( langRegSubtag && regionTag && strncmp(langRegSubtag+1, regionTag+1, 2) == 0 ) {
1645 _DeleteCharsAtPointer(langRegSubtag, 3);
1646 }
1647
1648 // 2. Strip defaults in input string based on final region tag in locale string
1649 // (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO)
1650 if ( regionTag ) {
1651 testEntry.key = regionTag;
1652 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringRegionToDefaults, kNumLocaleStringRegionToDefaults,
1653 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1654 if (foundEntry) {
1655 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1656 }
1657 }
1658
1659 // 3. Strip defaults in input string based on initial part of locale string
1660 // (mainly to strip default script tag for a language)
1661 testEntry.key = inLocaleString;
1662 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults,
1663 sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey );
1664 if (foundEntry) {
1665 // The input string begins with a character sequence for which
1666 // there are default substrings which should be stripped if present
1667 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1668 }
1669 }
1670
1671 // D. Re-append any key-value strings, now canonical // <1.10><1.17>
1672 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString );
1673 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString );
1674
1675 // Now create the CFString (even if empty!)
1676 outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII);
1677 }
1678
1679 return outStringRef;
1680 }
1681
1682 // CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on
1683 // the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c
1684 CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator, LangCode lcode, RegionCode rcode) {
1685 CFStringRef result = NULL;
1686 if (0 <= rcode && rcode < kNumRegionCodeToLocaleString) {
1687 const char *localeString = regionCodeToLocaleString[rcode];
1688 if (localeString != NULL && *localeString != '\0') {
1689 result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull);
1690 }
1691 }
1692 if (result) return result;
1693 if (0 <= lcode && lcode < kNumLangCodeToLocaleString) {
1694 const char *localeString = langCodeToLocaleString[lcode];
1695 if (localeString != NULL && *localeString != '\0') {
1696 result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull);
1697 }
1698 }
1699 return result;
1700 }
1701
1702
1703 /*
1704 SPI: CFLocaleGetLanguageRegionEncodingForLocaleIdentifier gets the appropriate language and region codes,
1705 and the default legacy script code and encoding, for the specified locale (or language) string.
1706 Returns false if CFLocale has no information about the given locale (in which case none of the by-reference return values are set);
1707 otherwise may set *langCode and/or *regCode to -1 if there is no appropriate legacy value for the locale.
1708 This is a replacement for the CFBundle SPI CFBundleGetLocalizationInfoForLocalization (which was intended to be temporary and transitional);
1709 this function is more up-to-date in its handling of locale strings, and is in CFLocale where this functionality should belong. Compared
1710 to CFBundleGetLocalizationInfoForLocalization, this function does not specially interpret a NULL localeIdentifier to mean use the single most
1711 preferred localization in the current context (this function returns NO for a NULL localeIdentifier); and in this function
1712 langCode, regCode, and scriptCode are all SInt16* (not SInt32* like the equivalent parameters in CFBundleGetLocalizationInfoForLocalization).
1713 */
1714 static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 );
1715
1716 Boolean CFLocaleGetLanguageRegionEncodingForLocaleIdentifier(CFStringRef localeIdentifier, LangCode *langCode, RegionCode *regCode, ScriptCode *scriptCode, CFStringEncoding *stringEncoding) {
1717 Boolean returnValue = false;
1718 CFStringRef canonicalIdentifier = CFLocaleCreateCanonicalLocaleIdentifierFromString(NULL, localeIdentifier);
1719 if (canonicalIdentifier) {
1720 char localeCString[kLocaleIdentifierCStringMax];
1721 if ( CFStringGetCString(canonicalIdentifier, localeCString, sizeof(localeCString), kCFStringEncodingASCII) ) {
1722 UErrorCode icuStatus = U_ZERO_ERROR;
1723 int32_t languagelength;
1724 char searchString[ULOC_LANG_CAPACITY + ULOC_FULLNAME_CAPACITY];
1725
1726 languagelength = uloc_getLanguage( localeCString, searchString, ULOC_LANG_CAPACITY, &icuStatus );
1727 if ( U_SUCCESS(icuStatus) && languagelength > 0 ) {
1728 // OK, here we have at least a language code, check for other components in order
1729 LocaleToLegacyCodes searchEntry = { (const char *)searchString, 0, 0, 0 };
1730 const LocaleToLegacyCodes * foundEntryPtr;
1731 int32_t componentLength;
1732 char componentString[ULOC_FULLNAME_CAPACITY];
1733
1734 languagelength = strlen(searchString); // in case it got truncated
1735 icuStatus = U_ZERO_ERROR;
1736 componentLength = uloc_getScript( localeCString, componentString, sizeof(componentString), &icuStatus );
1737 if ( U_FAILURE(icuStatus) || componentLength == 0 ) {
1738 icuStatus = U_ZERO_ERROR;
1739 componentLength = uloc_getCountry( localeCString, componentString, sizeof(componentString), &icuStatus );
1740 if ( U_FAILURE(icuStatus) || componentLength == 0 ) {
1741 icuStatus = U_ZERO_ERROR;
1742 componentLength = uloc_getVariant( localeCString, componentString, sizeof(componentString), &icuStatus );
1743 if ( U_FAILURE(icuStatus) ) {
1744 componentLength = 0;
1745 }
1746 }
1747 }
1748
1749 // Append whichever other component we first found
1750 if (componentLength > 0) {
1751 strlcat(searchString, "_", sizeof(searchString));
1752 strlcat(searchString, componentString, sizeof(searchString));
1753 }
1754
1755 // Search
1756 foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries );
1757 if (foundEntryPtr == NULL && (int32_t) strlen(searchString) > languagelength) {
1758 // truncate to language al;one and try again
1759 searchString[languagelength] = 0;
1760 foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries );
1761 }
1762
1763 // If found a matching entry, return requested values
1764 if (foundEntryPtr) {
1765 returnValue = true;
1766 if (langCode) *langCode = foundEntryPtr->langCode;
1767 if (regCode) *regCode = foundEntryPtr->regCode;
1768 if (stringEncoding) *stringEncoding = foundEntryPtr->encoding;
1769 if (scriptCode) {
1770 // map CFStringEncoding to ScriptCode
1771 if (foundEntryPtr->encoding < 33/*kCFStringEncodingMacSymbol*/) {
1772 *scriptCode = foundEntryPtr->encoding;
1773 } else {
1774 switch (foundEntryPtr->encoding) {
1775 case 0x8C/*kCFStringEncodingMacFarsi*/: *scriptCode = 4/*smArabic*/; break;
1776 case 0x98/*kCFStringEncodingMacUkrainian*/: *scriptCode = 7/*smCyrillic*/; break;
1777 case 0xEC/*kCFStringEncodingMacInuit*/: *scriptCode = 28/*smEthiopic*/; break;
1778 case 0xFC/*kCFStringEncodingMacVT100*/: *scriptCode = 32/*smUninterp*/; break;
1779 default: *scriptCode = 0/*smRoman*/; break;
1780 }
1781 }
1782 }
1783 }
1784 }
1785 }
1786 CFRelease(canonicalIdentifier);
1787 }
1788 return returnValue;
1789 }
1790
1791 static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ) {
1792 const char * localeString1 = ((const LocaleToLegacyCodes *)entry1)->locale;
1793 const char * localeString2 = ((const LocaleToLegacyCodes *)entry2)->locale;
1794 return strcmp(localeString1, localeString2);
1795 }
1796
1797
1798 CFDictionaryRef CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator, CFStringRef localeID) {
1799 char cLocaleID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY];
1800 char buffer[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY];
1801 CFMutableDictionaryRef working = CFDictionaryCreateMutable(allocator, 10, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1802
1803 UErrorCode icuStatus = U_ZERO_ERROR;
1804 int32_t length = 0;
1805
1806 // Extract the C string locale ID, for ICU
1807 CFIndex outBytes = 0;
1808 CFStringGetBytes(localeID, CFRangeMake(0, CFStringGetLength(localeID)), kCFStringEncodingASCII, (UInt8) '?', true, (unsigned char *)cLocaleID, sizeof(cLocaleID)/sizeof(char) - 1, &outBytes);
1809 cLocaleID[outBytes] = '\0';
1810
1811 // Get the components
1812 length = uloc_getLanguage(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
1813 if (U_SUCCESS(icuStatus) && length > 0)
1814 {
1815 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
1816 CFDictionaryAddValue(working, kCFLocaleLanguageCodeKey, string);
1817 CFRelease(string);
1818 }
1819 icuStatus = U_ZERO_ERROR;
1820
1821 length = uloc_getScript(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
1822 if (U_SUCCESS(icuStatus) && length > 0)
1823 {
1824 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
1825 CFDictionaryAddValue(working, kCFLocaleScriptCodeKey, string);
1826 CFRelease(string);
1827 }
1828 icuStatus = U_ZERO_ERROR;
1829
1830 length = uloc_getCountry(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
1831 if (U_SUCCESS(icuStatus) && length > 0)
1832 {
1833 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
1834 CFDictionaryAddValue(working, kCFLocaleCountryCodeKey, string);
1835 CFRelease(string);
1836 }
1837 icuStatus = U_ZERO_ERROR;
1838
1839 length = uloc_getVariant(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
1840 if (U_SUCCESS(icuStatus) && length > 0)
1841 {
1842 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
1843 CFDictionaryAddValue(working, kCFLocaleVariantCodeKey, string);
1844 CFRelease(string);
1845 }
1846 icuStatus = U_ZERO_ERROR;
1847
1848 // Now get the keywords; open an enumerator on them
1849 UEnumeration *iter = uloc_openKeywords(cLocaleID, &icuStatus);
1850 const char *locKey = NULL;
1851 int32_t locKeyLen = 0;
1852 while ((locKey = uenum_next(iter, &locKeyLen, &icuStatus)) && U_SUCCESS(icuStatus))
1853 {
1854 char locValue[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1855
1856 // Get the value for this keyword
1857 if (uloc_getKeywordValue(cLocaleID, locKey, locValue, sizeof(locValue)/sizeof(char), &icuStatus) > 0
1858 && U_SUCCESS(icuStatus))
1859 {
1860 CFStringRef key = CFStringCreateWithBytes(allocator, (UInt8 *)locKey, strlen(locKey), kCFStringEncodingASCII, true);
1861 CFStringRef value = CFStringCreateWithBytes(allocator, (UInt8 *)locValue, strlen(locValue), kCFStringEncodingASCII, true);
1862 if (key && value)
1863 CFDictionaryAddValue(working, key, value);
1864 if (key)
1865 CFRelease(key);
1866 if (value)
1867 CFRelease(value);
1868 }
1869 }
1870 uenum_close(iter);
1871
1872 // Convert to an immutable dictionary and return
1873 CFDictionaryRef result = CFDictionaryCreateCopy(allocator, working);
1874 CFRelease(working);
1875 return result;
1876 }
1877
1878 static char *__CStringFromString(CFStringRef str) {
1879 if (!str) return NULL;
1880 CFRange rg = CFRangeMake(0, CFStringGetLength(str));
1881 CFIndex neededLength = 0;
1882 CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, NULL, 0, &neededLength);
1883 char *buf = (char *)malloc(neededLength + 1);
1884 CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, (uint8_t *)buf, neededLength, &neededLength);
1885 buf[neededLength] = '\0';
1886 return buf;
1887 }
1888
1889 CFStringRef CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator, CFDictionaryRef dictionary) {
1890 CFIndex cnt = CFDictionaryGetCount(dictionary);
1891 STACK_BUFFER_DECL(CFStringRef, values, cnt);
1892 STACK_BUFFER_DECL(CFStringRef, keys, cnt);
1893 CFDictionaryGetKeysAndValues(dictionary, (const void **)keys, (const void **)values);
1894
1895 char *language = NULL, *script = NULL, *country = NULL, *variant = NULL;
1896 for (CFIndex idx = 0; idx < cnt; idx++) {
1897 if (CFEqual(kCFLocaleLanguageCodeKey, keys[idx])) {
1898 language = __CStringFromString(values[idx]);
1899 keys[idx] = NULL;
1900 } else if (CFEqual(kCFLocaleScriptCodeKey, keys[idx])) {
1901 script = __CStringFromString(values[idx]);
1902 keys[idx] = NULL;
1903 } else if (CFEqual(kCFLocaleCountryCodeKey, keys[idx])) {
1904 country = __CStringFromString(values[idx]);
1905 keys[idx] = NULL;
1906 } else if (CFEqual(kCFLocaleVariantCodeKey, keys[idx])) {
1907 variant = __CStringFromString(values[idx]);
1908 keys[idx] = NULL;
1909 }
1910 }
1911
1912 char *buf1 = NULL; // (|L)(|_S)(|_C|_C_V|__V)
1913 asprintf(&buf1, "%s%s%s%s%s%s%s", language ? language : "", script ? "_" : "", script ? script : "", (country || variant ? "_" : ""), country ? country : "", variant ? "_" : "", variant ? variant : "");
1914
1915 char cLocaleID[2 * ULOC_FULLNAME_CAPACITY + 2 * ULOC_KEYWORD_AND_VALUES_CAPACITY];
1916 strlcpy(cLocaleID, buf1, sizeof(cLocaleID));
1917 free(language);
1918 free(script);
1919 free(country);
1920 free(variant);
1921 free(buf1);
1922
1923 for (CFIndex idx = 0; idx < cnt; idx++) {
1924 if (keys[idx]) {
1925 char *key = __CStringFromString(keys[idx]);
1926 char *value;
1927 if (0 == strcmp(key, "kCFLocaleCalendarKey")) {
1928 // For interchangeability convenience, we alternatively allow a
1929 // calendar object to be passed in, with the alternate key, and
1930 // we'll extract the identifier.
1931 CFCalendarRef cal = (CFCalendarRef)values[idx];
1932 CFStringRef ident = CFCalendarGetIdentifier(cal);
1933 value = __CStringFromString(ident);
1934 char *oldkey = key;
1935 key = strdup("calendar");
1936 free(oldkey);
1937 } else {
1938 value = __CStringFromString(values[idx]);
1939 }
1940 UErrorCode status = U_ZERO_ERROR;
1941 uloc_setKeywordValue(key, value, cLocaleID, sizeof(cLocaleID), &status);
1942 free(key);
1943 free(value);
1944 }
1945 }
1946
1947 return CFStringCreateWithCString(allocator, cLocaleID, kCFStringEncodingASCII);
1948 }
1949