2 * Copyright (c) 2008 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 2002-2007, Apple Inc. All rights reserved.
26 Responsibility: Christopher Kane
28 CFLocaleIdentifier.c defines
29 - enum value kLocaleIdentifierCStringMax
30 - structs KeyStringToResultString, SpecialCaseUpdates
31 and provides the following data for the functions
32 CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes,
33 CFLocaleCreateCanonicalLocaleIdentifierFromString, and
34 CFLocaleCreateCanonicalLanguageIdentifierFromString:
36 1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString;
37 map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string
39 2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString;
40 map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string
42 3. static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical;
43 map old Apple string oldAppleLocaleToCanonical[n].key
44 to canonical locale string oldAppleLocaleToCanonical[n].result
45 for n = 0..kNumOldAppleLocaleToCanonical-1
47 4. static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical;
48 map non-canonical language prefix (3-letter, obsolete) localeStringPrefixToCanonical[n].key
49 to updated replacement localeStringPrefixToCanonical[n].result
50 for n = 0..kNumLocaleStringPrefixToCanonical-1
52 5. static const SpecialCaseUpdates specialCases[];
53 various special cases for updating region codes, or for updating language codes based on region codes
55 6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults;
56 map locale string region tag localeStringRegionToDefaults[n].key
57 to default substrings to delete localeStringRegionToDefaults[n].result
58 for n = 0..kNumLocaleStringRegionToDefaults-1
60 7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults;
61 map locale string initial part localeStringPrefixToDefaults[n].key
62 to default substrings to delete localeStringPrefixToDefaults[n].result
63 for n = 0..kNumLocaleStringPrefixToDefaults-1
65 8. static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString;
66 map Apple locale string appleLocaleToLanguageString[n].key
67 to equivalent language string appleLocaleToLanguageString[n].result
68 for n = 0..kNumAppleLocaleToLanguageString-1
72 #include <CoreFoundation/CFString.h>
76 #include <unicode/uloc.h>
79 // Max byte length of locale identifier (ASCII) as C string, including terminating null byte
81 kLocaleIdentifierCStringMax
= ULOC_FULLNAME_CAPACITY
+ ULOC_KEYWORD_AND_VALUES_CAPACITY
// currently 56 + 100
84 // KeyStringToResultString struct used in data tables for CFLocaleCreateCanonicalLocaleIdentifierFromString
85 struct KeyStringToResultString
{
89 typedef struct KeyStringToResultString KeyStringToResultString
;
91 // SpecialCaseUpdates struct used in data tables for CFLocaleCreateCanonicalLocaleIdentifierFromString
92 struct SpecialCaseUpdates
{
99 typedef struct SpecialCaseUpdates SpecialCaseUpdates
;
// Lookup table from RegionCode values to canonical locale identifier strings.
// The array index is the RegionCode itself, so entries must stay in numeric order.
// NULL marks codes listed below as unassigned/unused; entries that begin with '_'
// (e.g. "_CA") carry a region but no language (language code "-1 none").
102 static const char * const regionCodeToLocaleString
[] = {
103 // map RegionCode (array index) to canonical locale string
105 // canon. string region code; language code; [comment] [ # __CFBundleLocaleAbbreviationsArray
106 // -------- ------------ ------------------ ------------ -------- string, if different ]
107 "en_US", // 0 verUS; 0 langEnglish;
108 "fr_FR", // 1 verFrance; 1 langFrench;
109 "en_GB", // 2 verBritain; 0 langEnglish;
110 "de_DE", // 3 verGermany; 2 langGerman;
111 "it_IT", // 4 verItaly; 3 langItalian;
112 "nl_NL", // 5 verNetherlands; 4 langDutch;
113 "nl_BE", // 6 verFlemish; 34 langFlemish (redundant, =Dutch);
114 "sv_SE", // 7 verSweden; 5 langSwedish;
115 "es_ES", // 8 verSpain; 6 langSpanish;
116 "da_DK", // 9 verDenmark; 7 langDanish;
117 "pt_PT", // 10 verPortugal; 8 langPortuguese;
118 "fr_CA", // 11 verFrCanada; 1 langFrench;
119 "nb_NO", // 12 verNorway; 9 langNorwegian (Bokmal); # "no_NO"
120 "he_IL", // 13 verIsrael; 10 langHebrew;
121 "ja_JP", // 14 verJapan; 11 langJapanese;
122 "en_AU", // 15 verAustralia; 0 langEnglish;
123 "ar", // 16 verArabic; 12 langArabic;
124 "fi_FI", // 17 verFinland; 13 langFinnish;
125 "fr_CH", // 18 verFrSwiss; 1 langFrench;
126 "de_CH", // 19 verGrSwiss; 2 langGerman;
127 "el_GR", // 20 verGreece; 14 langGreek (modern)-Grek-mono;
128 "is_IS", // 21 verIceland; 15 langIcelandic;
129 "mt_MT", // 22 verMalta; 16 langMaltese;
130 "el_CY", // 23 verCyprus; 14 langGreek?; el or tr? guess el # ""
131 "tr_TR", // 24 verTurkey; 17 langTurkish;
132 "hr_HR", // 25 verYugoCroatian; 18 langCroatian; * one-way mapping -> verCroatia
133 "nl_NL", // 26 KCHR, Netherlands; 4 langDutch; * one-way mapping
134 "nl_BE", // 27 KCHR, verFlemish; 34 langFlemish; * one-way mapping
135 "_CA", // 28 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
136 "_CA", // 29 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
137 "pt_PT", // 30 KCHR, Portugal; 8 langPortuguese; * one-way mapping
138 "nb_NO", // 31 KCHR, Norway; 9 langNorwegian (Bokmal); * one-way mapping # "no_NO"
139 "da_DK", // 32 KCHR, Denmark; 7 langDanish; * one-way mapping
140 "hi_IN", // 33 verIndiaHindi; 21 langHindi;
141 "ur_PK", // 34 verPakistanUrdu; 20 langUrdu;
142 "tr_TR", // 35 verTurkishModified; 17 langTurkish; * one-way mapping
143 "it_CH", // 36 verItalianSwiss; 3 langItalian;
144 "en_001", // 37 verInternational; 0 langEnglish; ASCII only # "en"
145 NULL
, // 38 *unassigned; -1 none; * one-way mapping # ""
146 "ro_RO", // 39 verRomania; 37 langRomanian;
147 "grc", // 40 verGreekAncient; 148 langGreekAncient -Grek-poly; # "el_GR"
148 "lt_LT", // 41 verLithuania; 24 langLithuanian;
149 "pl_PL", // 42 verPoland; 25 langPolish;
150 "hu_HU", // 43 verHungary; 26 langHungarian;
151 "et_EE", // 44 verEstonia; 27 langEstonian;
152 "lv_LV", // 45 verLatvia; 28 langLatvian;
153 "se", // 46 verSami; 29 langSami;
154 "fo_FO", // 47 verFaroeIsl; 30 langFaroese;
155 "fa_IR", // 48 verIran; 31 langFarsi/Persian;
156 "ru_RU", // 49 verRussia; 32 langRussian;
157 "ga_IE", // 50 verIreland; 35 langIrishGaelic (no dots);
158 "ko_KR", // 51 verKorea; 23 langKorean;
159 "zh_CN", // 52 verChina; 33 langSimpChinese;
160 "zh_TW", // 53 verTaiwan; 19 langTradChinese;
161 "th_TH", // 54 verThailand; 22 langThai;
162 "und", // 55 verScriptGeneric; -1 none; # "" // <1.9>
163 "cs_CZ", // 56 verCzech; 38 langCzech;
164 "sk_SK", // 57 verSlovak; 39 langSlovak;
165 "und", // 58 verEastAsiaGeneric; -1 none; * one-way mapping # "" // <1.9>
166 "hu_HU", // 59 verMagyar; 26 langHungarian; * one-way mapping -> verHungary
167 "bn", // 60 verBengali; 67 langBengali; _IN or _BD? guess generic
168 "be_BY", // 61 verBelarus; 46 langBelorussian;
169 "uk_UA", // 62 verUkraine; 45 langUkrainian;
170 NULL
, // 63 *unused; -1 none; * one-way mapping # ""
171 "el_GR", // 64 verGreeceAlt; 14 langGreek (modern)-Grek-mono; * one-way mapping
172 "sr_CS", // 65 verSerbian; 42 langSerbian -Cyrl; // <1.18>
173 "sl_SI", // 66 verSlovenian; 40 langSlovenian;
174 "mk_MK", // 67 verMacedonian; 43 langMacedonian;
175 "hr_HR", // 68 verCroatia; 18 langCroatian;
176 NULL
, // 69 *unused; -1 none; * one-way mapping # ""
177 "de-1996", // 70 verGermanReformed; 2 langGerman; 1996 orthogr. # "de_DE"
178 "pt_BR", // 71 verBrazil; 8 langPortuguese;
179 "bg_BG", // 72 verBulgaria; 44 langBulgarian;
180 "ca_ES", // 73 verCatalonia; 130 langCatalan;
181 "mul", // 74 verMultilingual; -1 none; # ""
182 "gd", // 75 verScottishGaelic; 144 langScottishGaelic;
183 "gv", // 76 verManxGaelic; 145 langManxGaelic;
184 "br", // 77 verBreton; 142 langBreton;
185 "iu_CA", // 78 verNunavut; 143 langInuktitut -Cans;
186 "cy", // 79 verWelsh; 128 langWelsh;
187 "_CA", // 80 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
188 "ga-Latg_IE", // 81 verIrishGaelicScript; 146 langIrishGaelicScript -dots; # "ga_IE" // <xx>
189 "en_CA", // 82 verEngCanada; 0 langEnglish;
190 "dz_BT", // 83 verBhutan; 137 langDzongkha;
191 "hy_AM", // 84 verArmenian; 51 langArmenian;
192 "ka_GE", // 85 verGeorgian; 52 langGeorgian;
193 "es_419", // 86 verSpLatinAmerica; 6 langSpanish; # "es"
194 "es_ES", // 87 KCHR, Spain; 6 langSpanish; * one-way mapping
195 "to_TO", // 88 verTonga; 147 langTongan;
196 "pl_PL", // 89 KCHR, Poland; 25 langPolish; * one-way mapping
197 "ca_ES", // 90 KCHR, Catalonia; 130 langCatalan; * one-way mapping
198 "fr_001", // 91 verFrenchUniversal; 1 langFrench;
199 "de_AT", // 92 verAustria; 2 langGerman;
200 "es_419", // 93 > verSpLatinAmerica; 6 langSpanish; * one-way mapping # "es"
201 "gu_IN", // 94 verGujarati; 69 langGujarati;
202 "pa", // 95 verPunjabi; 70 langPunjabi; _IN or _PK? guess generic
203 "ur_IN", // 96 verIndiaUrdu; 20 langUrdu;
204 "vi_VN", // 97 verVietnam; 80 langVietnamese;
205 "fr_BE", // 98 verFrBelgium; 1 langFrench;
206 "uz_UZ", // 99 verUzbek; 47 langUzbek;
207 "en_SG", // 100 verSingapore; 0 langEnglish?; en, zh, or ms? guess en # ""
208 "nn_NO", // 101 verNynorsk; 151 langNynorsk; # ""
209 "af_ZA", // 102 verAfrikaans; 141 langAfrikaans;
210 "eo", // 103 verEsperanto; 94 langEsperanto;
211 "mr_IN", // 104 verMarathi; 66 langMarathi;
212 "bo", // 105 verTibetan; 63 langTibetan;
213 "ne_NP", // 106 verNepal; 64 langNepali;
214 "kl", // 107 verGreenland; 149 langGreenlandic;
215 "en_IE", // 108 verIrelandEnglish; 0 langEnglish; # (no entry)
// Number of entries in regionCodeToLocaleString, derived from the array's size.
218 kNumRegionCodeToLocaleString
= sizeof(regionCodeToLocaleString
)/sizeof(char *)
// Lookup table from LangCode values to canonical language identifier strings.
// The array index is the LangCode itself, so entries must stay in numeric order.
// Codes 95..127 are unassigned and are padded with NULL (three runs of 11) so that
// the entry for code 128 ("cy") lands at index 128.
221 static const char * const langCodeToLocaleString
[] = {
222 // map LangCode (array index) to canonical locale string
224 // canon. string language code; [ comment] [ # __CFBundleLanguageAbbreviationsArray
225 // -------- -------------- ---------- -------- string, if different ]
226 "en", // 0 langEnglish;
227 "fr", // 1 langFrench;
228 "de", // 2 langGerman;
229 "it", // 3 langItalian;
230 "nl", // 4 langDutch;
231 "sv", // 5 langSwedish;
232 "es", // 6 langSpanish;
233 "da", // 7 langDanish;
234 "pt", // 8 langPortuguese;
235 "nb", // 9 langNorwegian (Bokmal); # "no"
236 "he", // 10 langHebrew -Hebr;
237 "ja", // 11 langJapanese -Jpan;
238 "ar", // 12 langArabic -Arab;
239 "fi", // 13 langFinnish;
240 "el", // 14 langGreek (modern)-Grek-mono;
241 "is", // 15 langIcelandic;
242 "mt", // 16 langMaltese -Latn;
243 "tr", // 17 langTurkish -Latn;
244 "hr", // 18 langCroatian;
245 "zh-Hant", // 19 langTradChinese; # "zh"
246 "ur", // 20 langUrdu -Arab;
247 "hi", // 21 langHindi -Deva;
248 "th", // 22 langThai -Thai;
249 "ko", // 23 langKorean -Hang;
250 "lt", // 24 langLithuanian;
251 "pl", // 25 langPolish;
252 "hu", // 26 langHungarian;
253 "et", // 27 langEstonian;
254 "lv", // 28 langLatvian;
255 "se", // 29 langSami;
256 "fo", // 30 langFaroese;
257 "fa", // 31 langFarsi/Persian -Arab;
258 "ru", // 32 langRussian -Cyrl;
259 "zh-Hans", // 33 langSimpChinese; # "zh"
260 "nl-BE", // 34 langFlemish (redundant, =Dutch); # "nl"
261 "ga", // 35 langIrishGaelic (no dots);
262 "sq", // 36 langAlbanian; no region codes
263 "ro", // 37 langRomanian;
264 "cs", // 38 langCzech;
265 "sk", // 39 langSlovak;
266 "sl", // 40 langSlovenian;
267 "yi", // 41 langYiddish -Hebr; no region codes
268 "sr", // 42 langSerbian -Cyrl;
269 "mk", // 43 langMacedonian -Cyrl;
270 "bg", // 44 langBulgarian -Cyrl;
271 "uk", // 45 langUkrainian -Cyrl;
272 "be", // 46 langBelorussian -Cyrl;
273 "uz-Cyrl", // 47 langUzbek -Cyrl; also -Latn, -Arab
274 "kk", // 48 langKazakh -Cyrl; no region codes; also -Latn, -Arab
275 "az-Cyrl", // 49 langAzerbaijani -Cyrl; no region codes # "az"
276 "az-Arab", // 50 langAzerbaijanAr -Arab; no region codes # "az"
277 "hy", // 51 langArmenian -Armn;
278 "ka", // 52 langGeorgian -Geor;
279 "mo", // 53 langMoldavian -Cyrl; no region codes
280 "ky", // 54 langKirghiz -Cyrl; no region codes; also -Latn, -Arab
281 "tg-Cyrl", // 55 langTajiki -Cyrl; no region codes; also -Latn, -Arab
282 "tk-Cyrl", // 56 langTurkmen -Cyrl; no region codes; also -Latn, -Arab
283 "mn-Mong", // 57 langMongolian -Mong; no region codes # "mn"
284 "mn-Cyrl", // 58 langMongolianCyr -Cyrl; no region codes # "mn"
285 "ps", // 59 langPashto -Arab; no region codes
286 "ku", // 60 langKurdish -Arab; no region codes
287 "ks", // 61 langKashmiri -Arab; no region codes
288 "sd", // 62 langSindhi -Arab; no region codes
289 "bo", // 63 langTibetan -Tibt;
290 "ne", // 64 langNepali -Deva;
291 "sa", // 65 langSanskrit -Deva; no region codes
292 "mr", // 66 langMarathi -Deva;
293 "bn", // 67 langBengali -Beng;
294 "as", // 68 langAssamese -Beng; no region codes
295 "gu", // 69 langGujarati -Gujr;
296 "pa", // 70 langPunjabi -Guru;
297 "or", // 71 langOriya -Orya; no region codes
298 "ml", // 72 langMalayalam -Mlym; no region codes
299 "kn", // 73 langKannada -Knda; no region codes
300 "ta", // 74 langTamil -Taml; no region codes
301 "te", // 75 langTelugu -Telu; no region codes
302 "si", // 76 langSinhalese -Sinh; no region codes
303 "my", // 77 langBurmese -Mymr; no region codes
304 "km", // 78 langKhmer -Khmr; no region codes
305 "lo", // 79 langLao -Laoo; no region codes
306 "vi", // 80 langVietnamese -Latn;
307 "id", // 81 langIndonesian -Latn; no region codes
308 "tl", // 82 langTagalog -Latn; no region codes
309 "ms", // 83 langMalayRoman -Latn; no region codes # "ms"
310 "ms-Arab", // 84 langMalayArabic -Arab; no region codes # "ms"
311 "am", // 85 langAmharic -Ethi; no region codes
312 "ti", // 86 langTigrinya -Ethi; no region codes
313 "om", // 87 langOromo -Ethi; no region codes
314 "so", // 88 langSomali -Latn; no region codes
315 "sw", // 89 langSwahili -Latn; no region codes
316 "rw", // 90 langKinyarwanda -Latn; no region codes
317 "rn", // 91 langRundi -Latn; no region codes
318 "ny", // 92 langNyanja/Chewa -Latn; no region codes # ""
319 "mg", // 93 langMalagasy -Latn; no region codes
320 "eo", // 94 langEsperanto -Latn;
321 NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
, // 95 to 105 (gap)
322 NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
, // 106 to 116 (gap)
323 NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
,NULL
, // 117 to 127 (gap)
324 "cy", // 128 langWelsh -Latn;
325 "eu", // 129 langBasque -Latn; no region codes
326 "ca", // 130 langCatalan -Latn;
327 "la", // 131 langLatin -Latn; no region codes
328 "qu", // 132 langQuechua -Latn; no region codes
329 "gn", // 133 langGuarani -Latn; no region codes
330 "ay", // 134 langAymara -Latn; no region codes
331 "tt-Cyrl", // 135 langTatar -Cyrl; no region codes
332 "ug", // 136 langUighur -Arab; no region codes
333 "dz", // 137 langDzongkha -Tibt;
334 "jv", // 138 langJavaneseRom -Latn; no region codes
335 "su", // 139 langSundaneseRom -Latn; no region codes
336 "gl", // 140 langGalician -Latn; no region codes
337 "af", // 141 langAfrikaans -Latn;
338 "br", // 142 langBreton -Latn;
339 "iu", // 143 langInuktitut -Cans;
340 "gd", // 144 langScottishGaelic;
341 "gv", // 145 langManxGaelic -Latn;
342 "ga-Latg", // 146 langIrishGaelicScript -Latn-dots; # "ga" // <xx>
343 "to", // 147 langTongan -Latn;
344 "grc", // 148 langGreekAncient -Grek-poly; # "el"
345 "kl", // 149 langGreenlandic -Latn;
346 "az-Latn", // 150 langAzerbaijanRoman -Latn; no region codes # "az"
347 "nn", // 151 langNynorsk -Latn; # (no entry)
// Number of entries in langCodeToLocaleString, derived from the array's size.
350 kNumLangCodeToLocaleString
= sizeof(langCodeToLocaleString
)/sizeof(char *)
// Table of { key, result } pairs mapping an obsolete or old-style Apple locale/language
// string (key) to its canonical replacement (result).
// NOTE(review): the strcmp-sorted key order is presumably relied on by a sorted-table
// lookup in the consuming functions, which are not visible here — confirm before
// inserting entries, and keep new keys in strcmp order.
353 static const KeyStringToResultString oldAppleLocaleToCanonical
[] = {
354 // Map obsolete/old-style Apple strings to canonical
355 // Must be sorted according to how strcmp compares the strings in the first column
357 // non-canonical canonical [ comment ] # source/reason for non-canonical string
359 // ------------- ---------
360 { "Afrikaans", "af" }, // # __CFBundleLanguageNamesArray
361 { "Albanian", "sq" }, // # __CFBundleLanguageNamesArray
362 { "Amharic", "am" }, // # __CFBundleLanguageNamesArray
363 { "Arabic", "ar" }, // # __CFBundleLanguageNamesArray
364 { "Armenian", "hy" }, // # __CFBundleLanguageNamesArray
365 { "Assamese", "as" }, // # __CFBundleLanguageNamesArray
366 { "Aymara", "ay" }, // # __CFBundleLanguageNamesArray
367 { "Azerbaijani", "az" }, // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn")
368 { "Basque", "eu" }, // # __CFBundleLanguageNamesArray
369 { "Belarusian", "be" }, // # handle other names
370 { "Belorussian", "be" }, // # handle other names
371 { "Bengali", "bn" }, // # __CFBundleLanguageNamesArray
372 { "Brazilian Portugese", "pt-BR" }, // # from Installer.app Info.plist IFLanguages key, misspelled
373 { "Brazilian Portuguese", "pt-BR" }, // # correct spelling for above
374 { "Breton", "br" }, // # __CFBundleLanguageNamesArray
375 { "Bulgarian", "bg" }, // # __CFBundleLanguageNamesArray
376 { "Burmese", "my" }, // # __CFBundleLanguageNamesArray
377 { "Byelorussian", "be" }, // # __CFBundleLanguageNamesArray
378 { "Catalan", "ca" }, // # __CFBundleLanguageNamesArray
379 { "Chewa", "ny" }, // # handle other names
380 { "Chichewa", "ny" }, // # handle other names
381 { "Chinese", "zh" }, // -Hans,-Hant? # __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans")
382 { "Chinese, Simplified", "zh-Hans" }, // # from Installer.app Info.plist IFLanguages key
383 { "Chinese, Traditional", "zh-Hant" }, // # correct spelling for below
384 { "Chinese, Tradtional", "zh-Hant" }, // # from Installer.app Info.plist IFLanguages key, misspelled
385 { "Croatian", "hr" }, // # __CFBundleLanguageNamesArray
386 { "Czech", "cs" }, // # __CFBundleLanguageNamesArray
387 { "Danish", "da" }, // # __CFBundleLanguageNamesArray
388 { "Dutch", "nl" }, // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE")
389 { "Dzongkha", "dz" }, // # __CFBundleLanguageNamesArray
390 { "English", "en" }, // # __CFBundleLanguageNamesArray
391 { "Esperanto", "eo" }, // # __CFBundleLanguageNamesArray
392 { "Estonian", "et" }, // # __CFBundleLanguageNamesArray
393 { "Faroese", "fo" }, // # __CFBundleLanguageNamesArray
394 { "Farsi", "fa" }, // # __CFBundleLanguageNamesArray
395 { "Finnish", "fi" }, // # __CFBundleLanguageNamesArray
396 { "Flemish", "nl-BE" }, // # handle other names
397 { "French", "fr" }, // # __CFBundleLanguageNamesArray
398 { "Galician", "gl" }, // # __CFBundleLanguageNamesArray
399 { "Gallegan", "gl" }, // # handle other names
400 { "Georgian", "ka" }, // # __CFBundleLanguageNamesArray
401 { "German", "de" }, // # __CFBundleLanguageNamesArray
402 { "Greek", "el" }, // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc")
403 { "Greenlandic", "kl" }, // # __CFBundleLanguageNamesArray
404 { "Guarani", "gn" }, // # __CFBundleLanguageNamesArray
405 { "Gujarati", "gu" }, // # __CFBundleLanguageNamesArray
406 { "Hawaiian", "haw" }, // # handle new languages
407 { "Hebrew", "he" }, // # __CFBundleLanguageNamesArray
408 { "Hindi", "hi" }, // # __CFBundleLanguageNamesArray
409 { "Hungarian", "hu" }, // # __CFBundleLanguageNamesArray
410 { "Icelandic", "is" }, // # __CFBundleLanguageNamesArray
411 { "Indonesian", "id" }, // # __CFBundleLanguageNamesArray
412 { "Inuktitut", "iu" }, // # __CFBundleLanguageNamesArray
413 { "Irish", "ga" }, // # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots")
414 { "Italian", "it" }, // # __CFBundleLanguageNamesArray
415 { "Japanese", "ja" }, // # __CFBundleLanguageNamesArray
416 { "Javanese", "jv" }, // # __CFBundleLanguageNamesArray
417 { "Kalaallisut", "kl" }, // # handle other names
418 { "Kannada", "kn" }, // # __CFBundleLanguageNamesArray
419 { "Kashmiri", "ks" }, // # __CFBundleLanguageNamesArray
420 { "Kazakh", "kk" }, // # __CFBundleLanguageNamesArray
421 { "Khmer", "km" }, // # __CFBundleLanguageNamesArray
422 { "Kinyarwanda", "rw" }, // # __CFBundleLanguageNamesArray
423 { "Kirghiz", "ky" }, // # __CFBundleLanguageNamesArray
424 { "Korean", "ko" }, // # __CFBundleLanguageNamesArray
425 { "Kurdish", "ku" }, // # __CFBundleLanguageNamesArray
426 { "Lao", "lo" }, // # __CFBundleLanguageNamesArray
427 { "Latin", "la" }, // # __CFBundleLanguageNamesArray
428 { "Latvian", "lv" }, // # __CFBundleLanguageNamesArray
429 { "Lithuanian", "lt" }, // # __CFBundleLanguageNamesArray
430 { "Macedonian", "mk" }, // # __CFBundleLanguageNamesArray
431 { "Malagasy", "mg" }, // # __CFBundleLanguageNamesArray
432 { "Malay", "ms" }, // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab")
433 { "Malayalam", "ml" }, // # __CFBundleLanguageNamesArray
434 { "Maltese", "mt" }, // # __CFBundleLanguageNamesArray
435 { "Manx", "gv" }, // # __CFBundleLanguageNamesArray
436 { "Marathi", "mr" }, // # __CFBundleLanguageNamesArray
437 { "Moldavian", "mo" }, // # __CFBundleLanguageNamesArray
438 { "Mongolian", "mn" }, // -Mong,-Cyrl? # __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl")
439 { "Nepali", "ne" }, // # __CFBundleLanguageNamesArray
440 { "Norwegian", "nb" }, // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no")
441 { "Nyanja", "ny" }, // # __CFBundleLanguageNamesArray
442 { "Nynorsk", "nn" }, // # handle other names (no entry in __CFBundleLanguageNamesArray)
443 { "Oriya", "or" }, // # __CFBundleLanguageNamesArray
444 { "Oromo", "om" }, // # __CFBundleLanguageNamesArray
445 { "Panjabi", "pa" }, // # handle other names
446 { "Pashto", "ps" }, // # __CFBundleLanguageNamesArray
447 { "Persian", "fa" }, // # handle other names
448 { "Polish", "pl" }, // # __CFBundleLanguageNamesArray
449 { "Portuguese", "pt" }, // # __CFBundleLanguageNamesArray
450 { "Portuguese, Brazilian", "pt-BR" }, // # handle other names
451 { "Punjabi", "pa" }, // # __CFBundleLanguageNamesArray
452 { "Pushto", "ps" }, // # handle other names
453 { "Quechua", "qu" }, // # __CFBundleLanguageNamesArray
454 { "Romanian", "ro" }, // # __CFBundleLanguageNamesArray
455 { "Ruanda", "rw" }, // # handle other names
456 { "Rundi", "rn" }, // # __CFBundleLanguageNamesArray
457 { "Russian", "ru" }, // # __CFBundleLanguageNamesArray
458 { "Sami", "se" }, // # __CFBundleLanguageNamesArray
459 { "Sanskrit", "sa" }, // # __CFBundleLanguageNamesArray
460 { "Scottish", "gd" }, // # __CFBundleLanguageNamesArray
461 { "Serbian", "sr" }, // # __CFBundleLanguageNamesArray
462 { "Simplified Chinese", "zh-Hans" }, // # handle other names
463 { "Sindhi", "sd" }, // # __CFBundleLanguageNamesArray
464 { "Sinhalese", "si" }, // # __CFBundleLanguageNamesArray
465 { "Slovak", "sk" }, // # __CFBundleLanguageNamesArray
466 { "Slovenian", "sl" }, // # __CFBundleLanguageNamesArray
467 { "Somali", "so" }, // # __CFBundleLanguageNamesArray
468 { "Spanish", "es" }, // # __CFBundleLanguageNamesArray
469 { "Sundanese", "su" }, // # __CFBundleLanguageNamesArray
470 { "Swahili", "sw" }, // # __CFBundleLanguageNamesArray
471 { "Swedish", "sv" }, // # __CFBundleLanguageNamesArray
472 { "Tagalog", "tl" }, // # __CFBundleLanguageNamesArray
473 { "Tajik", "tg" }, // # handle other names
474 { "Tajiki", "tg" }, // # __CFBundleLanguageNamesArray
475 { "Tamil", "ta" }, // # __CFBundleLanguageNamesArray
476 { "Tatar", "tt" }, // # __CFBundleLanguageNamesArray
477 { "Telugu", "te" }, // # __CFBundleLanguageNamesArray
478 { "Thai", "th" }, // # __CFBundleLanguageNamesArray
479 { "Tibetan", "bo" }, // # __CFBundleLanguageNamesArray
480 { "Tigrinya", "ti" }, // # __CFBundleLanguageNamesArray
481 { "Tongan", "to" }, // # __CFBundleLanguageNamesArray
482 { "Traditional Chinese", "zh-Hant" }, // # handle other names
483 { "Turkish", "tr" }, // # __CFBundleLanguageNamesArray
484 { "Turkmen", "tk" }, // # __CFBundleLanguageNamesArray
485 { "Uighur", "ug" }, // # __CFBundleLanguageNamesArray
486 { "Ukrainian", "uk" }, // # __CFBundleLanguageNamesArray
487 { "Urdu", "ur" }, // # __CFBundleLanguageNamesArray
488 { "Uzbek", "uz" }, // # __CFBundleLanguageNamesArray
489 { "Vietnamese", "vi" }, // # __CFBundleLanguageNamesArray
490 { "Welsh", "cy" }, // # __CFBundleLanguageNamesArray
491 { "Yiddish", "yi" }, // # __CFBundleLanguageNamesArray
492 { "ar_??", "ar" }, // # from old MapScriptInfoAndISOCodes
493 { "az.Ar", "az-Arab" }, // # from old LocaleRefGetPartString
494 { "az.Cy", "az-Cyrl" }, // # from old LocaleRefGetPartString
495 { "az.La", "az-Latn" }, // # from old LocaleRefGetPartString
496 { "be_??", "be_BY" }, // # from old MapScriptInfoAndISOCodes
497 { "bn_??", "bn" }, // # from old LocaleRefGetPartString
498 { "bo_??", "bo" }, // # from old MapScriptInfoAndISOCodes
499 { "br_??", "br" }, // # from old MapScriptInfoAndISOCodes
500 { "cy_??", "cy" }, // # from old MapScriptInfoAndISOCodes
501 { "de-96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9>
502 { "de_96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9>
503 { "de_??", "de-1996" }, // # from old MapScriptInfoAndISOCodes
504 { "el.El-P", "grc" }, // # from old LocaleRefGetPartString
505 { "en-ascii", "en_001" }, // # from earlier version of tables in this file!
506 { "en_??", "en_001" }, // # from old MapScriptInfoAndISOCodes
507 { "eo_??", "eo" }, // # from old MapScriptInfoAndISOCodes
508 { "es_??", "es_419" }, // # from old MapScriptInfoAndISOCodes
509 { "es_XL", "es_419" }, // # from earlier version of tables in this file!
510 { "fr_??", "fr_001" }, // # from old MapScriptInfoAndISOCodes
511 { "ga-dots", "ga-Latg" }, // # from earlier version of tables in this file! // <1.8>
512 { "ga-dots_IE", "ga-Latg_IE" }, // # from earlier version of tables in this file! // <1.8>
513 { "ga.Lg", "ga-Latg" }, // # from old LocaleRefGetPartString // <1.8>
514 { "ga.Lg_IE", "ga-Latg_IE" }, // # from old LocaleRefGetPartString // <1.8>
515 { "gd_??", "gd" }, // # from old MapScriptInfoAndISOCodes
516 { "gv_??", "gv" }, // # from old MapScriptInfoAndISOCodes
517 { "jv.La", "jv" }, // # logical extension // <1.9>
518 { "jw.La", "jv" }, // # from old LocaleRefGetPartString
519 { "kk.Cy", "kk" }, // # from old LocaleRefGetPartString
520 { "kl.La", "kl" }, // # from old LocaleRefGetPartString
521 { "kl.La_GL", "kl_GL" }, // # from old LocaleRefGetPartString // <1.9>
522 { "lp_??", "se" }, // # from old MapScriptInfoAndISOCodes
523 { "mk_??", "mk_MK" }, // # from old MapScriptInfoAndISOCodes
524 { "mn.Cy", "mn-Cyrl" }, // # from old LocaleRefGetPartString
525 { "mn.Mn", "mn-Mong" }, // # from old LocaleRefGetPartString
526 { "ms.Ar", "ms-Arab" }, // # from old LocaleRefGetPartString
527 { "ms.La", "ms" }, // # from old LocaleRefGetPartString
528 { "nl-be", "nl-BE" }, // # from old LocaleRefGetPartString
529 { "nl-be_BE", "nl_BE" }, // # from old LocaleRefGetPartString
530 // { "no-bok_NO", "nb_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
531 // { "no-nyn_NO", "nn_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
532 // { "nya", "ny" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
533 { "pa_??", "pa" }, // # from old LocaleRefGetPartString
534 { "sa.Dv", "sa" }, // # from old LocaleRefGetPartString
535 { "sl_??", "sl_SI" }, // # from old MapScriptInfoAndISOCodes
536 { "sr_??", "sr_CS" }, // # from old MapScriptInfoAndISOCodes // <1.18>
537 { "su.La", "su" }, // # from old LocaleRefGetPartString
538 { "yi.He", "yi" }, // # from old LocaleRefGetPartString
539 { "zh-simp", "zh-Hans" }, // # from earlier version of tables in this file!
540 { "zh-trad", "zh-Hant" }, // # from earlier version of tables in this file!
541 { "zh.Ha-S", "zh-Hans" }, // # from old LocaleRefGetPartString
542 { "zh.Ha-S_CN", "zh_CN" }, // # from old LocaleRefGetPartString
543 { "zh.Ha-T", "zh-Hant" }, // # from old LocaleRefGetPartString
544 { "zh.Ha-T_TW", "zh_TW" }, // # from old LocaleRefGetPartString
// Number of entries in oldAppleLocaleToCanonical, derived from the array's size.
547 kNumOldAppleLocaleToCanonical
= sizeof(oldAppleLocaleToCanonical
)/sizeof(KeyStringToResultString
)
550 static const KeyStringToResultString localeStringPrefixToCanonical
[] = {
551 // Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to 2-letter ISO 639 code.
552 // (special cases for 'sh' handled separately)
553 // First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column.
555 // non-canonical canonical [ comment ] # source/reason for non-canonical string
557 // ------------- ---------
559 { "afr", "af" }, // Afrikaans
560 { "alb", "sq" }, // Albanian
561 { "amh", "am" }, // Amharic
562 { "ara", "ar" }, // Arabic
563 { "arm", "hy" }, // Armenian
564 { "asm", "as" }, // Assamese
565 { "aym", "ay" }, // Aymara
566 { "aze", "az" }, // Azerbaijani
567 { "baq", "eu" }, // Basque
568 { "bel", "be" }, // Belarusian
569 { "ben", "bn" }, // Bengali
570 { "bih", "bh" }, // Bihari
571 { "bod", "bo" }, // Tibetan
572 { "bos", "bs" }, // Bosnian
573 { "bre", "br" }, // Breton
574 { "bul", "bg" }, // Bulgarian
575 { "bur", "my" }, // Burmese
576 { "cat", "ca" }, // Catalan
577 { "ces", "cs" }, // Czech
578 { "che", "ce" }, // Chechen
579 { "chi", "zh" }, // Chinese
580 { "cor", "kw" }, // Cornish
581 { "cos", "co" }, // Corsican
582 { "cym", "cy" }, // Welsh
583 { "cze", "cs" }, // Czech
584 { "dan", "da" }, // Danish
585 { "deu", "de" }, // German
586 { "dut", "nl" }, // Dutch
587 { "dzo", "dz" }, // Dzongkha
588 { "ell", "el" }, // Greek, Modern (1453-)
589 { "eng", "en" }, // English
590 { "epo", "eo" }, // Esperanto
591 { "est", "et" }, // Estonian
592 { "eus", "eu" }, // Basque
593 { "fao", "fo" }, // Faroese
594 { "fas", "fa" }, // Persian
595 { "fin", "fi" }, // Finnish
596 { "fra", "fr" }, // French
597 { "fre", "fr" }, // French
598 { "geo", "ka" }, // Georgian
599 { "ger", "de" }, // German
600 { "gla", "gd" }, // Gaelic,Scottish
601 { "gle", "ga" }, // Irish
602 { "glg", "gl" }, // Gallegan
603 { "glv", "gv" }, // Manx
604 { "gre", "el" }, // Greek, Modern (1453-)
605 { "grn", "gn" }, // Guarani
606 { "guj", "gu" }, // Gujarati
607 { "heb", "he" }, // Hebrew
608 { "hin", "hi" }, // Hindi
609 { "hrv", "hr" }, // Croatian
610 { "hun", "hu" }, // Hungarian
611 { "hye", "hy" }, // Armenian
612 { "i-hak", "zh-hakka" }, // Hakka # deprecated RFC 3066
613 { "i-lux", "lb" }, // Luxembourgish # deprecated RFC 3066
614 { "i-navajo", "nv" }, // Navajo # deprecated RFC 3066
615 { "ice", "is" }, // Icelandic
616 { "iku", "iu" }, // Inuktitut
617 { "ile", "ie" }, // Interlingue
618 { "in", "id" }, // Indonesian # deprecated 639 code in -> id (1989)
619 { "ina", "ia" }, // Interlingua
620 { "ind", "id" }, // Indonesian
621 { "isl", "is" }, // Icelandic
622 { "ita", "it" }, // Italian
623 { "iw", "he" }, // Hebrew # deprecated 639 code iw -> he (1989)
624 { "jav", "jv" }, // Javanese
625 { "jaw", "jv" }, // Javanese # deprecated 639 code jaw -> jv (2001)
626 { "ji", "yi" }, // Yiddish # deprecated 639 code ji -> yi (1989)
627 { "jpn", "ja" }, // Japanese
628 { "kal", "kl" }, // Kalaallisut
629 { "kan", "kn" }, // Kannada
630 { "kas", "ks" }, // Kashmiri
631 { "kat", "ka" }, // Georgian
632 { "kaz", "kk" }, // Kazakh
633 { "khm", "km" }, // Khmer
634 { "kin", "rw" }, // Kinyarwanda
635 { "kir", "ky" }, // Kirghiz
636 { "kor", "ko" }, // Korean
637 { "kur", "ku" }, // Kurdish
638 { "lao", "lo" }, // Lao
639 { "lat", "la" }, // Latin
640 { "lav", "lv" }, // Latvian
641 { "lit", "lt" }, // Lithuanian
642 { "ltz", "lb" }, // Letzeburgesch
643 { "mac", "mk" }, // Macedonian
644 { "mal", "ml" }, // Malayalam
645 { "mar", "mr" }, // Marathi
646 { "may", "ms" }, // Malay
647 { "mkd", "mk" }, // Macedonian
648 { "mlg", "mg" }, // Malagasy
649 { "mlt", "mt" }, // Maltese
650 { "mol", "mo" }, // Moldavian
651 { "mon", "mn" }, // Mongolian
652 { "msa", "ms" }, // Malay
653 { "mya", "my" }, // Burmese
654 { "nep", "ne" }, // Nepali
655 { "nld", "nl" }, // Dutch
656 { "nno", "nn" }, // Norwegian Nynorsk
657 { "no", "nb" }, // Norwegian generic # ambiguous 639 code no -> nb
658 { "no-bok", "nb" }, // Norwegian Bokmal # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
659 { "no-nyn", "nn" }, // Norwegian Nynorsk # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
660 { "nob", "nb" }, // Norwegian Bokmal
661 { "nor", "nb" }, // Norwegian generic # ambiguous 639 code nor -> nb
662 { "nya", "ny" }, // Nyanja/Chewa/Chichewa # 3-letter code used in old LocaleRefGetPartString
663 { "oci", "oc" }, // Occitan/Provencal
664 { "ori", "or" }, // Oriya
665 { "orm", "om" }, // Oromo,Galla
666 { "pan", "pa" }, // Panjabi
667 { "per", "fa" }, // Persian
668 { "pol", "pl" }, // Polish
669 { "por", "pt" }, // Portuguese
670 { "pus", "ps" }, // Pushto
671 { "que", "qu" }, // Quechua
672 { "roh", "rm" }, // Raeto-Romance
673 { "ron", "ro" }, // Romanian
674 { "rum", "ro" }, // Romanian
675 { "run", "rn" }, // Rundi
676 { "rus", "ru" }, // Russian
677 { "san", "sa" }, // Sanskrit
678 { "scc", "sr" }, // Serbian
679 { "scr", "hr" }, // Croatian
680 { "sin", "si" }, // Sinhalese
681 { "slk", "sk" }, // Slovak
682 { "slo", "sk" }, // Slovak
683 { "slv", "sl" }, // Slovenian
684 { "sme", "se" }, // Sami,Northern
685 { "snd", "sd" }, // Sindhi
686 { "som", "so" }, // Somali
687 { "spa", "es" }, // Spanish
688 { "sqi", "sq" }, // Albanian
689 { "srp", "sr" }, // Serbian
690 { "sun", "su" }, // Sundanese
691 { "swa", "sw" }, // Swahili
692 { "swe", "sv" }, // Swedish
693 { "tam", "ta" }, // Tamil
694 { "tat", "tt" }, // Tatar
695 { "tel", "te" }, // Telugu
696 { "tgk", "tg" }, // Tajik
697 { "tgl", "tl" }, // Tagalog
698 { "tha", "th" }, // Thai
699 { "tib", "bo" }, // Tibetan
700 { "tir", "ti" }, // Tigrinya
701 { "ton", "to" }, // Tongan
702 { "tuk", "tk" }, // Turkmen
703 { "tur", "tr" }, // Turkish
704 { "uig", "ug" }, // Uighur
705 { "ukr", "uk" }, // Ukrainian
706 { "urd", "ur" }, // Urdu
707 { "uzb", "uz" }, // Uzbek
708 { "vie", "vi" }, // Vietnamese
709 { "wel", "cy" }, // Welsh
710 { "yid", "yi" }, // Yiddish
711 { "zho", "zh" }, // Chinese
714 kNumLocaleStringPrefixToCanonical
= sizeof(localeStringPrefixToCanonical
)/sizeof(KeyStringToResultString
)
718 static const SpecialCaseUpdates specialCases
[] = {
719 // Data for special cases
720 // a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was
721 // replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after
722 // the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! However, ICU
723 // and RFC 3066bis will continue to use YU for this. So now CS is ambiguous. We guess as follows: If we
724 // see CS but a language of cs or sk, we change CS to CZ or SK. Otherwise, we change CS to YU.
725 // b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and
726 // deprecated in 2000. We guess which one to map it to as follows: If there is a region tag of HR we use
727 // hr; if there is a region tag of (now) YU we use sr; else we do not change it (not enough info).
728 // c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the
729 // "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB).
730 { NULL
, "-UK", "GB", NULL
, NULL
}, // always change UK to GB (UK is "exceptionally reserved" to mean GB)
731 { NULL
, "-TP", "TL", NULL
, NULL
}, // always change TP to TL (East Timor, code changed 2002-05)
732 { "cs", "-CS", "CZ", NULL
, NULL
}, // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic)
733 { "sk", "-CS", "SK", NULL
, NULL
}, // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia)
734 { NULL
, "-YU", "CS", NULL
, NULL
}, // then always change YU to CS (map old Yugoslavia code to new 2003-07 ISO code
735 // for Serbia & Montenegro per RFC3066bis & ICU) // <1.18>
736 // Note: do this after fixing CS for cs/sk as above.
737 { "sh", "-HR", "hr", "-CS", "sr" }, // if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian) if we find
738 // HR (Croatia) or to 'sr' (Serbian) if we find CS (Serbia & Montenegro, Yugoslavia). // <1.18>
739 // Note: Do this after changing YU to CS as above.
740 { NULL
, NULL
, NULL
, NULL
, NULL
} // terminator
744 static const KeyStringToResultString localeStringRegionToDefaults
[] = {
745 // For some region-code suffixes, there are default substrings to strip off for canonical string.
746 // Must be sorted according to how strcmp compares the strings in the first column
748 // region default writing
749 // suffix system tags, strip comment
750 // -------- ------------- ---------
751 { "_CN", "-Hans" }, // mainland China, default is simplified
752 { "_HK", "-Hant" }, // Hong Kong, default is traditional
753 { "_MO", "-Hant" }, // Macao, default is traditional
754 { "_SG", "-Hans" }, // Singapore, default is simplified
755 { "_TW", "-Hant" }, // Taiwan, default is traditional
758 kNumLocaleStringRegionToDefaults
= sizeof(localeStringRegionToDefaults
)/sizeof(KeyStringToResultString
)
761 static const KeyStringToResultString localeStringPrefixToDefaults
[] = {
762 // For some initial portions of language tag, there are default substrings to strip off for canonical string.
763 // Must be sorted according to how strcmp compares the strings in the first column
765 // language default writing
766 // tag prefix system tags, strip comment
767 // -------- ------------- ---------
768 { "ab-", "-Cyrl" }, // Abkhazian
769 { "af-", "-Latn" }, // Afrikaans
770 { "am-", "-Ethi" }, // Amharic
771 { "ar-", "-Arab" }, // Arabic
772 { "as-", "-Beng" }, // Assamese
773 { "ay-", "-Latn" }, // Aymara
774 { "be-", "-Cyrl" }, // Belarusian
775 { "bg-", "-Cyrl" }, // Bulgarian
776 { "bn-", "-Beng" }, // Bengali
777 { "bo-", "-Tibt" }, // Tibetan (? not Suppress-Script)
778 { "br-", "-Latn" }, // Breton (? not Suppress-Script)
779 { "bs-", "-Latn" }, // Bosnian
780 { "ca-", "-Latn" }, // Catalan
781 { "cs-", "-Latn" }, // Czech
782 { "cy-", "-Latn" }, // Welsh
783 { "da-", "-Latn" }, // Danish
784 { "de-", "-Latn -1901" }, // German, traditional orthography
785 { "dv-", "-Thaa" }, // Divehi/Maldivian
786 { "dz-", "-Tibt" }, // Dzongkha
787 { "el-", "-Grek" }, // Greek (modern, monotonic)
788 { "en-", "-Latn" }, // English
789 { "eo-", "-Latn" }, // Esperanto
790 { "es-", "-Latn" }, // Spanish
791 { "et-", "-Latn" }, // Estonian
792 { "eu-", "-Latn" }, // Basque
793 { "fa-", "-Arab" }, // Farsi
794 { "fi-", "-Latn" }, // Finnish
795 { "fo-", "-Latn" }, // Faroese
796 { "fr-", "-Latn" }, // French
797 { "ga-", "-Latn" }, // Irish
798 { "gd-", "-Latn" }, // Scottish Gaelic (? not Suppress-Script)
799 { "gl-", "-Latn" }, // Galician
800 { "gn-", "-Latn" }, // Guarani
801 { "gu-", "-Gujr" }, // Gujarati
802 { "gv-", "-Latn" }, // Manx
803 { "haw-", "-Latn" }, // Hawaiian (? not Suppress-Script)
804 { "he-", "-Hebr" }, // Hebrew
805 { "hi-", "-Deva" }, // Hindi
806 { "hr-", "-Latn" }, // Croatian
807 { "hu-", "-Latn" }, // Hungarian
808 { "hy-", "-Armn" }, // Armenian
809 { "id-", "-Latn" }, // Indonesian
810 { "is-", "-Latn" }, // Icelandic
811 { "it-", "-Latn" }, // Italian
812 { "ja-", "-Jpan" }, // Japanese
813 { "ka-", "-Geor" }, // Georgian
814 { "kk-", "-Cyrl" }, // Kazakh
815 { "kl-", "-Latn" }, // Kalaallisut/Greenlandic
816 { "km-", "-Khmr" }, // Central Khmer
817 { "kn-", "-Knda" }, // Kannada
818 { "ko-", "-Hang" }, // Korean (? not Suppress-Script)
819 { "kok-", "-Deva" }, // Konkani
820 { "la-", "-Latn" }, // Latin
821 { "lb-", "-Latn" }, // Luxembourgish
822 { "lo-", "-Laoo" }, // Lao
823 { "lt-", "-Latn" }, // Lithuanian
824 { "lv-", "-Latn" }, // Latvian
825 { "mg-", "-Latn" }, // Malagasy
826 { "mk-", "-Cyrl" }, // Macedonian
827 { "ml-", "-Mlym" }, // Malayalam
828 { "mo-", "-Latn" }, // Moldavian
829 { "mr-", "-Deva" }, // Marathi
830 { "ms-", "-Latn" }, // Malay
831 { "mt-", "-Latn" }, // Maltese
832 { "my-", "-Mymr" }, // Burmese/Myanmar
833 { "nb-", "-Latn" }, // Norwegian Bokmal
834 { "ne-", "-Deva" }, // Nepali
835 { "nl-", "-Latn" }, // Dutch
836 { "nn-", "-Latn" }, // Norwegian Nynorsk
837 { "ny-", "-Latn" }, // Chichewa/Nyanja
838 { "om-", "-Latn" }, // Oromo
839 { "or-", "-Orya" }, // Oriya
840 { "pa-", "-Guru" }, // Punjabi
841 { "pl-", "-Latn" }, // Polish
842 { "ps-", "-Arab" }, // Pushto
843 { "pt-", "-Latn" }, // Portuguese
844 { "qu-", "-Latn" }, // Quechua
845 { "rn-", "-Latn" }, // Rundi
846 { "ro-", "-Latn" }, // Romanian
847 { "ru-", "-Cyrl" }, // Russian
848 { "rw-", "-Latn" }, // Kinyarwanda
849 { "sa-", "-Deva" }, // Sanskrit (? not Suppress-Script)
850 { "se-", "-Latn" }, // Sami (? not Suppress-Script)
851 { "si-", "-Sinh" }, // Sinhala
852 { "sk-", "-Latn" }, // Slovak
853 { "sl-", "-Latn" }, // Slovenian
854 { "so-", "-Latn" }, // Somali
855 { "sq-", "-Latn" }, // Albanian
856 { "sv-", "-Latn" }, // Swedish
857 { "sw-", "-Latn" }, // Swahili
858 { "ta-", "-Taml" }, // Tamil
859 { "te-", "-Telu" }, // Telugu
860 { "th-", "-Thai" }, // Thai
861 { "ti-", "-Ethi" }, // Tigrinya
862 { "tl-", "-Latn" }, // Tagalog
863 { "tn-", "-Latn" }, // Tswana
864 { "to-", "-Latn" }, // Tonga of Tonga Islands
865 { "tr-", "-Latn" }, // Turkish
866 { "uk-", "-Cyrl" }, // Ukrainian
867 { "ur-", "-Arab" }, // Urdu
868 { "vi-", "-Latn" }, // Vietnamese
869 { "wo-", "-Latn" }, // Wolof
870 { "xh-", "-Latn" }, // Xhosa
871 { "yi-", "-Hebr" }, // Yiddish
872 { "zh-", "-Hani" }, // Chinese (? not Suppress-Script)
873 { "zu-", "-Latn" }, // Zulu
876 kNumLocaleStringPrefixToDefaults
= sizeof(localeStringPrefixToDefaults
)/sizeof(KeyStringToResultString
)
879 static const KeyStringToResultString appleLocaleToLanguageString
[] = {
880 // Map locale strings that Apple uses as language IDs to real language strings.
881 // Must be sorted according to how strcmp compares the strings in the first column.
882 // Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now
883 // handled in the code. <1.19>
885 // locale lang [ comment ]
888 { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752]
889 { "zh_CN", "zh-Hans" }, // mainland China => simplified
890 { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used
891 { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used
892 { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used
893 { "zh_TW", "zh-Hant" }, // Taiwan => traditional
896 kNumAppleLocaleToLanguageString
= sizeof(appleLocaleToLanguageString
)/sizeof(KeyStringToResultString
)
899 static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle
[] = {
900 // Map locale strings that Apple uses as language IDs to real language strings.
901 // Must be sorted according to how strcmp compares the strings in the first column.
903 // locale lang [ comment ]
906 { "de_AT", "de-AT" }, // Austrian German
907 { "de_CH", "de-CH" }, // Swiss German
908 // { "de_DE", "de-DE" }, // German for Germany (default), not currently used
909 { "en_AU", "en-AU" }, // Australian English
910 { "en_CA", "en-CA" }, // Canadian English
911 { "en_GB", "en-GB" }, // British English
912 // { "en_IE", "en-IE" }, // Irish English, not currently used
913 { "en_US", "en-US" }, // U.S. English
914 { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752]
915 // { "fr_BE", "fr-BE" }, // Belgian French, not currently used
916 { "fr_CA", "fr-CA" }, // Canadian French
917 { "fr_CH", "fr-CH" }, // Swiss French
918 // { "fr_FR", "fr-FR" }, // French for France (default), not currently used
919 { "nl_BE", "nl-BE" }, // Flemish = Vlaams, Dutch for Belgium
920 // { "nl_NL", "nl-NL" }, // Dutch for Netherlands (default), not currently used
921 { "pt_BR", "pt-BR" }, // Brazilian Portuguese
922 { "pt_PT", "pt-PT" }, // Portuguese for Portugal
923 { "zh_CN", "zh-Hans" }, // mainland China => simplified
924 { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used
925 { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used
926 { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used
927 { "zh_TW", "zh-Hant" }, // Taiwan => traditional
930 kNumAppleLocaleToLanguageStringForCFBundle
= sizeof(appleLocaleToLanguageStringForCFBundle
)/sizeof(KeyStringToResultString
)
934 struct LocaleToLegacyCodes
{
935 const char * locale
; // reduced to language plus one other component (script, region, variant), separators normalized to'_'
938 CFStringEncoding encoding
;
940 typedef struct LocaleToLegacyCodes LocaleToLegacyCodes
;
942 static const LocaleToLegacyCodes localeToLegacyCodes
[] = {
943 // locale RegionCode LangCode CFStringEncoding
944 { "af"/*ZA*/, 102/*verAfrikaans*/, 141/*langAfrikaans*/, 0/*Roman*/ }, // Latn
945 { "am", -1, 85/*langAmharic*/, 28/*Ethiopic*/ }, // Ethi
946 { "ar", 16/*verArabic*/, 12/*langArabic*/, 4/*Arabic*/ }, // Arab;
947 { "as", -1, 68/*langAssamese*/, 13/*Bengali*/ }, // Beng;
948 { "ay", -1, 134/*langAymara*/, 0/*Roman*/ }, // Latn;
949 { "az", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // assume "az" defaults to -Cyrl
950 { "az_Arab", -1, 50/*langAzerbaijanAr*/, 4/*Arabic*/ }, // Arab;
951 { "az_Cyrl", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // Cyrl;
952 { "az_Latn", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // Latn;
953 { "be"/*BY*/, 61/*verBelarus*/, 46/*langBelorussian*/, 7/*Cyrillic*/ }, // Cyrl;
954 { "bg"/*BG*/, 72/*verBulgaria*/, 44/*langBulgarian*/, 7/*Cyrillic*/ }, // Cyrl;
955 { "bn", 60/*verBengali*/, 67/*langBengali*/, 13/*Bengali*/ }, // Beng;
956 { "bo", 105/*verTibetan*/, 63/*langTibetan*/, 26/*Tibetan*/ }, // Tibt;
957 { "br", 77/*verBreton*/, 142/*langBreton*/, 39/*Celtic*/ }, // Latn;
958 { "ca"/*ES*/, 73/*verCatalonia*/, 130/*langCatalan*/, 0/*Roman*/ }, // Latn;
959 { "cs"/*CZ*/, 56/*verCzech*/, 38/*langCzech*/, 29/*CentralEurRoman*/ }, // Latn;
960 { "cy", 79/*verWelsh*/, 128/*langWelsh*/, 39/*Celtic*/ }, // Latn;
961 { "da"/*DK*/, 9/*verDenmark*/, 7/*langDanish*/, 0/*Roman*/ }, // Latn;
962 { "de", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, // assume "de" defaults to verGermany
963 { "de_1996", 70/*verGermanReformed*/, 2/*langGerman*/, 0/*Roman*/ },
964 { "de_AT", 92/*verAustria*/, 2/*langGerman*/, 0/*Roman*/ },
965 { "de_CH", 19/*verGrSwiss*/, 2/*langGerman*/, 0/*Roman*/ },
966 { "de_DE", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ },
967 { "dz"/*BT*/, 83/*verBhutan*/, 137/*langDzongkha*/, 26/*Tibetan*/ }, // Tibt;
968 { "el", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // assume "el" defaults to verGreece
969 { "el_CY", 23/*verCyprus*/, 14/*langGreek*/, 6/*Greek*/ },
970 { "el_GR", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // modern monotonic
971 { "en", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, // "en" defaults to verUS (per Chris Hansten)
972 { "en_001", 37/*verInternational*/, 0/*langEnglish*/, 0/*Roman*/ },
973 { "en_AU", 15/*verAustralia*/, 0/*langEnglish*/, 0/*Roman*/ },
974 { "en_CA", 82/*verEngCanada*/, 0/*langEnglish*/, 0/*Roman*/ },
975 { "en_GB", 2/*verBritain*/, 0/*langEnglish*/, 0/*Roman*/ },
976 { "en_IE", 108/*verIrelandEnglish*/, 0/*langEnglish*/, 0/*Roman*/ },
977 { "en_SG", 100/*verSingapore*/, 0/*langEnglish*/, 0/*Roman*/ },
978 { "en_US", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ },
979 { "eo", 103/*verEsperanto*/, 94/*langEsperanto*/, 0/*Roman*/ }, // Latn;
980 { "es", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, // "es" defaults to verSpain (per Chris Hansten)
981 { "es_419", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, // new BCP 47 tag
982 { "es_ES", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ },
983 { "es_MX", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ },
984 { "es_US", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ },
985 { "et"/*EE*/, 44/*verEstonia*/, 27/*langEstonian*/, 29/*CentralEurRoman*/ },
986 { "eu", -1, 129/*langBasque*/, 0/*Roman*/ }, // Latn;
987 { "fa"/*IR*/, 48/*verIran*/, 31/*langFarsi/Persian*/, 0x8C/*Farsi*/ }, // Arab;
988 { "fi"/*FI*/, 17/*verFinland*/, 13/*langFinnish*/, 0/*Roman*/ },
989 { "fo"/*FO*/, 47/*verFaroeIsl*/, 30/*langFaroese*/, 37/*Icelandic*/ },
990 { "fr", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, // "fr" defaults to verFrance (per Chris Hansten)
991 { "fr_001", 91/*verFrenchUniversal*/, 1/*langFrench*/, 0/*Roman*/ },
992 { "fr_BE", 98/*verFrBelgium*/, 1/*langFrench*/, 0/*Roman*/ },
993 { "fr_CA", 11/*verFrCanada*/, 1/*langFrench*/, 0/*Roman*/ },
994 { "fr_CH", 18/*verFrSwiss*/, 1/*langFrench*/, 0/*Roman*/ },
995 { "fr_FR", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ },
996 { "ga"/*IE*/, 50/*verIreland*/, 35/*langIrishGaelic*/, 0/*Roman*/ }, // no dots (h after)
997 { "ga_Latg"/*IE*/, 81/*verIrishGaelicScrip*/, 146/*langIrishGaelicScript*/, 40/*Gaelic*/ }, // using dots
998 { "gd", 75/*verScottishGaelic*/, 144/*langScottishGaelic*/, 39/*Celtic*/ },
999 { "gl", -1, 140/*langGalician*/, 0/*Roman*/ }, // Latn;
1000 { "gn", -1, 133/*langGuarani*/, 0/*Roman*/ }, // Latn;
1001 { "grc", 40/*verGreekAncient*/, 148/*langGreekAncient*/, 6/*Greek*/ }, // polytonic (MacGreek doesn't actually support it)
1002 { "gu"/*IN*/, 94/*verGujarati*/, 69/*langGujarati*/, 11/*Gujarati*/ }, // Gujr;
1003 { "gv", 76/*verManxGaelic*/, 145/*langManxGaelic*/, 39/*Celtic*/ }, // Latn;
1004 { "he"/*IL*/, 13/*verIsrael*/, 10/*langHebrew*/, 5/*Hebrew*/ }, // Hebr;
1005 { "hi"/*IN*/, 33/*verIndiaHindi*/, 21/*langHindi*/, 9/*Devanagari*/ }, // Deva;
1006 { "hr"/*HR*/, 68/*verCroatia*/, 18/*langCroatian*/, 36/*Croatian*/ },
1007 { "hu"/*HU*/, 43/*verHungary*/, 26/*langHungarian*/, 29/*CentralEurRoman*/ },
1008 { "hy"/*AM*/, 84/*verArmenian*/, 51/*langArmenian*/, 24/*Armenian*/ }, // Armn;
1009 { "id", -1, 81/*langIndonesian*/, 0/*Roman*/ }, // Latn;
1010 { "is"/*IS*/, 21/*verIceland*/, 15/*langIcelandic*/, 37/*Icelandic*/ },
1011 { "it", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, // "it" defaults to verItaly
1012 { "it_CH", 36/*verItalianSwiss*/, 3/*langItalian*/, 0/*Roman*/ },
1013 { "it_IT", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ },
1014 { "iu"/*CA*/, 78/*verNunavut*/, 143/*langInuktitut*/, 0xEC/*Inuit*/ }, // Cans;
1015 { "ja"/*JP*/, 14/*verJapan*/, 11/*langJapanese*/, 1/*Japanese*/ }, // Jpan;
1016 { "jv", -1, 138/*langJavaneseRom*/, 0/*Roman*/ }, // Latn;
1017 { "ka"/*GE*/, 85/*verGeorgian*/, 52/*langGeorgian*/, 23/*Georgian*/ }, // Geor;
1018 { "kk", -1, 48/*langKazakh*/, 7/*Cyrillic*/ }, // "kk" defaults to -Cyrl; also have -Latn, -Arab
1019 { "kl", 107/*verGreenland*/, 149/*langGreenlandic*/, 0/*Roman*/ }, // Latn;
1020 { "km", -1, 78/*langKhmer*/, 20/*Khmer*/ }, // Khmr;
1021 { "kn", -1, 73/*langKannada*/, 16/*Kannada*/ }, // Knda;
1022 { "ko"/*KR*/, 51/*verKorea*/, 23/*langKorean*/, 3/*Korean*/ }, // Hang;
1023 { "ks", -1, 61/*langKashmiri*/, 4/*Arabic*/ }, // Arab;
1024 { "ku", -1, 60/*langKurdish*/, 4/*Arabic*/ }, // Arab;
1025 { "ky", -1, 54/*langKirghiz*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1026 { "la", -1, 131/*langLatin*/, 0/*Roman*/ }, // Latn;
1027 { "lo", -1, 79/*langLao*/, 22/*Laotian*/ }, // Laoo;
1028 { "lt"/*LT*/, 41/*verLithuania*/, 24/*langLithuanian*/, 29/*CentralEurRoman*/ },
1029 { "lv"/*LV*/, 45/*verLatvia*/, 28/*langLatvian*/, 29/*CentralEurRoman*/ },
1030 { "mg", -1, 93/*langMalagasy*/, 0/*Roman*/ }, // Latn;
1031 { "mk"/*MK*/, 67/*verMacedonian*/, 43/*langMacedonian*/, 7/*Cyrillic*/ }, // Cyrl;
1032 { "ml", -1, 72/*langMalayalam*/, 17/*Malayalam*/ }, // Mlym;
1033 { "mn", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // "mn" defaults to -Mong
1034 { "mn_Cyrl", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // Cyrl;
1035 { "mn_Mong", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // Mong;
1036 { "mo", -1, 53/*langMoldavian*/, 7/*Cyrillic*/ }, // Cyrl;
1037 { "mr"/*IN*/, 104/*verMarathi*/, 66/*langMarathi*/, 9/*Devanagari*/ }, // Deva;
1038 { "ms", -1, 83/*langMalayRoman*/, 0/*Roman*/ }, // "ms" defaults to -Latn;
1039 { "ms_Arab", -1, 84/*langMalayArabic*/, 4/*Arabic*/ }, // Arab;
1040 { "mt"/*MT*/, 22/*verMalta*/, 16/*langMaltese*/, 0/*Roman*/ }, // Latn;
1041 { "mul", 74/*verMultilingual*/, -1, 0 },
1042 { "my", -1, 77/*langBurmese*/, 19/*Burmese*/ }, // Mymr;
1043 { "nb"/*NO*/, 12/*verNorway*/, 9/*langNorwegian*/, 0/*Roman*/ },
1044 { "ne"/*NP*/, 106/*verNepal*/, 64/*langNepali*/, 9/*Devanagari*/ }, // Deva;
1045 { "nl", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, // "nl" defaults to verNetherlands
1046 { "nl_BE", 6/*verFlemish*/, 34/*langFlemish*/, 0/*Roman*/ },
1047 { "nl_NL", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ },
1048 { "nn"/*NO*/, 101/*verNynorsk*/, 151/*langNynorsk*/, 0/*Roman*/ },
1049 { "ny", -1, 92/*langNyanja/Chewa*/, 0/*Roman*/ }, // Latn;
1050 { "om", -1, 87/*langOromo*/, 28/*Ethiopic*/ }, // Ethi;
1051 { "or", -1, 71/*langOriya*/, 12/*Oriya*/ }, // Orya;
1052 { "pa", 95/*verPunjabi*/, 70/*langPunjabi*/, 10/*Gurmukhi*/ }, // Guru;
1053 { "pl"/*PL*/, 42/*verPoland*/, 25/*langPolish*/, 29/*CentralEurRoman*/ },
1054 { "ps", -1, 59/*langPashto*/, 0x8C/*Farsi*/ }, // Arab;
1055 { "pt", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, // "pt" defaults to verBrazil (per Chris Hansten)
1056 { "pt_BR", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ },
1057 { "pt_PT", 10/*verPortugal*/, 8/*langPortuguese*/, 0/*Roman*/ },
1058 { "qu", -1, 132/*langQuechua*/, 0/*Roman*/ }, // Latn;
1059 { "rn", -1, 91/*langRundi*/, 0/*Roman*/ }, // Latn;
1060 { "ro"/*RO*/, 39/*verRomania*/, 37/*langRomanian*/, 38/*Romanian*/ },
1061 { "ru"/*RU*/, 49/*verRussia*/, 32/*langRussian*/, 7/*Cyrillic*/ }, // Cyrl;
1062 { "rw", -1, 90/*langKinyarwanda*/, 0/*Roman*/ }, // Latn;
1063 { "sa", -1, 65/*langSanskrit*/, 9/*Devanagari*/ }, // Deva;
1064 { "sd", -1, 62/*langSindhi*/, 0x8C/*Farsi*/ }, // Arab;
1065 { "se", 46/*verSami*/, 29/*langSami*/, 0/*Roman*/ },
1066 { "si", -1, 76/*langSinhalese*/, 18/*Sinhalese*/ }, // Sinh;
1067 { "sk"/*SK*/, 57/*verSlovak*/, 39/*langSlovak*/, 29/*CentralEurRoman*/ },
1068 { "sl"/*SI*/, 66/*verSlovenian*/, 40/*langSlovenian*/, 36/*Croatian*/ },
1069 { "so", -1, 88/*langSomali*/, 0/*Roman*/ }, // Latn;
1070 { "sq", -1, 36/*langAlbanian*/, 0/*Roman*/ },
1071 { "sr"/*CS,RS*/, 65/*verSerbian*/, 42/*langSerbian*/, 7/*Cyrillic*/ }, // Cyrl;
1072 { "su", -1, 139/*langSundaneseRom*/, 0/*Roman*/ }, // Latn;
1073 { "sv"/*SE*/, 7/*verSweden*/, 5/*langSwedish*/, 0/*Roman*/ },
1074 { "sw", -1, 89/*langSwahili*/, 0/*Roman*/ }, // Latn;
1075 { "ta", -1, 74/*langTamil*/, 14/*Tamil*/ }, // Taml;
1076 { "te", -1, 75/*langTelugu*/, 15/*Telugu*/ }, // Telu
1077 { "tg", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // "tg" defaults to "Cyrl"
1078 { "tg_Cyrl", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1079 { "th"/*TH*/, 54/*verThailand*/, 22/*langThai*/, 21/*Thai*/ }, // Thai;
1080 { "ti", -1, 86/*langTigrinya*/, 28/*Ethiopic*/ }, // Ethi;
1081 { "tk", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // "tk" defaults to Cyrl
1082 { "tk_Cyrl", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1083 { "tl", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn;
1084 { "to"/*TO*/, 88/*verTonga*/, 147/*langTongan*/, 0/*Roman*/ }, // Latn;
1085 { "tr"/*TR*/, 24/*verTurkey*/, 17/*langTurkish*/, 35/*Turkish*/ }, // Latn;
1086 { "tt", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl;
1087 { "tt_Cyrl", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl;
1088 { "ug", -1, 136/*langUighur*/, 4/*Arabic*/ }, // Arab;
1089 { "uk"/*UA*/, 62/*verUkraine*/, 45/*langUkrainian*/, 7/*Cyrillic*/ }, // Cyrl;
1090 { "und", 55/*verScriptGeneric*/, -1, 0 },
1091 { "ur", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // "ur" defaults to verPakistanUrdu
1092 { "ur_IN", 96/*verIndiaUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab
1093 { "ur_PK", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab
1094 { "uz"/*UZ*/, 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1095 { "uz_Cyrl", 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ },
1096 { "vi"/*VN*/, 97/*verVietnam*/, 80/*langVietnamese*/, 30/*Vietnamese*/ }, // Latn
1097 { "yi", -1, 41/*langYiddish*/, 5/*Hebrew*/ }, // Hebr;
1098 { "zh", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, // "zh" defaults to verChina, langSimpChinese
1099 { "zh_CN", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1100 { "zh_HK", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1101 { "zh_Hans", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1102 { "zh_Hant", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1103 { "zh_MO", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1104 { "zh_SG", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1105 { "zh_TW", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1108 kNumLocaleToLegacyCodes
= sizeof(localeToLegacyCodes
)/sizeof(localeToLegacyCodes
[0])
/*
For reference here is a list of ICU locales with variants and how some
of them are canonicalized with the ICU function uloc_canonicalize:

    en_US_POSIX             x   no change
    hy_AM_REVISED           x   no change
    ja_JP_TRADITIONAL       ->  ja_JP@calendar=japanese
    th_TH_TRADITIONAL       ->  th_TH@calendar=buddhist

ICU 2.8 also had the following (now obsolete):

    de__PHONEBOOK           ->  de@collation=phonebook
    en_GB_EURO              ->  en_GB@currency=EUR
    en_IE_PREEURO           ->  en_IE@currency=IEP
    es__TRADITIONAL         ->  es@collation=traditional
    fr_FR_PREEURO           ->  fr_FR@currency=FRF
    hi__DIRECT              ->  hi@collation=direct
    zh__PINYIN              ->  zh@collation=pinyin
    zh_TW_STROKE            ->  zh_TW@collation=stroke
*/
1150 // _CompareTestEntryToTableEntryKey
1151 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1152 // comparison function for bsearch
1153 static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr
, const void *tableEntryKeyPtr
) {
1154 return strcmp( ((const KeyStringToResultString
*)testEntryPtr
)->key
, ((const KeyStringToResultString
*)tableEntryKeyPtr
)->key
);
1157 // _CompareTestEntryPrefixToTableEntryKey
1158 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1159 // Comparison function for bsearch. Assumes prefix IS terminated with '-' or '_'.
1160 // Do the following instead of strlen & strncmp so we don't walk tableEntry key twice.
1161 static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr
, const void *tableEntryKeyPtr
) {
1162 const char * testPtr
= ((const KeyStringToResultString
*)testEntryPtr
)->key
;
1163 const char * tablePtr
= ((const KeyStringToResultString
*)tableEntryKeyPtr
)->key
;
1165 while ( *testPtr
== *tablePtr
&& *tablePtr
!= 0 ) {
1166 testPtr
++; tablePtr
++;
1168 if ( *tablePtr
!= 0 ) {
1169 // strings are different, and the string in the table has not run out;
1170 // i.e. the table entry is not a prefix of the text string.
1171 return ( *testPtr
< *tablePtr
)? -1: 1;
1176 // _CompareLowerTestEntryPrefixToTableEntryKey
1177 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1178 // Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'.
1179 // Lowercases the test string before comparison (the table should already have lowercased entries).
1180 static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr
, const void *tableEntryKeyPtr
) {
1181 const char * testPtr
= ((const KeyStringToResultString
*)testEntryPtr
)->key
;
1182 const char * tablePtr
= ((const KeyStringToResultString
*)tableEntryKeyPtr
)->key
;
1185 while ( (lowerTestChar
= tolower(*testPtr
)) == *tablePtr
&& *tablePtr
!= 0 && lowerTestChar
!= '_' ) { // <1.9>
1186 testPtr
++; tablePtr
++;
1188 if ( *tablePtr
!= 0 ) {
1189 // strings are different, and the string in the table has not run out;
1190 // i.e. the table entry is not a prefix of the text string.
1191 if (lowerTestChar
== '_') // <1.9>
1193 return ( lowerTestChar
< *tablePtr
)? -1: 1;
1195 // The string in the table has run out. If the test string char is not alnum,
1196 // then the string matches, else the test string sorts after.
1197 return ( !isalnum(lowerTestChar
) )? 0: 1;
// _DeleteCharsAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// remove _length_ characters from the beginning of the string indicated by _stringPtr_
// (we know that the string has at least _length_ characters in it)
// The rest of the string, including its terminating NUL, is shifted down in place.
static void _DeleteCharsAtPointer(char *stringPtr, int length) {
    for (;;) {
        char movedChar = stringPtr[length];

        *stringPtr++ = movedChar;
        if (movedChar == 0) {
            break;      // the terminator has been copied, so the shift is complete
        }
    }
}
// _CopyReplacementAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Copy replacement string (*excluding* terminating NULL byte) to the place indicated by stringPtr
// The destination is overwritten in place and is neither lengthened nor re-terminated, so the
// caller must already have made the destination span the right size.
static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) {
    for ( ; *replacementPtr != 0; replacementPtr++ ) {
        *stringPtr++ = *replacementPtr;
    }
}
1220 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1221 static Boolean
_CheckForTag(const char *localeStringPtr
, const char *tagPtr
, int tagLen
) {
1222 return ( strncmp(localeStringPtr
, tagPtr
, tagLen
) == 0 && !isalnum(localeStringPtr
[tagLen
]) );
// _ReplacePrefix
// Move this code from _UpdateFullLocaleString into separate function                       // <1.10>
// Replace the first _oldPrefixLen_ characters of locString with newPrefix, shifting the rest of
// the string (including its terminating NUL) left or right as needed.
//   locString        NUL-terminated string, modified in place
//   locStringMaxLen  size of the locString buffer, including room for the terminator
//   oldPrefixLen     number of leading characters to replace
//   newPrefix        NUL-terminated replacement text
// If the replacement is longer and the result would not fit in locStringMaxLen, or oldPrefixLen
// does not lie within the current string, locString is left unchanged.
static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) {
    int stringLen = (int)strlen(locString);
    int newPrefixLen = (int)strlen(newPrefix);
    int lengthDelta = newPrefixLen - oldPrefixLen;

    if (oldPrefixLen < 0 || oldPrefixLen > stringLen) {
        // the prefix to replace is not inside the string; nothing sensible can be done
        return;
    }
    if (lengthDelta > 0 && stringLen + lengthDelta >= locStringMaxLen) {
        // no room, can't do substitution
        return;
    }
    if (lengthDelta != 0) {
        // shift the tail (everything after the old prefix, plus the terminator) to its new
        // position; memmove is required because the source and destination overlap
        memmove(locString + newPrefixLen, locString + oldPrefixLen, (size_t)(stringLen - oldPrefixLen) + 1);
    }
    // do the substitution (the replacement's own terminator is not copied)
    memcpy(locString, newPrefix, (size_t)newPrefixLen);
}
1259 // _UpdateFullLocaleString
1260 // Given a locale string that uses standard codes (not a special old-style Apple string),
1261 // update all the language codes and region codes to latest versions, map 3-letter
1262 // language codes to 2-letter codes if possible, and normalize casing. If requested, return
1263 // pointers to a language-region variant subtag (if present) and a region tag (if present).
1264 // (add locStringMaxLen parameter) // <1.10>
1265 static void _UpdateFullLocaleString(char inLocaleString
[], int locStringMaxLen
,
1266 char **langRegSubtagRef
, char **regionTagRef
,
1267 char varKeyValueString
[]) // <1.17>
1269 KeyStringToResultString testEntry
;
1270 KeyStringToResultString
* foundEntry
;
1271 const SpecialCaseUpdates
* specialCasePtr
;
1274 char * langRegSubtag
= NULL
;
1275 char * regionTag
= NULL
;
1276 char * variantTag
= NULL
;
1277 Boolean subtagHasDigits
, pastPrimarySubtag
, hadRegion
;
1279 // 1. First replace any non-canonical prefix (case insensitive) with canonical
1280 // (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.)
1282 testEntry
.key
= inLocaleString
;
1283 foundEntry
= (KeyStringToResultString
*)bsearch( &testEntry
, localeStringPrefixToCanonical
, kNumLocaleStringPrefixToCanonical
,
1284 sizeof(KeyStringToResultString
), _CompareLowerTestEntryPrefixToTableEntryKey
);
1286 // replace key (at beginning of string) with result
1287 _ReplacePrefix(inLocaleString
, locStringMaxLen
, strlen(foundEntry
->key
), foundEntry
->result
); // <1.10>
1290 // 2. Walk through input string, normalizing case & marking use of ISO 3166 codes
1292 inLocalePtr
= inLocaleString
;
1293 subtagPtr
= inLocaleString
;
1294 subtagHasDigits
= false;
1295 pastPrimarySubtag
= false;
1299 if ( isalpha(*inLocalePtr
) ) {
1300 // if not past a region tag, then lowercase, else uppercase
1301 *inLocalePtr
= (!hadRegion
)? tolower(*inLocalePtr
): toupper(*inLocalePtr
);
1302 } else if ( isdigit(*inLocalePtr
) ) {
1303 subtagHasDigits
= true;
1306 if (!pastPrimarySubtag
) {
1307 // may have a NULL primary subtag
1308 if (subtagHasDigits
) {
1311 pastPrimarySubtag
= true;
1312 } else if (!hadRegion
) {
1313 // We are after any primary language subtag, but not past any region tag.
1314 // This subtag is preceded by '-' or '_'.
1315 int subtagLength
= inLocalePtr
- subtagPtr
; // includes leading '-' or '_'
1317 if (subtagLength
== 3 && !subtagHasDigits
) {
1318 // potential ISO 3166 code for region or language variant; if so, needs uppercasing
1319 if (*subtagPtr
== '_') {
1320 regionTag
= subtagPtr
;
1322 subtagPtr
[1] = toupper(subtagPtr
[1]);
1323 subtagPtr
[2] = toupper(subtagPtr
[2]);
1324 } else if (langRegSubtag
== NULL
) {
1325 langRegSubtag
= subtagPtr
;
1326 subtagPtr
[1] = toupper(subtagPtr
[1]);
1327 subtagPtr
[2] = toupper(subtagPtr
[2]);
1329 } else if (subtagLength
== 4 && subtagHasDigits
) {
1330 // potential UN M.49 region code
1331 if (*subtagPtr
== '_') {
1332 regionTag
= subtagPtr
;
1334 } else if (langRegSubtag
== NULL
) {
1335 langRegSubtag
= subtagPtr
;
1337 } else if (subtagLength
== 5 && !subtagHasDigits
) {
1338 // ISO 15924 script code, uppercase just the first letter
1339 subtagPtr
[1] = toupper(subtagPtr
[1]);
1340 } else if (subtagLength
== 1 && *subtagPtr
== '_') { // <1.17>
1345 // convert improper '_' to '-'
1349 variantTag
= subtagPtr
; // <1.17>
1352 if (*inLocalePtr
== '-' || *inLocalePtr
== '_') {
1353 subtagPtr
= inLocalePtr
;
1354 subtagHasDigits
= false;
1363 // 3 If there is a variant tag, see if ICU canonicalizes it to keywords. // <1.17> [3577669]
1364 // If so, copy the keywords to varKeyValueString and delete the variant tag
1365 // from the original string (but don't otherwise use the ICU canonicalization).
1366 varKeyValueString
[0] = 0;
1368 UErrorCode icuStatus
;
1369 int icuCanonStringLen
;
1370 char * varKeyValueStringPtr
= varKeyValueString
;
1372 icuStatus
= U_ZERO_ERROR
;
1373 icuCanonStringLen
= uloc_canonicalize( inLocaleString
, varKeyValueString
, locStringMaxLen
, &icuStatus
);
1374 if ( U_SUCCESS(icuStatus
) ) {
1375 char * icuCanonStringPtr
= varKeyValueString
;
1377 if (icuCanonStringLen
>= locStringMaxLen
)
1378 icuCanonStringLen
= locStringMaxLen
- 1;
1379 varKeyValueString
[icuCanonStringLen
] = 0;
1380 while (*icuCanonStringPtr
!= 0 && *icuCanonStringPtr
!= ULOC_KEYWORD_SEPARATOR
)
1381 ++icuCanonStringPtr
;
1382 if (*icuCanonStringPtr
!= 0) {
1383 // the canonicalized string has keywords
1384 // delete the variant tag in the original string (and other trailing '_' or '-')
1386 while (*variantTag
== '_')
1388 // delete all of the canonicalized string except the keywords
1389 while (*icuCanonStringPtr
!= 0)
1390 *varKeyValueStringPtr
++ = *icuCanonStringPtr
++;
1392 *varKeyValueStringPtr
= 0;
1396 // 4. Handle special cases of updating region codes, or updating language codes based on
1398 for (specialCasePtr
= specialCases
; specialCasePtr
->reg1
!= NULL
; specialCasePtr
++) {
1399 if ( specialCasePtr
->lang
== NULL
|| _CheckForTag(inLocaleString
, specialCasePtr
->lang
, 2) ) {
1400 // OK, we matched any language specified. Now what needs updating?
1403 if ( isupper(specialCasePtr
->update1
[0]) ) {
1404 // updating a region code
1405 if ( ( foundTag
= strstr(inLocaleString
, specialCasePtr
->reg1
) ) && !isalnum(foundTag
[3]) ) {
1406 _CopyReplacementAtPointer(foundTag
+1, specialCasePtr
->update1
);
1408 if ( regionTag
&& _CheckForTag(regionTag
+1, specialCasePtr
->reg1
+ 1, 2) ) {
1409 _CopyReplacementAtPointer(regionTag
+1, specialCasePtr
->update1
);
1413 // updating the language, there will be two choices based on region
1414 if ( ( regionTag
&& _CheckForTag(regionTag
+1, specialCasePtr
->reg1
+ 1, 2) ) ||
1415 ( ( foundTag
= strstr(inLocaleString
, specialCasePtr
->reg1
) ) && !isalnum(foundTag
[3]) ) ) {
1416 _CopyReplacementAtPointer(inLocaleString
, specialCasePtr
->update1
);
1417 } else if ( ( regionTag
&& _CheckForTag(regionTag
+1, specialCasePtr
->reg2
+ 1, 2) ) ||
1418 ( ( foundTag
= strstr(inLocaleString
, specialCasePtr
->reg2
) ) && !isalnum(foundTag
[3]) ) ) {
1419 _CopyReplacementAtPointer(inLocaleString
, specialCasePtr
->update2
);
1425 // 5. return pointers if requested.
1426 if (langRegSubtagRef
!= NULL
) {
1427 *langRegSubtagRef
= langRegSubtag
;
1429 if (regionTagRef
!= NULL
) {
1430 *regionTagRef
= regionTag
;
1435 // _RemoveSubstringsIfPresent
1436 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1437 // substringList is a list of space-separated substrings to strip if found in localeString
1438 static void _RemoveSubstringsIfPresent(char *localeString
, const char *substringList
) {
1439 while (*substringList
!= 0) {
1440 char currentSubstring
[kLocaleIdentifierCStringMax
];
1441 int substringLength
= 0;
1442 char * foundSubstring
;
1444 // copy current substring & get its length
1445 while ( isgraph(*substringList
) ) {
1446 currentSubstring
[substringLength
++] = *substringList
++;
1448 // move to next substring
1449 while ( isspace(*substringList
) ) {
1453 // search for current substring in locale string
1454 if (substringLength
== 0)
1456 currentSubstring
[substringLength
] = 0;
1457 foundSubstring
= strstr(localeString
, currentSubstring
);
1459 // if substring is found, delete it
1460 if (foundSubstring
) {
1461 _DeleteCharsAtPointer(foundSubstring
, substringLength
);
// _GetKeyValueString                                                                       // <1.10>
// Removes any key-value string from inLocaleString, puts canonized version in keyValueString
//   inLocaleString  - NUL-terminated locale string; truncated in place at the first
//                     ULOC_KEYWORD_SEPARATOR, if there is one
//   keyValueString  - receives the separator and everything after it with all ' ' characters
//                     removed, or "" when inLocaleString has no separator. The caller must
//                     supply a buffer at least as large as the one holding inLocaleString.
static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) {
    char *  inLocalePtr = inLocaleString;

    // find the start of the key-value section, if any
    while (*inLocalePtr != 0 && *inLocalePtr != ULOC_KEYWORD_SEPARATOR) {
        inLocalePtr++;
    }
    if (*inLocalePtr != 0) {    // we found a key-value section
        char *  keyValuePtr = keyValueString;

        // move the separator across, then cut the locale string off at that point
        *keyValuePtr = *inLocalePtr;
        *inLocalePtr = 0;
        // copy the rest, dropping spaces; the terminator is copied by the final pass
        do {
            if ( *(++inLocalePtr) != ' ' ) {
                *(++keyValuePtr) = *inLocalePtr;    // remove "tolower() for *inLocalePtr"  // <1.11>
            }
        } while (*inLocalePtr != 0);
    } else {
        keyValueString[0] = 0;
    }
}
1491 static void _AppendKeyValueString(char inLocaleString
[], int locStringMaxLen
, char keyValueString
[]) {
1492 if (keyValueString
[0] != 0) {
1493 UErrorCode uerr
= U_ZERO_ERROR
;
1494 UEnumeration
* uenum
= uloc_openKeywords(keyValueString
, &uerr
);
1495 if ( uenum
!= NULL
) {
1496 const char * keyword
;
1498 char value
[ULOC_KEYWORDS_CAPACITY
]; // use as max for keyword value
1499 while ( U_SUCCESS(uerr
) ) {
1500 keyword
= uenum_next(uenum
, &length
, &uerr
);
1501 if ( keyword
== NULL
) {
1504 length
= uloc_getKeywordValue( keyValueString
, keyword
, value
, sizeof(value
), &uerr
);
1505 length
= uloc_setKeywordValue( keyword
, value
, inLocaleString
, locStringMaxLen
, &uerr
);
1512 __private_extern__ CFStringRef
_CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator
, CFStringRef localeIdentifier
) {
1513 char inLocaleString
[kLocaleIdentifierCStringMax
];
1514 CFStringRef outStringRef
= NULL
;
1516 if ( localeIdentifier
&& CFStringGetCString(localeIdentifier
, inLocaleString
, sizeof(inLocaleString
), kCFStringEncodingASCII
) ) {
1517 KeyStringToResultString testEntry
;
1518 KeyStringToResultString
* foundEntry
;
1519 char keyValueString
[sizeof(inLocaleString
)]; // <1.10>
1520 char varKeyValueString
[sizeof(inLocaleString
)]; // <1.17>
1522 _GetKeyValueString(inLocaleString
, keyValueString
); // <1.10>
1523 testEntry
.result
= NULL
;
1525 // A. First check if input string matches an old-style string that has a replacement
1526 // (do this before case normalization)
1527 testEntry
.key
= inLocaleString
;
1528 foundEntry
= (KeyStringToResultString
*)bsearch( &testEntry
, oldAppleLocaleToCanonical
, kNumOldAppleLocaleToCanonical
,
1529 sizeof(KeyStringToResultString
), _CompareTestEntryToTableEntryKey
);
1531 // It does match, so replace old string with new
1532 strlcpy(inLocaleString
, foundEntry
->result
, sizeof(inLocaleString
));
1533 varKeyValueString
[0] = 0;
1535 // B. No match with an old-style string, use input string but update codes, normalize case, etc.
1536 _UpdateFullLocaleString(inLocaleString
, sizeof(inLocaleString
), NULL
, NULL
, varKeyValueString
); // <1.10><1.17>
1539 // C. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string
1541 // 1. Strip defaults in input string based on initial part of locale string
1542 // (mainly to strip default script tag for a language)
1543 testEntry
.key
= inLocaleString
;
1544 foundEntry
= (KeyStringToResultString
*)bsearch( &testEntry
, localeStringPrefixToDefaults
, kNumLocaleStringPrefixToDefaults
,
1545 sizeof(KeyStringToResultString
), _CompareTestEntryPrefixToTableEntryKey
);
1547 // The input string begins with a character sequence for which
1548 // there are default substrings which should be stripped if present
1549 _RemoveSubstringsIfPresent(inLocaleString
, foundEntry
->result
);
1552 // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string
1553 testEntry
.key
= inLocaleString
;
1554 foundEntry
= (KeyStringToResultString
*)bsearch( &testEntry
, appleLocaleToLanguageStringForCFBundle
, kNumAppleLocaleToLanguageStringForCFBundle
,
1555 sizeof(KeyStringToResultString
), _CompareTestEntryToTableEntryKey
);
1558 strlcpy(inLocaleString
, foundEntry
->result
, sizeof(inLocaleString
));
1560 // just delete the region tag and anything after
1561 char * inLocalePtr
= inLocaleString
;
1562 while (*inLocalePtr
!= 0 && *inLocalePtr
!= '_') {
1568 // D. Re-append any key-value strings, now canonical // <1.10><1.17>
1569 _AppendKeyValueString( inLocaleString
, sizeof(inLocaleString
), varKeyValueString
);
1570 _AppendKeyValueString( inLocaleString
, sizeof(inLocaleString
), keyValueString
);
1572 // All done, return what we came up with.
1573 outStringRef
= CFStringCreateWithCString(allocator
, inLocaleString
, kCFStringEncodingASCII
);
1576 return outStringRef
;
1579 CFStringRef
CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator
, CFStringRef localeIdentifier
) {
1580 char inLocaleString
[kLocaleIdentifierCStringMax
];
1581 CFStringRef outStringRef
= NULL
;
1583 if ( localeIdentifier
&& CFStringGetCString(localeIdentifier
, inLocaleString
, sizeof(inLocaleString
), kCFStringEncodingASCII
) ) {
1584 KeyStringToResultString testEntry
;
1585 KeyStringToResultString
* foundEntry
;
1586 char keyValueString
[sizeof(inLocaleString
)]; // <1.10>
1587 char varKeyValueString
[sizeof(inLocaleString
)]; // <1.17>
1589 _GetKeyValueString(inLocaleString
, keyValueString
); // <1.10>
1590 testEntry
.result
= NULL
;
1592 // A. First check if input string matches an old-style string that has a replacement
1593 // (do this before case normalization)
1594 testEntry
.key
= inLocaleString
;
1595 foundEntry
= (KeyStringToResultString
*)bsearch( &testEntry
, oldAppleLocaleToCanonical
, kNumOldAppleLocaleToCanonical
,
1596 sizeof(KeyStringToResultString
), _CompareTestEntryToTableEntryKey
);
1598 // It does match, so replace old string with new
1599 strlcpy(inLocaleString
, foundEntry
->result
, sizeof(inLocaleString
));
1600 varKeyValueString
[0] = 0;
1602 char * langRegSubtag
= NULL
;
1603 char * regionTag
= NULL
;
1605 // B. No match with an old-style string, use input string but update codes, normalize case, etc.
1606 _UpdateFullLocaleString(inLocaleString
, sizeof(inLocaleString
), &langRegSubtag
, ®ionTag
, varKeyValueString
); // <1.10><1.17><1.19>
1608 // if the language part already includes a regional variant, then delete any region tag. <1.19>
1609 if (langRegSubtag
&& regionTag
)
1613 // C. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string
1615 // 1. Strip defaults in input string based on initial part of locale string
1616 // (mainly to strip default script tag for a language)
1617 testEntry
.key
= inLocaleString
;
1618 foundEntry
= (KeyStringToResultString
*)bsearch( &testEntry
, localeStringPrefixToDefaults
, kNumLocaleStringPrefixToDefaults
,
1619 sizeof(KeyStringToResultString
), _CompareTestEntryPrefixToTableEntryKey
);
1621 // The input string begins with a character sequence for which
1622 // there are default substrings which should be stripped if present
1623 _RemoveSubstringsIfPresent(inLocaleString
, foundEntry
->result
);
1626 // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string
1627 testEntry
.key
= inLocaleString
;
1628 foundEntry
= (KeyStringToResultString
*)bsearch( &testEntry
, appleLocaleToLanguageString
, kNumAppleLocaleToLanguageString
,
1629 sizeof(KeyStringToResultString
), _CompareTestEntryToTableEntryKey
);
1632 strlcpy(inLocaleString
, foundEntry
->result
, sizeof(inLocaleString
));
1634 // skip to any region tag or java-type variant
1635 char * inLocalePtr
= inLocaleString
;
1636 while (*inLocalePtr
!= 0 && *inLocalePtr
!= '_') {
1639 // if there is still a region tag, turn it into a language variant <1.19>
1640 if (*inLocalePtr
== '_') {
1641 // handle 3-digit regions in addition to 2-letter ones
1642 char * regionTag
= inLocalePtr
++;
1643 long expectedLength
= 0;
1644 if ( isalpha(*inLocalePtr
) ) {
1645 while ( isalpha(*(++inLocalePtr
)) )
1648 } else if ( isdigit(*inLocalePtr
) ) {
1649 while ( isdigit(*(++inLocalePtr
)) )
1653 *regionTag
= (inLocalePtr
- regionTag
== expectedLength
)? '-': 0;
1655 // anything else at/after '_' just gets deleted
1659 // D. Re-append any key-value strings, now canonical // <1.10><1.17>
1660 _AppendKeyValueString( inLocaleString
, sizeof(inLocaleString
), varKeyValueString
);
1661 _AppendKeyValueString( inLocaleString
, sizeof(inLocaleString
), keyValueString
);
1663 // All done, return what we came up with.
1664 outStringRef
= CFStringCreateWithCString(allocator
, inLocaleString
, kCFStringEncodingASCII
);
1667 return outStringRef
;
1671 CFStringRef
CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator
, CFStringRef localeIdentifier
) {
1672 char inLocaleString
[kLocaleIdentifierCStringMax
];
1673 CFStringRef outStringRef
= NULL
;
1675 if ( localeIdentifier
&& CFStringGetCString(localeIdentifier
, inLocaleString
, sizeof(inLocaleString
), kCFStringEncodingASCII
) ) {
1676 KeyStringToResultString testEntry
;
1677 KeyStringToResultString
* foundEntry
;
1678 char keyValueString
[sizeof(inLocaleString
)]; // <1.10>
1679 char varKeyValueString
[sizeof(inLocaleString
)]; // <1.17>
1681 _GetKeyValueString(inLocaleString
, keyValueString
); // <1.10>
1682 testEntry
.result
= NULL
;
1684 // A. First check if input string matches an old-style Apple string that has a replacement
1685 // (do this before case normalization)
1686 testEntry
.key
= inLocaleString
;
1687 foundEntry
= (KeyStringToResultString
*)bsearch( &testEntry
, oldAppleLocaleToCanonical
, kNumOldAppleLocaleToCanonical
,
1688 sizeof(KeyStringToResultString
), _CompareTestEntryToTableEntryKey
);
1690 // It does match, so replace old string with new // <1.10>
1691 strlcpy(inLocaleString
, foundEntry
->result
, sizeof(inLocaleString
));
1692 varKeyValueString
[0] = 0;
1694 char * langRegSubtag
= NULL
;
1695 char * regionTag
= NULL
;
1697 // B. No match with an old-style string, use input string but update codes, normalize case, etc.
1698 _UpdateFullLocaleString(inLocaleString
, sizeof(inLocaleString
), &langRegSubtag
, ®ionTag
, varKeyValueString
); // <1.10><1.17>
1701 // C. Now strip defaults that are implied by other fields.
1703 // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter.
1704 if ( langRegSubtag
&& regionTag
&& strncmp(langRegSubtag
+1, regionTag
+1, 2) == 0 ) {
1705 _DeleteCharsAtPointer(langRegSubtag
, 3);
1708 // 2. Strip defaults in input string based on final region tag in locale string
1709 // (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO)
1711 testEntry
.key
= regionTag
;
1712 foundEntry
= (KeyStringToResultString
*)bsearch( &testEntry
, localeStringRegionToDefaults
, kNumLocaleStringRegionToDefaults
,
1713 sizeof(KeyStringToResultString
), _CompareTestEntryToTableEntryKey
);
1715 _RemoveSubstringsIfPresent(inLocaleString
, foundEntry
->result
);
1719 // 3. Strip defaults in input string based on initial part of locale string
1720 // (mainly to strip default script tag for a language)
1721 testEntry
.key
= inLocaleString
;
1722 foundEntry
= (KeyStringToResultString
*)bsearch( &testEntry
, localeStringPrefixToDefaults
, kNumLocaleStringPrefixToDefaults
,
1723 sizeof(KeyStringToResultString
), _CompareTestEntryPrefixToTableEntryKey
);
1725 // The input string begins with a character sequence for which
1726 // there are default substrings which should be stripped if present
1727 _RemoveSubstringsIfPresent(inLocaleString
, foundEntry
->result
);
1731 // D. Re-append any key-value strings, now canonical // <1.10><1.17>
1732 _AppendKeyValueString( inLocaleString
, sizeof(inLocaleString
), varKeyValueString
);
1733 _AppendKeyValueString( inLocaleString
, sizeof(inLocaleString
), keyValueString
);
1735 // Now create the CFString (even if empty!)
1736 outStringRef
= CFStringCreateWithCString(allocator
, inLocaleString
, kCFStringEncodingASCII
);
1739 return outStringRef
;
1742 // CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on
1743 // the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c
1744 CFStringRef
CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator
, LangCode lcode
, RegionCode rcode
) {
1745 CFStringRef result
= NULL
;
1746 if (0 <= rcode
&& rcode
< kNumRegionCodeToLocaleString
) {
1747 const char *localeString
= regionCodeToLocaleString
[rcode
];
1748 if (localeString
!= NULL
&& *localeString
!= '\0') {
1749 result
= CFStringCreateWithCStringNoCopy(allocator
, localeString
, kCFStringEncodingASCII
, kCFAllocatorNull
);
1752 if (result
) return result
;
1753 if (0 <= lcode
&& lcode
< kNumLangCodeToLocaleString
) {
1754 const char *localeString
= langCodeToLocaleString
[lcode
];
1755 if (localeString
!= NULL
&& *localeString
!= '\0') {
1756 result
= CFStringCreateWithCStringNoCopy(allocator
, localeString
, kCFStringEncodingASCII
, kCFAllocatorNull
);
1763 CFDictionaryRef
CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator
, CFStringRef localeID
) {
1764 char cLocaleID
[ULOC_FULLNAME_CAPACITY
+ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1765 char buffer
[ULOC_FULLNAME_CAPACITY
+ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1766 CFMutableDictionaryRef working
= CFDictionaryCreateMutable(allocator
, 10, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1768 UErrorCode icuStatus
= U_ZERO_ERROR
;
1771 // Extract the C string locale ID, for ICU
1772 CFIndex outBytes
= 0;
1773 CFStringGetBytes(localeID
, CFRangeMake(0, CFStringGetLength(localeID
)), kCFStringEncodingASCII
, (UInt8
) '?', true, (unsigned char *)cLocaleID
, sizeof(cLocaleID
)/sizeof(char) - 1, &outBytes
);
1774 cLocaleID
[outBytes
] = '\0';
1776 // Get the components
1777 length
= uloc_getLanguage(cLocaleID
, buffer
, sizeof(buffer
)/sizeof(char), &icuStatus
);
1778 if (U_SUCCESS(icuStatus
) && length
> 0)
1780 CFStringRef string
= CFStringCreateWithBytes(allocator
, (UInt8
*)buffer
, length
, kCFStringEncodingASCII
, true);
1781 CFDictionaryAddValue(working
, kCFLocaleLanguageCode
, string
);
1784 icuStatus
= U_ZERO_ERROR
;
1786 length
= uloc_getScript(cLocaleID
, buffer
, sizeof(buffer
)/sizeof(char), &icuStatus
);
1787 if (U_SUCCESS(icuStatus
) && length
> 0)
1789 CFStringRef string
= CFStringCreateWithBytes(allocator
, (UInt8
*)buffer
, length
, kCFStringEncodingASCII
, true);
1790 CFDictionaryAddValue(working
, kCFLocaleScriptCode
, string
);
1793 icuStatus
= U_ZERO_ERROR
;
1795 length
= uloc_getCountry(cLocaleID
, buffer
, sizeof(buffer
)/sizeof(char), &icuStatus
);
1796 if (U_SUCCESS(icuStatus
) && length
> 0)
1798 CFStringRef string
= CFStringCreateWithBytes(allocator
, (UInt8
*)buffer
, length
, kCFStringEncodingASCII
, true);
1799 CFDictionaryAddValue(working
, kCFLocaleCountryCode
, string
);
1802 icuStatus
= U_ZERO_ERROR
;
1804 length
= uloc_getVariant(cLocaleID
, buffer
, sizeof(buffer
)/sizeof(char), &icuStatus
);
1805 if (U_SUCCESS(icuStatus
) && length
> 0)
1807 CFStringRef string
= CFStringCreateWithBytes(allocator
, (UInt8
*)buffer
, length
, kCFStringEncodingASCII
, true);
1808 CFDictionaryAddValue(working
, kCFLocaleVariantCode
, string
);
1811 icuStatus
= U_ZERO_ERROR
;
1813 // Now get the keywords; open an enumerator on them
1814 UEnumeration
*iter
= uloc_openKeywords(cLocaleID
, &icuStatus
);
1815 const char *locKey
= NULL
;
1816 int32_t locKeyLen
= 0;
1817 while ((locKey
= uenum_next(iter
, &locKeyLen
, &icuStatus
)) && U_SUCCESS(icuStatus
))
1819 char locValue
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1821 // Get the value for this keyword
1822 if (uloc_getKeywordValue(cLocaleID
, locKey
, locValue
, sizeof(locValue
)/sizeof(char), &icuStatus
) > 0
1823 && U_SUCCESS(icuStatus
))
1825 CFStringRef key
= CFStringCreateWithBytes(allocator
, (UInt8
*)locKey
, strlen(locKey
), kCFStringEncodingASCII
, true);
1826 CFStringRef value
= CFStringCreateWithBytes(allocator
, (UInt8
*)locValue
, strlen(locValue
), kCFStringEncodingASCII
, true);
1828 CFDictionaryAddValue(working
, key
, value
);
1837 // Convert to an immutable dictionary and return
1838 CFDictionaryRef result
= CFDictionaryCreateCopy(allocator
, working
);
1843 typedef struct __AppendContext
1846 CFMutableStringRef working
;
1849 static void __AppendKeywords(const void *k
, const void *v
, void *c
)
1851 __AppendContext
*context
= (__AppendContext
*) c
;
1852 CFStringRef key
= (CFStringRef
) k
;
1853 CFStringRef value
= (CFStringRef
) v
;
1854 if (CFEqual(key
, kCFLocaleLanguageCode
) || CFEqual(key
, kCFLocaleScriptCode
) || CFEqual(key
, kCFLocaleCountryCode
) || CFEqual(key
, kCFLocaleVariantCode
))
1856 CFStringAppendFormat(context
->working
, NULL
, CFSTR("%c%@%c%@"), context
->separator
, key
, ULOC_KEYWORD_ASSIGN
, value
);
1857 context
->separator
= ULOC_KEYWORD_ITEM_SEPARATOR
;
1860 CFStringRef
CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator
, CFDictionaryRef dictionary
) {
1861 CFMutableStringRef working
= CFStringCreateMutable(allocator
, 0);
1862 CFStringRef value
= NULL
;
1863 bool country
= false;
1864 __AppendContext context
= {ULOC_KEYWORD_SEPARATOR
, working
};
1866 if ((value
= (CFStringRef
) CFDictionaryGetValue(dictionary
, kCFLocaleLanguageCode
)))
1868 CFStringAppend(working
, value
);
1871 if ((value
= (CFStringRef
) CFDictionaryGetValue(dictionary
, kCFLocaleScriptCode
)))
1873 CFStringAppendFormat(working
, NULL
, CFSTR("_%@"), value
);
1876 if ((value
= (CFStringRef
) CFDictionaryGetValue(dictionary
, kCFLocaleCountryCode
)))
1878 CFStringAppendFormat(working
, NULL
, CFSTR("_%@"), value
);
1882 if ((value
= (CFStringRef
) CFDictionaryGetValue(dictionary
, kCFLocaleVariantCode
)))
1885 CFStringAppend(working
, CFSTR("_"));
1886 CFStringAppendFormat(working
, NULL
, CFSTR("_%@"), value
);
1889 // Now iterate through any remaining entries and append as keywords
1890 CFDictionaryApplyFunction(dictionary
, __AppendKeywords
, &context
);
1892 // Convert to immutable string and return
1893 CFStringRef result
= (CFStringRef
)CFStringCreateCopy(allocator
, working
);