]> git.saurik.com Git - apple/cf.git/blob - CFLocaleIdentifier.c
9da7ecda6425eed5b443958bc15c4d73a03d8a9f
[apple/cf.git] / CFLocaleIdentifier.c
1 /*
2 * Copyright (c) 2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 CFLocaleIdentifier.c
26 Copyright (c) 2002-2012, Apple Inc. All rights reserved.
27 Responsibility: David Smith
28
29 CFLocaleIdentifier.c defines
30 - enum value kLocaleIdentifierCStringMax
31 - structs KeyStringToResultString, SpecialCaseUpdates
32 and provides the following data for the functions
33 CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes,
34 CFLocaleCreateCanonicalLocaleIdentifierFromString
35 CFLocaleCreateCanonicalLanguageIdentifierFromString
36
37 1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString;
38 map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string
39
40 2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString;
41 map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string
42
43 3. static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical;
44 map old Apple string oldAppleLocaleToCanonical[n].key
45 to canonical locale string oldAppleLocaleToCanonical[n].result
46 for n = 0..kNumOldAppleLocaleToCanonical-1
47
48 4. static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical;
49 map non-canonical language prefix (3-letter, obsolete) localeStringPrefixToCanonical[n].key
50 to updated replacement localeStringPrefixToCanonical[n].result
51 for n = 0..kNumLocaleStringPrefixToCanonical-1
52
53 5. static const SpecialCaseUpdates specialCases[];
54 various special cases for updating region codes, or for updating language codes based on region codes
55
56 6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults;
57 map locale string region tag localeStringRegionToDefaults[n].key
58 to default substrings to delete localeStringRegionToDefaults[n].result
59 for n = 0..kNumLocaleStringRegionToDefaults-1
60
61 7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults;
62 map locale string initial part localeStringPrefixToDefaults[n].key
63 to default substrings to delete localeStringPrefixToDefaults[n].result
64 for n = 0..kNumLocaleStringPrefixToDefaults-1
65
66 8. static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString;
67 map Apple locale string appleLocaleToLanguageString[n].key
68 to equivalent language string appleLocaleToLanguageString[n].result
69 for n = 0..kNumAppleLocaleToLanguageString-1
70
71 */
72
73 #include <CoreFoundation/CFString.h>
74 #include <CoreFoundation/CFCalendar.h>
75 #include <ctype.h>
76 #include <string.h>
77 #include <stdlib.h>
78 #include <stdio.h>
79 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
80 #include <unicode/uloc.h>
81 #else
82 #define ULOC_KEYWORD_SEPARATOR '@'
83 #define ULOC_FULLNAME_CAPACITY 56
84 #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
85 #endif
86 #include "CFInternal.h"
87 #include "CFLocaleInternal.h"
88
89 // Max byte length of locale identifier (ASCII) as C string, including terminating null byte
90 enum {
91 kLocaleIdentifierCStringMax = ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY // currently 56 + 100
92 };
93
// KeyStringToResultString: one row of the key -> result string tables used by
// CFLocaleCreateCanonicalLocaleIdentifierFromString. Both members point at
// string literals owned by the table; nothing is allocated or freed.
struct KeyStringToResultString {
    const char *    key;        // string to look up (tables are kept sorted on this member)
    const char *    result;     // string that the key maps to
};
typedef struct KeyStringToResultString KeyStringToResultString;
100
// SpecialCaseUpdates: one row of the specialCases table used by
// CFLocaleCreateCanonicalLocaleIdentifierFromString for updating region codes,
// or for updating language codes based on region codes.
// NOTE(review): the table itself is not visible from here; the per-field notes
// below are inferred from the member names and should be confirmed against it.
struct SpecialCaseUpdates {
    const char *    lang;       // language code the special case applies to
    const char *    reg1;       // presumably the first region tag to match
    const char *    update1;    // presumably the replacement used when reg1 matches
    const char *    reg2;       // presumably an optional second region tag to match
    const char *    update2;    // presumably the replacement used when reg2 matches
};
typedef struct SpecialCaseUpdates SpecialCaseUpdates;
110
111
// regionCodeToLocaleString maps a RegionCode (used directly as the array index)
// to its canonical locale string. Entries that are NULL are unassigned/unused
// region codes with no mapping; callers must check for NULL before use.
// Entries must stay in index order: the position in the table IS the region code.
//
// Each row's comment has the form:
//   region code number + name; language code number + name; [comment]
//   [# __CFBundleLocaleAbbreviationsArray string, if different]
static const char * const regionCodeToLocaleString[] = {
    "en_US",        //   0 verUS; 0 langEnglish;
    "fr_FR",        //   1 verFrance; 1 langFrench;
    "en_GB",        //   2 verBritain; 0 langEnglish;
    "de_DE",        //   3 verGermany; 2 langGerman;
    "it_IT",        //   4 verItaly; 3 langItalian;
    "nl_NL",        //   5 verNetherlands; 4 langDutch;
    "nl_BE",        //   6 verFlemish; 34 langFlemish (redundant, =Dutch);
    "sv_SE",        //   7 verSweden; 5 langSwedish;
    "es_ES",        //   8 verSpain; 6 langSpanish;
    "da_DK",        //   9 verDenmark; 7 langDanish;
    "pt_PT",        //  10 verPortugal; 8 langPortuguese;
    "fr_CA",        //  11 verFrCanada; 1 langFrench;
    "nb_NO",        //  12 verNorway; 9 langNorwegian (Bokmal); # "no_NO"
    "he_IL",        //  13 verIsrael; 10 langHebrew;
    "ja_JP",        //  14 verJapan; 11 langJapanese;
    "en_AU",        //  15 verAustralia; 0 langEnglish;
    "ar",           //  16 verArabic; 12 langArabic;
    "fi_FI",        //  17 verFinland; 13 langFinnish;
    "fr_CH",        //  18 verFrSwiss; 1 langFrench;
    "de_CH",        //  19 verGrSwiss; 2 langGerman;
    "el_GR",        //  20 verGreece; 14 langGreek (modern)-Grek-mono;
    "is_IS",        //  21 verIceland; 15 langIcelandic;
    "mt_MT",        //  22 verMalta; 16 langMaltese;
    "el_CY",        //  23 verCyprus; 14 langGreek?; el or tr? guess el # ""
    "tr_TR",        //  24 verTurkey; 17 langTurkish;
    "hr_HR",        //  25 verYugoCroatian; 18 langCroatian; * one-way mapping -> verCroatia
    "nl_NL",        //  26 KCHR, Netherlands; 4 langDutch; * one-way mapping
    "nl_BE",        //  27 KCHR, verFlemish; 34 langFlemish; * one-way mapping
    "_CA",          //  28 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
    "_CA",          //  29 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
    "pt_PT",        //  30 KCHR, Portugal; 8 langPortuguese; * one-way mapping
    "nb_NO",        //  31 KCHR, Norway; 9 langNorwegian (Bokmal); * one-way mapping # "no_NO"
    "da_DK",        //  32 KCHR, Denmark; 7 langDanish; * one-way mapping
    "hi_IN",        //  33 verIndiaHindi; 21 langHindi;
    "ur_PK",        //  34 verPakistanUrdu; 20 langUrdu;
    "tr_TR",        //  35 verTurkishModified; 17 langTurkish; * one-way mapping
    "it_CH",        //  36 verItalianSwiss; 3 langItalian;
    "en_001",       //  37 verInternational; 0 langEnglish; ASCII only # "en"
    NULL,           //  38 *unassigned; -1 none; * one-way mapping # ""
    "ro_RO",        //  39 verRomania; 37 langRomanian;
    "grc",          //  40 verGreekAncient; 148 langGreekAncient -Grek-poly; # "el_GR"
    "lt_LT",        //  41 verLithuania; 24 langLithuanian;
    "pl_PL",        //  42 verPoland; 25 langPolish;
    "hu_HU",        //  43 verHungary; 26 langHungarian;
    "et_EE",        //  44 verEstonia; 27 langEstonian;
    "lv_LV",        //  45 verLatvia; 28 langLatvian;
    "se",           //  46 verSami; 29 langSami;
    "fo_FO",        //  47 verFaroeIsl; 30 langFaroese;
    "fa_IR",        //  48 verIran; 31 langFarsi/Persian;
    "ru_RU",        //  49 verRussia; 32 langRussian;
    "ga_IE",        //  50 verIreland; 35 langIrishGaelic (no dots);
    "ko_KR",        //  51 verKorea; 23 langKorean;
    "zh_CN",        //  52 verChina; 33 langSimpChinese;
    "zh_TW",        //  53 verTaiwan; 19 langTradChinese;
    "th_TH",        //  54 verThailand; 22 langThai;
    "und",          //  55 verScriptGeneric; -1 none; # "" // <1.9>
    "cs_CZ",        //  56 verCzech; 38 langCzech;
    "sk_SK",        //  57 verSlovak; 39 langSlovak;
    "und",          //  58 verEastAsiaGeneric; -1 none; * one-way mapping # "" // <1.9>
    "hu_HU",        //  59 verMagyar; 26 langHungarian; * one-way mapping -> verHungary
    "bn",           //  60 verBengali; 67 langBengali; _IN or _BD? guess generic
    "be_BY",        //  61 verBelarus; 46 langBelorussian;
    "uk_UA",        //  62 verUkraine; 45 langUkrainian;
    NULL,           //  63 *unused; -1 none; * one-way mapping # ""
    "el_GR",        //  64 verGreeceAlt; 14 langGreek (modern)-Grek-mono; * one-way mapping
    "sr_RS",        //  65 verSerbian; 42 langSerbian -Cyrl; // <1.18>
    "sl_SI",        //  66 verSlovenian; 40 langSlovenian;
    "mk_MK",        //  67 verMacedonian; 43 langMacedonian;
    "hr_HR",        //  68 verCroatia; 18 langCroatian;
    NULL,           //  69 *unused; -1 none; * one-way mapping # ""
    "de-1996",      //  70 verGermanReformed; 2 langGerman; 1996 orthogr. # "de_DE"
    "pt_BR",        //  71 verBrazil; 8 langPortuguese;
    "bg_BG",        //  72 verBulgaria; 44 langBulgarian;
    "ca_ES",        //  73 verCatalonia; 130 langCatalan;
    "mul",          //  74 verMultilingual; -1 none; # ""
    "gd",           //  75 verScottishGaelic; 144 langScottishGaelic;
    "gv",           //  76 verManxGaelic; 145 langManxGaelic;
    "br",           //  77 verBreton; 142 langBreton;
    "iu_CA",        //  78 verNunavut; 143 langInuktitut -Cans;
    "cy",           //  79 verWelsh; 128 langWelsh;
    "_CA",          //  80 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
    "ga-Latg_IE",   //  81 verIrishGaelicScrip; 146 langIrishGaelicScript -dots; # "ga_IE" // <xx>
    "en_CA",        //  82 verEngCanada; 0 langEnglish;
    "dz_BT",        //  83 verBhutan; 137 langDzongkha;
    "hy_AM",        //  84 verArmenian; 51 langArmenian;
    "ka_GE",        //  85 verGeorgian; 52 langGeorgian;
    "es_419",       //  86 verSpLatinAmerica; 6 langSpanish; # "es"
    "es_ES",        //  87 KCHR, Spain; 6 langSpanish; * one-way mapping
    "to_TO",        //  88 verTonga; 147 langTongan;
    "pl_PL",        //  89 KCHR, Poland; 25 langPolish; * one-way mapping
    "ca_ES",        //  90 KCHR, Catalonia; 130 langCatalan; * one-way mapping
    "fr_001",       //  91 verFrenchUniversal; 1 langFrench;
    "de_AT",        //  92 verAustria; 2 langGerman;
    "es_419",       //  93 > verSpLatinAmerica; 6 langSpanish; * one-way mapping # "es"
    "gu_IN",        //  94 verGujarati; 69 langGujarati;
    "pa",           //  95 verPunjabi; 70 langPunjabi; _IN or _PK? guess generic
    "ur_IN",        //  96 verIndiaUrdu; 20 langUrdu;
    "vi_VN",        //  97 verVietnam; 80 langVietnamese;
    "fr_BE",        //  98 verFrBelgium; 1 langFrench;
    "uz_UZ",        //  99 verUzbek; 47 langUzbek;
    "en_SG",        // 100 verSingapore; 0 langEnglish?; en, zh, or ms? guess en # ""
    "nn_NO",        // 101 verNynorsk; 151 langNynorsk; # ""
    "af_ZA",        // 102 verAfrikaans; 141 langAfrikaans;
    "eo",           // 103 verEsperanto; 94 langEsperanto;
    "mr_IN",        // 104 verMarathi; 66 langMarathi;
    "bo",           // 105 verTibetan; 63 langTibetan;
    "ne_NP",        // 106 verNepal; 64 langNepali;
    "kl",           // 107 verGreenland; 149 langGreenlandic;
    "en_IE",        // 108 verIrelandEnglish; 0 langEnglish; # (no entry)
};
// Number of entries in regionCodeToLocaleString; valid region codes are
// 0..kNumRegionCodeToLocaleString-1. Derived from the element type so it stays
// correct if the table's element type ever changes.
enum {
    kNumRegionCodeToLocaleString = sizeof(regionCodeToLocaleString)/sizeof(regionCodeToLocaleString[0])
};
230
// langCodeToLocaleString maps a LangCode (used directly as the array index) to
// its canonical locale string. Language codes 95..127 are a gap and are filled
// with NULL so that the entries from 128 (langWelsh) onward land on their own
// index; callers must check for NULL before use.
// Entries must stay in index order: the position in the table IS the language code.
//
// Each row's comment has the form:
//   language code number + name; [comment]
//   [# __CFBundleLanguageAbbreviationsArray string, if different]
static const char * const langCodeToLocaleString[] = {
    "en",           //   0 langEnglish;
    "fr",           //   1 langFrench;
    "de",           //   2 langGerman;
    "it",           //   3 langItalian;
    "nl",           //   4 langDutch;
    "sv",           //   5 langSwedish;
    "es",           //   6 langSpanish;
    "da",           //   7 langDanish;
    "pt",           //   8 langPortuguese;
    "nb",           //   9 langNorwegian (Bokmal); # "no"
    "he",           //  10 langHebrew -Hebr;
    "ja",           //  11 langJapanese -Jpan;
    "ar",           //  12 langArabic -Arab;
    "fi",           //  13 langFinnish;
    "el",           //  14 langGreek (modern)-Grek-mono;
    "is",           //  15 langIcelandic;
    "mt",           //  16 langMaltese -Latn;
    "tr",           //  17 langTurkish -Latn;
    "hr",           //  18 langCroatian;
    "zh-Hant",      //  19 langTradChinese; # "zh"
    "ur",           //  20 langUrdu -Arab;
    "hi",           //  21 langHindi -Deva;
    "th",           //  22 langThai -Thai;
    "ko",           //  23 langKorean -Hang;
    "lt",           //  24 langLithuanian;
    "pl",           //  25 langPolish;
    "hu",           //  26 langHungarian;
    "et",           //  27 langEstonian;
    "lv",           //  28 langLatvian;
    "se",           //  29 langSami;
    "fo",           //  30 langFaroese;
    "fa",           //  31 langFarsi/Persian -Arab;
    "ru",           //  32 langRussian -Cyrl;
    "zh-Hans",      //  33 langSimpChinese; # "zh"
    "nl-BE",        //  34 langFlemish (redundant, =Dutch); # "nl"
    "ga",           //  35 langIrishGaelic (no dots);
    "sq",           //  36 langAlbanian; no region codes
    "ro",           //  37 langRomanian;
    "cs",           //  38 langCzech;
    "sk",           //  39 langSlovak;
    "sl",           //  40 langSlovenian;
    "yi",           //  41 langYiddish -Hebr; no region codes
    "sr",           //  42 langSerbian -Cyrl;
    "mk",           //  43 langMacedonian -Cyrl;
    "bg",           //  44 langBulgarian -Cyrl;
    "uk",           //  45 langUkrainian -Cyrl;
    "be",           //  46 langBelorussian -Cyrl;
    "uz-Cyrl",      //  47 langUzbek -Cyrl; also -Latn, -Arab
    "kk",           //  48 langKazakh -Cyrl; no region codes; also -Latn, -Arab
    "az-Cyrl",      //  49 langAzerbaijani -Cyrl; no region codes # "az"
    "az-Arab",      //  50 langAzerbaijanAr -Arab; no region codes # "az"
    "hy",           //  51 langArmenian -Armn;
    "ka",           //  52 langGeorgian -Geor;
    "mo",           //  53 langMoldavian -Cyrl; no region codes
    "ky",           //  54 langKirghiz -Cyrl; no region codes; also -Latn, -Arab
    "tg-Cyrl",      //  55 langTajiki -Cyrl; no region codes; also -Latn, -Arab
    "tk-Cyrl",      //  56 langTurkmen -Cyrl; no region codes; also -Latn, -Arab
    "mn-Mong",      //  57 langMongolian -Mong; no region codes # "mn"
    "mn-Cyrl",      //  58 langMongolianCyr -Cyrl; no region codes # "mn"
    "ps",           //  59 langPashto -Arab; no region codes
    "ku",           //  60 langKurdish -Arab; no region codes
    "ks",           //  61 langKashmiri -Arab; no region codes
    "sd",           //  62 langSindhi -Arab; no region codes
    "bo",           //  63 langTibetan -Tibt;
    "ne",           //  64 langNepali -Deva;
    "sa",           //  65 langSanskrit -Deva; no region codes
    "mr",           //  66 langMarathi -Deva;
    "bn",           //  67 langBengali -Beng;
    "as",           //  68 langAssamese -Beng; no region codes
    "gu",           //  69 langGujarati -Gujr;
    "pa",           //  70 langPunjabi -Guru;
    "or",           //  71 langOriya -Orya; no region codes
    "ml",           //  72 langMalayalam -Mlym; no region codes
    "kn",           //  73 langKannada -Knda; no region codes
    "ta",           //  74 langTamil -Taml; no region codes
    "te",           //  75 langTelugu -Telu; no region codes
    "si",           //  76 langSinhalese -Sinh; no region codes
    "my",           //  77 langBurmese -Mymr; no region codes
    "km",           //  78 langKhmer -Khmr; no region codes
    "lo",           //  79 langLao -Laoo; no region codes
    "vi",           //  80 langVietnamese -Latn;
    "id",           //  81 langIndonesian -Latn; no region codes
    "tl",           //  82 langTagalog -Latn; no region codes
    "ms",           //  83 langMalayRoman -Latn; no region codes # "ms"
    "ms-Arab",      //  84 langMalayArabic -Arab; no region codes # "ms"
    "am",           //  85 langAmharic -Ethi; no region codes
    "ti",           //  86 langTigrinya -Ethi; no region codes
    "om",           //  87 langOromo -Ethi; no region codes
    "so",           //  88 langSomali -Latn; no region codes
    "sw",           //  89 langSwahili -Latn; no region codes
    "rw",           //  90 langKinyarwanda -Latn; no region codes
    "rn",           //  91 langRundi -Latn; no region codes
    "ny",           //  92 langNyanja/Chewa -Latn; no region codes # ""
    "mg",           //  93 langMalagasy -Latn; no region codes
    "eo",           //  94 langEsperanto -Latn;
    // Three rows of 11 NULLs each pad the unassigned codes 95..127.
    NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,     //  95 to 105 (gap)
    NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,     // 106 to 116 (gap)
    NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,     // 117 to 127 (gap)
    "cy",           // 128 langWelsh -Latn;
    "eu",           // 129 langBasque -Latn; no region codes
    "ca",           // 130 langCatalan -Latn;
    "la",           // 131 langLatin -Latn; no region codes
    "qu",           // 132 langQuechua -Latn; no region codes
    "gn",           // 133 langGuarani -Latn; no region codes
    "ay",           // 134 langAymara -Latn; no region codes
    "tt-Cyrl",      // 135 langTatar -Cyrl; no region codes
    "ug",           // 136 langUighur -Arab; no region codes
    "dz",           // 137 langDzongkha -Tibt;
    "jv",           // 138 langJavaneseRom -Latn; no region codes
    "su",           // 139 langSundaneseRom -Latn; no region codes
    "gl",           // 140 langGalician -Latn; no region codes
    "af",           // 141 langAfrikaans -Latn;
    "br",           // 142 langBreton -Latn;
    "iu",           // 143 langInuktitut -Cans;
    "gd",           // 144 langScottishGaelic;
    "gv",           // 145 langManxGaelic -Latn;
    "ga-Latg",      // 146 langIrishGaelicScript -Latn-dots; # "ga" // <xx>
    "to",           // 147 langTongan -Latn;
    "grc",          // 148 langGreekAncient -Grek-poly; # "el"
    "kl",           // 149 langGreenlandic -Latn;
    "az-Latn",      // 150 langAzerbaijanRoman -Latn; no region codes # "az"
    "nn",           // 151 langNynorsk -Latn; # (no entry)
};
// Number of entries in langCodeToLocaleString; valid language codes are
// 0..kNumLangCodeToLocaleString-1. Derived from the element type so it stays
// correct if the table's element type ever changes.
enum {
    kNumLangCodeToLocaleString = sizeof(langCodeToLocaleString)/sizeof(langCodeToLocaleString[0])
};
362
363 static const KeyStringToResultString oldAppleLocaleToCanonical[] = {
364 // Map obsolete/old-style Apple strings to canonical
365 // Must be sorted according to how strcmp compares the strings in the first column
366 //
367 // non-canonical canonical [ comment ] # source/reason for non-canonical string
368 // string string
369 // ------------- ---------
370 { "Afrikaans", "af" }, // # __CFBundleLanguageNamesArray
371 { "Albanian", "sq" }, // # __CFBundleLanguageNamesArray
372 { "Amharic", "am" }, // # __CFBundleLanguageNamesArray
373 { "Arabic", "ar" }, // # __CFBundleLanguageNamesArray
374 { "Armenian", "hy" }, // # __CFBundleLanguageNamesArray
375 { "Assamese", "as" }, // # __CFBundleLanguageNamesArray
376 { "Aymara", "ay" }, // # __CFBundleLanguageNamesArray
377 { "Azerbaijani", "az" }, // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn")
378 { "Basque", "eu" }, // # __CFBundleLanguageNamesArray
379 { "Belarusian", "be" }, // # handle other names
380 { "Belorussian", "be" }, // # handle other names
381 { "Bengali", "bn" }, // # __CFBundleLanguageNamesArray
382 { "Brazilian Portugese", "pt-BR" }, // # from Installer.app Info.plist IFLanguages key, misspelled
383 { "Brazilian Portuguese", "pt-BR" }, // # correct spelling for above
384 { "Breton", "br" }, // # __CFBundleLanguageNamesArray
385 { "Bulgarian", "bg" }, // # __CFBundleLanguageNamesArray
386 { "Burmese", "my" }, // # __CFBundleLanguageNamesArray
387 { "Byelorussian", "be" }, // # __CFBundleLanguageNamesArray
388 { "Catalan", "ca" }, // # __CFBundleLanguageNamesArray
389 { "Chewa", "ny" }, // # handle other names
390 { "Chichewa", "ny" }, // # handle other names
391 { "Chinese", "zh" }, // -Hans,-Hant? # __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans")
392 { "Chinese, Simplified", "zh-Hans" }, // # from Installer.app Info.plist IFLanguages key
393 { "Chinese, Traditional", "zh-Hant" }, // # correct spelling for below
394 { "Chinese, Tradtional", "zh-Hant" }, // # from Installer.app Info.plist IFLanguages key, misspelled
395 { "Croatian", "hr" }, // # __CFBundleLanguageNamesArray
396 { "Czech", "cs" }, // # __CFBundleLanguageNamesArray
397 { "Danish", "da" }, // # __CFBundleLanguageNamesArray
398 { "Dutch", "nl" }, // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE")
399 { "Dzongkha", "dz" }, // # __CFBundleLanguageNamesArray
400 { "English", "en" }, // # __CFBundleLanguageNamesArray
401 { "Esperanto", "eo" }, // # __CFBundleLanguageNamesArray
402 { "Estonian", "et" }, // # __CFBundleLanguageNamesArray
403 { "Faroese", "fo" }, // # __CFBundleLanguageNamesArray
404 { "Farsi", "fa" }, // # __CFBundleLanguageNamesArray
405 { "Finnish", "fi" }, // # __CFBundleLanguageNamesArray
406 { "Flemish", "nl-BE" }, // # handle other names
407 { "French", "fr" }, // # __CFBundleLanguageNamesArray
408 { "Galician", "gl" }, // # __CFBundleLanguageNamesArray
409 { "Gallegan", "gl" }, // # handle other names
410 { "Georgian", "ka" }, // # __CFBundleLanguageNamesArray
411 { "German", "de" }, // # __CFBundleLanguageNamesArray
412 { "Greek", "el" }, // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc")
413 { "Greenlandic", "kl" }, // # __CFBundleLanguageNamesArray
414 { "Guarani", "gn" }, // # __CFBundleLanguageNamesArray
415 { "Gujarati", "gu" }, // # __CFBundleLanguageNamesArray
416 { "Hawaiian", "haw" }, // # handle new languages
417 { "Hebrew", "he" }, // # __CFBundleLanguageNamesArray
418 { "Hindi", "hi" }, // # __CFBundleLanguageNamesArray
419 { "Hungarian", "hu" }, // # __CFBundleLanguageNamesArray
420 { "Icelandic", "is" }, // # __CFBundleLanguageNamesArray
421 { "Indonesian", "id" }, // # __CFBundleLanguageNamesArray
422 { "Inuktitut", "iu" }, // # __CFBundleLanguageNamesArray
423 { "Irish", "ga" }, // # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots")
424 { "Italian", "it" }, // # __CFBundleLanguageNamesArray
425 { "Japanese", "ja" }, // # __CFBundleLanguageNamesArray
426 { "Javanese", "jv" }, // # __CFBundleLanguageNamesArray
427 { "Kalaallisut", "kl" }, // # handle other names
428 { "Kannada", "kn" }, // # __CFBundleLanguageNamesArray
429 { "Kashmiri", "ks" }, // # __CFBundleLanguageNamesArray
430 { "Kazakh", "kk" }, // # __CFBundleLanguageNamesArray
431 { "Khmer", "km" }, // # __CFBundleLanguageNamesArray
432 { "Kinyarwanda", "rw" }, // # __CFBundleLanguageNamesArray
433 { "Kirghiz", "ky" }, // # __CFBundleLanguageNamesArray
434 { "Korean", "ko" }, // # __CFBundleLanguageNamesArray
435 { "Kurdish", "ku" }, // # __CFBundleLanguageNamesArray
436 { "Lao", "lo" }, // # __CFBundleLanguageNamesArray
437 { "Latin", "la" }, // # __CFBundleLanguageNamesArray
438 { "Latvian", "lv" }, // # __CFBundleLanguageNamesArray
439 { "Lithuanian", "lt" }, // # __CFBundleLanguageNamesArray
440 { "Macedonian", "mk" }, // # __CFBundleLanguageNamesArray
441 { "Malagasy", "mg" }, // # __CFBundleLanguageNamesArray
442 { "Malay", "ms" }, // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab")
443 { "Malayalam", "ml" }, // # __CFBundleLanguageNamesArray
444 { "Maltese", "mt" }, // # __CFBundleLanguageNamesArray
445 { "Manx", "gv" }, // # __CFBundleLanguageNamesArray
446 { "Marathi", "mr" }, // # __CFBundleLanguageNamesArray
447 { "Moldavian", "mo" }, // # __CFBundleLanguageNamesArray
448 { "Mongolian", "mn" }, // -Mong,-Cyrl? # __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl")
449 { "Nepali", "ne" }, // # __CFBundleLanguageNamesArray
450 { "Norwegian", "nb" }, // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no")
451 { "Nyanja", "ny" }, // # __CFBundleLanguageNamesArray
452 { "Nynorsk", "nn" }, // # handle other names (no entry in __CFBundleLanguageNamesArray)
453 { "Oriya", "or" }, // # __CFBundleLanguageNamesArray
454 { "Oromo", "om" }, // # __CFBundleLanguageNamesArray
455 { "Panjabi", "pa" }, // # handle other names
456 { "Pashto", "ps" }, // # __CFBundleLanguageNamesArray
457 { "Persian", "fa" }, // # handle other names
458 { "Polish", "pl" }, // # __CFBundleLanguageNamesArray
459 { "Portuguese", "pt" }, // # __CFBundleLanguageNamesArray
460 { "Portuguese, Brazilian", "pt-BR" }, // # handle other names
461 { "Punjabi", "pa" }, // # __CFBundleLanguageNamesArray
462 { "Pushto", "ps" }, // # handle other names
463 { "Quechua", "qu" }, // # __CFBundleLanguageNamesArray
464 { "Romanian", "ro" }, // # __CFBundleLanguageNamesArray
465 { "Ruanda", "rw" }, // # handle other names
466 { "Rundi", "rn" }, // # __CFBundleLanguageNamesArray
467 { "Russian", "ru" }, // # __CFBundleLanguageNamesArray
468 { "Sami", "se" }, // # __CFBundleLanguageNamesArray
469 { "Sanskrit", "sa" }, // # __CFBundleLanguageNamesArray
470 { "Scottish", "gd" }, // # __CFBundleLanguageNamesArray
471 { "Serbian", "sr" }, // # __CFBundleLanguageNamesArray
472 { "Simplified Chinese", "zh-Hans" }, // # handle other names
473 { "Sindhi", "sd" }, // # __CFBundleLanguageNamesArray
474 { "Sinhalese", "si" }, // # __CFBundleLanguageNamesArray
475 { "Slovak", "sk" }, // # __CFBundleLanguageNamesArray
476 { "Slovenian", "sl" }, // # __CFBundleLanguageNamesArray
477 { "Somali", "so" }, // # __CFBundleLanguageNamesArray
478 { "Spanish", "es" }, // # __CFBundleLanguageNamesArray
479 { "Sundanese", "su" }, // # __CFBundleLanguageNamesArray
480 { "Swahili", "sw" }, // # __CFBundleLanguageNamesArray
481 { "Swedish", "sv" }, // # __CFBundleLanguageNamesArray
482 { "Tagalog", "tl" }, // # __CFBundleLanguageNamesArray
483 { "Tajik", "tg" }, // # handle other names
484 { "Tajiki", "tg" }, // # __CFBundleLanguageNamesArray
485 { "Tamil", "ta" }, // # __CFBundleLanguageNamesArray
486 { "Tatar", "tt" }, // # __CFBundleLanguageNamesArray
487 { "Telugu", "te" }, // # __CFBundleLanguageNamesArray
488 { "Thai", "th" }, // # __CFBundleLanguageNamesArray
489 { "Tibetan", "bo" }, // # __CFBundleLanguageNamesArray
490 { "Tigrinya", "ti" }, // # __CFBundleLanguageNamesArray
491 { "Tongan", "to" }, // # __CFBundleLanguageNamesArray
492 { "Traditional Chinese", "zh-Hant" }, // # handle other names
493 { "Turkish", "tr" }, // # __CFBundleLanguageNamesArray
494 { "Turkmen", "tk" }, // # __CFBundleLanguageNamesArray
495 { "Uighur", "ug" }, // # __CFBundleLanguageNamesArray
496 { "Ukrainian", "uk" }, // # __CFBundleLanguageNamesArray
497 { "Urdu", "ur" }, // # __CFBundleLanguageNamesArray
498 { "Uzbek", "uz" }, // # __CFBundleLanguageNamesArray
499 { "Vietnamese", "vi" }, // # __CFBundleLanguageNamesArray
500 { "Welsh", "cy" }, // # __CFBundleLanguageNamesArray
501 { "Yiddish", "yi" }, // # __CFBundleLanguageNamesArray
502 { "ar_??", "ar" }, // # from old MapScriptInfoAndISOCodes
503 { "az.Ar", "az-Arab" }, // # from old LocaleRefGetPartString
504 { "az.Cy", "az-Cyrl" }, // # from old LocaleRefGetPartString
505 { "az.La", "az-Latn" }, // # from old LocaleRefGetPartString
506 { "be_??", "be_BY" }, // # from old MapScriptInfoAndISOCodes
507 { "bn_??", "bn" }, // # from old LocaleRefGetPartString
508 { "bo_??", "bo" }, // # from old MapScriptInfoAndISOCodes
509 { "br_??", "br" }, // # from old MapScriptInfoAndISOCodes
510 { "cy_??", "cy" }, // # from old MapScriptInfoAndISOCodes
511 { "de-96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9>
512 { "de_96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9>
513 { "de_??", "de-1996" }, // # from old MapScriptInfoAndISOCodes
514 { "el.El-P", "grc" }, // # from old LocaleRefGetPartString
515 { "en-ascii", "en_001" }, // # from earlier version of tables in this file!
516 { "en_??", "en_001" }, // # from old MapScriptInfoAndISOCodes
517 { "eo_??", "eo" }, // # from old MapScriptInfoAndISOCodes
518 { "es_??", "es_419" }, // # from old MapScriptInfoAndISOCodes
519 { "es_XL", "es_419" }, // # from earlier version of tables in this file!
520 { "fr_??", "fr_001" }, // # from old MapScriptInfoAndISOCodes
521 { "ga-dots", "ga-Latg" }, // # from earlier version of tables in this file! // <1.8>
522 { "ga-dots_IE", "ga-Latg_IE" }, // # from earlier version of tables in this file! // <1.8>
523 { "ga.Lg", "ga-Latg" }, // # from old LocaleRefGetPartString // <1.8>
524 { "ga.Lg_IE", "ga-Latg_IE" }, // # from old LocaleRefGetPartString // <1.8>
525 { "gd_??", "gd" }, // # from old MapScriptInfoAndISOCodes
526 { "gv_??", "gv" }, // # from old MapScriptInfoAndISOCodes
527 { "jv.La", "jv" }, // # logical extension // <1.9>
528 { "jw.La", "jv" }, // # from old LocaleRefGetPartString
529 { "kk.Cy", "kk" }, // # from old LocaleRefGetPartString
530 { "kl.La", "kl" }, // # from old LocaleRefGetPartString
531 { "kl.La_GL", "kl_GL" }, // # from old LocaleRefGetPartString // <1.9>
532 { "lp_??", "se" }, // # from old MapScriptInfoAndISOCodes
533 { "mk_??", "mk_MK" }, // # from old MapScriptInfoAndISOCodes
534 { "mn.Cy", "mn-Cyrl" }, // # from old LocaleRefGetPartString
535 { "mn.Mn", "mn-Mong" }, // # from old LocaleRefGetPartString
536 { "ms.Ar", "ms-Arab" }, // # from old LocaleRefGetPartString
537 { "ms.La", "ms" }, // # from old LocaleRefGetPartString
538 { "nl-be", "nl-BE" }, // # from old LocaleRefGetPartString
539 { "nl-be_BE", "nl_BE" }, // # from old LocaleRefGetPartString
540 { "no-NO", "nb-NO" }, // # not handled by localeStringPrefixToCanonical
541 { "no-NO_NO", "nb-NO_NO" }, // # not handled by localeStringPrefixToCanonical
542 // { "no-bok_NO", "nb_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
543 // { "no-nyn_NO", "nn_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
544 // { "nya", "ny" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
545 { "pa_??", "pa" }, // # from old LocaleRefGetPartString
546 { "sa.Dv", "sa" }, // # from old LocaleRefGetPartString
547 { "sl_??", "sl_SI" }, // # from old MapScriptInfoAndISOCodes
548 { "sr_??", "sr_RS" }, // # from old MapScriptInfoAndISOCodes // <1.18>
549 { "su.La", "su" }, // # from old LocaleRefGetPartString
550 { "yi.He", "yi" }, // # from old LocaleRefGetPartString
551 { "zh-simp", "zh-Hans" }, // # from earlier version of tables in this file!
552 { "zh-trad", "zh-Hant" }, // # from earlier version of tables in this file!
553 { "zh.Ha-S", "zh-Hans" }, // # from old LocaleRefGetPartString
554 { "zh.Ha-S_CN", "zh_CN" }, // # from old LocaleRefGetPartString
555 { "zh.Ha-T", "zh-Hant" }, // # from old LocaleRefGetPartString
556 { "zh.Ha-T_TW", "zh_TW" }, // # from old LocaleRefGetPartString
557 };
558 enum {
559 kNumOldAppleLocaleToCanonical = sizeof(oldAppleLocaleToCanonical)/sizeof(KeyStringToResultString)
560 };
561
// localeStringPrefixToCanonical
// Lookup table pairing a non-canonical leading language code (first column, the key) with the
// canonical prefix that replaces it (second column, the result).
562 static const KeyStringToResultString localeStringPrefixToCanonical[] = {
563 // Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to 2-letter ISO 639 code.
// (The only result that is not a bare 2-letter code is "zh-hakka", for the deprecated tag "i-hak".)
564 // (special cases for 'sh' handled separately)
565 // First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column.
// strcmp orders '-' (0x2D) before the lowercase letters, which is why "i-hak" precedes "ice"
// and "no-bok" precedes "nob".
// NOTE(review): presumably searched via bsearch with _CompareLowerTestEntryPrefixToTableEntryKey,
// which lowercases the test string and expects lowercase table keys - confirm against the caller.
566 //
567 // non-canonical canonical [ comment ] # source/reason for non-canonical string
568 // prefix prefix
569 // ------------- ---------
570
571 { "afr", "af" }, // Afrikaans
572 { "alb", "sq" }, // Albanian
573 { "amh", "am" }, // Amharic
574 { "ara", "ar" }, // Arabic
575 { "arm", "hy" }, // Armenian
576 { "asm", "as" }, // Assamese
577 { "aym", "ay" }, // Aymara
578 { "aze", "az" }, // Azerbaijani
579 { "baq", "eu" }, // Basque
580 { "bel", "be" }, // Belarusian
581 { "ben", "bn" }, // Bengali
582 { "bih", "bh" }, // Bihari
583 { "bod", "bo" }, // Tibetan
584 { "bos", "bs" }, // Bosnian
585 { "bre", "br" }, // Breton
586 { "bul", "bg" }, // Bulgarian
587 { "bur", "my" }, // Burmese
588 { "cat", "ca" }, // Catalan
589 { "ces", "cs" }, // Czech
590 { "che", "ce" }, // Chechen
591 { "chi", "zh" }, // Chinese
592 { "cor", "kw" }, // Cornish
593 { "cos", "co" }, // Corsican
594 { "cym", "cy" }, // Welsh
595 { "cze", "cs" }, // Czech
596 { "dan", "da" }, // Danish
597 { "deu", "de" }, // German
598 { "dut", "nl" }, // Dutch
599 { "dzo", "dz" }, // Dzongkha
600 { "ell", "el" }, // Greek, Modern (1453-)
601 { "eng", "en" }, // English
602 { "epo", "eo" }, // Esperanto
603 { "est", "et" }, // Estonian
604 { "eus", "eu" }, // Basque
605 { "fao", "fo" }, // Faroese
606 { "fas", "fa" }, // Persian
607 { "fin", "fi" }, // Finnish
608 { "fra", "fr" }, // French
609 { "fre", "fr" }, // French
610 { "geo", "ka" }, // Georgian
611 { "ger", "de" }, // German
612 { "gla", "gd" }, // Gaelic,Scottish
613 { "gle", "ga" }, // Irish
614 { "glg", "gl" }, // Gallegan
615 { "glv", "gv" }, // Manx
616 { "gre", "el" }, // Greek, Modern (1453-)
617 { "grn", "gn" }, // Guarani
618 { "guj", "gu" }, // Gujarati
619 { "heb", "he" }, // Hebrew
620 { "hin", "hi" }, // Hindi
621 { "hrv", "hr" }, // Croatian
622 { "hun", "hu" }, // Hungarian
623 { "hye", "hy" }, // Armenian
624 { "i-hak", "zh-hakka" }, // Hakka # deprecated RFC 3066
625 { "i-lux", "lb" }, // Luxembourgish # deprecated RFC 3066
626 { "i-navajo", "nv" }, // Navajo # deprecated RFC 3066
627 { "ice", "is" }, // Icelandic
628 { "iku", "iu" }, // Inuktitut
629 { "ile", "ie" }, // Interlingue
630 { "in", "id" }, // Indonesian # deprecated 639 code in -> id (1989)
631 { "ina", "ia" }, // Interlingua
632 { "ind", "id" }, // Indonesian
633 { "isl", "is" }, // Icelandic
634 { "ita", "it" }, // Italian
635 { "iw", "he" }, // Hebrew # deprecated 639 code iw -> he (1989)
636 { "jav", "jv" }, // Javanese
637 { "jaw", "jv" }, // Javanese # deprecated 639 code jaw -> jv (2001)
638 { "ji", "yi" }, // Yiddish # deprecated 639 code ji -> yi (1989)
639 { "jpn", "ja" }, // Japanese
640 { "kal", "kl" }, // Kalaallisut
641 { "kan", "kn" }, // Kannada
642 { "kas", "ks" }, // Kashmiri
643 { "kat", "ka" }, // Georgian
644 { "kaz", "kk" }, // Kazakh
645 { "khm", "km" }, // Khmer
646 { "kin", "rw" }, // Kinyarwanda
647 { "kir", "ky" }, // Kirghiz
648 { "kor", "ko" }, // Korean
649 { "kur", "ku" }, // Kurdish
650 { "lao", "lo" }, // Lao
651 { "lat", "la" }, // Latin
652 { "lav", "lv" }, // Latvian
653 { "lit", "lt" }, // Lithuanian
654 { "ltz", "lb" }, // Letzeburgesch
655 { "mac", "mk" }, // Macedonian
656 { "mal", "ml" }, // Malayalam
657 { "mar", "mr" }, // Marathi
658 { "may", "ms" }, // Malay
659 { "mkd", "mk" }, // Macedonian
660 { "mlg", "mg" }, // Malagasy
661 { "mlt", "mt" }, // Maltese
662 { "mol", "mo" }, // Moldavian
663 { "mon", "mn" }, // Mongolian
664 { "msa", "ms" }, // Malay
665 { "mya", "my" }, // Burmese
666 { "nep", "ne" }, // Nepali
667 { "nld", "nl" }, // Dutch
668 { "nno", "nn" }, // Norwegian Nynorsk
669 { "no", "nb" }, // Norwegian generic # ambiguous 639 code no -> nb
670 { "no-bok", "nb" }, // Norwegian Bokmal # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
671 { "no-nyn", "nn" }, // Norwegian Nynorsk # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
672 { "nob", "nb" }, // Norwegian Bokmal
673 { "nor", "nb" }, // Norwegian generic # ambiguous 639 code nor -> nb
674 { "nya", "ny" }, // Nyanja/Chewa/Chichewa # 3-letter code used in old LocaleRefGetPartString
675 { "oci", "oc" }, // Occitan/Provencal
676 { "ori", "or" }, // Oriya
677 { "orm", "om" }, // Oromo,Galla
678 { "pan", "pa" }, // Panjabi
679 { "per", "fa" }, // Persian
680 { "pol", "pl" }, // Polish
681 { "por", "pt" }, // Portuguese
682 { "pus", "ps" }, // Pushto
683 { "que", "qu" }, // Quechua
684 { "roh", "rm" }, // Raeto-Romance
685 { "ron", "ro" }, // Romanian
686 { "rum", "ro" }, // Romanian
687 { "run", "rn" }, // Rundi
688 { "rus", "ru" }, // Russian
689 { "san", "sa" }, // Sanskrit
690 { "scc", "sr" }, // Serbian
691 { "scr", "hr" }, // Croatian
692 { "sin", "si" }, // Sinhalese
693 { "slk", "sk" }, // Slovak
694 { "slo", "sk" }, // Slovak
695 { "slv", "sl" }, // Slovenian
696 { "sme", "se" }, // Sami,Northern
697 { "snd", "sd" }, // Sindhi
698 { "som", "so" }, // Somali
699 { "spa", "es" }, // Spanish
700 { "sqi", "sq" }, // Albanian
701 { "srp", "sr" }, // Serbian
702 { "sun", "su" }, // Sundanese
703 { "swa", "sw" }, // Swahili
704 { "swe", "sv" }, // Swedish
705 { "tam", "ta" }, // Tamil
706 { "tat", "tt" }, // Tatar
707 { "tel", "te" }, // Telugu
708 { "tgk", "tg" }, // Tajik
709 { "tgl", "tl" }, // Tagalog
710 { "tha", "th" }, // Thai
711 { "tib", "bo" }, // Tibetan
712 { "tir", "ti" }, // Tigrinya
713 { "ton", "to" }, // Tongan
714 { "tuk", "tk" }, // Turkmen
715 { "tur", "tr" }, // Turkish
716 { "uig", "ug" }, // Uighur
717 { "ukr", "uk" }, // Ukrainian
718 { "urd", "ur" }, // Urdu
719 { "uzb", "uz" }, // Uzbek
720 { "vie", "vi" }, // Vietnamese
721 { "wel", "cy" }, // Welsh
722 { "yid", "yi" }, // Yiddish
723 { "zho", "zh" }, // Chinese
724 };
725 enum {
// Number of entries in localeStringPrefixToCanonical, computed at compile time from the array size.
726 kNumLocaleStringPrefixToCanonical = sizeof(localeStringPrefixToCanonical)/sizeof(KeyStringToResultString)
727 };
728
729
// specialCases
// Ordered list of language/region code updates that cannot be expressed as a plain prefix lookup.
// The list ends with an all-NULL terminator entry rather than having a separate count constant.
730 static const SpecialCaseUpdates specialCases[] = {
731 // Data for special cases
732 // a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was
733 // replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after
734 // the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! Then after
735 // Serbia and Montenegro split, the code CS was replaced in 2006-09 with separate codes RS and ME. If we
736 // see CS but a language of cs or sk, we change CS to CZ or SK. Otherwise, we change CS (and old YU) to RS.
737 // b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and
738 // deprecated in 2000. We guess which one to map it to as follows: If there is a region tag of HR we use
739 // hr; if there is a region tag of (now) RS we use sr; else we do not change it (not enough info).
740 // c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the
741 // "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB).
//
// NOTE(review): judging from the row comments, the five columns appear to be
// { language to require (NULL = any), first region tag to look for, replacement,
// second region tag to look for, replacement } - confirm against the SpecialCaseUpdates declaration.
// Row order is significant: the language-specific CS rows precede the generic CS row, and the 'sh'
// row comes last so that it sees regions already updated to RS.
742 { NULL, "-UK", "GB", NULL, NULL }, // always change UK to GB (UK is "exceptionally reserved" to mean GB)
743 { NULL, "-TP", "TL", NULL, NULL }, // always change TP to TL (East Timor, code changed 2002-05)
744 { "cs", "-CS", "CZ", NULL, NULL }, // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic)
745 { "sk", "-CS", "SK", NULL, NULL }, // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia)
746 { NULL, "-CS", "RS", NULL, NULL }, // otherwise map CS (assume Serbia+Montenegro) to RS (Serbia)
747 { NULL, "-YU", "RS", NULL, NULL }, // also map old YU (assume Serbia+Montenegro) to RS (Serbia)
748 { "sh", "-HR", "hr", "-RS", "sr" }, // then if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian)
749 // if we find HR (Croatia) or to 'sr' (Serbian) if we find RS (Serbia).
750 // Note: Do this after changing YU/CS to RS as above.
751 { NULL, NULL, NULL, NULL, NULL } // terminator
752 };
753
754
// localeStringRegionToDefaults
// Lookup table keyed by region-code suffix (including its leading '_'); the result is the script tag
// (including its leading '-') that is the default for that region.
755 static const KeyStringToResultString localeStringRegionToDefaults[] = {
756 // For some region-code suffixes, there are default substrings to strip off for canonical string.
757 // Must be sorted according to how strcmp compares the strings in the first column
// NOTE(review): the sort requirement suggests a bsearch lookup in code outside this section - confirm.
758 //
759 // region default writing
760 // suffix system tags, strip comment
761 // -------- ------------- ---------
762 { "_CN", "-Hans" }, // mainland China, default is simplified
763 { "_HK", "-Hant" }, // Hong Kong, default is traditional
764 { "_MO", "-Hant" }, // Macao, default is traditional
765 { "_SG", "-Hans" }, // Singapore, default is simplified
766 { "_TW", "-Hant" }, // Taiwan, default is traditional
767 };
768 enum {
// Number of entries in localeStringRegionToDefaults, computed at compile time from the array size.
769 kNumLocaleStringRegionToDefaults = sizeof(localeStringRegionToDefaults)/sizeof(KeyStringToResultString)
770 };
771
// localeStringPrefixToDefaults
// Lookup table keyed by language tag prefix (including its trailing '-'); the result lists the
// default tag(s) for that language, each with its leading '-'.
772 static const KeyStringToResultString localeStringPrefixToDefaults[] = {
773 // For some initial portions of language tag, there are default substrings to strip off for canonical string.
774 // Must be sorted according to how strcmp compares the strings in the first column
// strcmp orders '-' (0x2D) before the lowercase letters, which is why "ko-" precedes "kok-".
// Most keys are 2-letter codes; "haw-" and "kok-" are 3-letter codes.
// The "de-" entry carries two tags in one result string, separated by a space.
// NOTE(review): how the code splits a multi-tag result is not visible in this section - confirm.
775 //
776 // language default writing
777 // tag prefix system tags, strip comment
778 // -------- ------------- ---------
779 { "ab-", "-Cyrl" }, // Abkhazian
780 { "af-", "-Latn" }, // Afrikaans
781 { "am-", "-Ethi" }, // Amharic
782 { "ar-", "-Arab" }, // Arabic
783 { "as-", "-Beng" }, // Assamese
784 { "ay-", "-Latn" }, // Aymara
785 { "be-", "-Cyrl" }, // Belarusian
786 { "bg-", "-Cyrl" }, // Bulgarian
787 { "bn-", "-Beng" }, // Bengali
788 { "bo-", "-Tibt" }, // Tibetan (? not Suppress-Script)
789 { "br-", "-Latn" }, // Breton (? not Suppress-Script)
790 { "bs-", "-Latn" }, // Bosnian
791 { "ca-", "-Latn" }, // Catalan
792 { "cs-", "-Latn" }, // Czech
793 { "cy-", "-Latn" }, // Welsh
794 { "da-", "-Latn" }, // Danish
795 { "de-", "-Latn -1901" }, // German, traditional orthography
796 { "dv-", "-Thaa" }, // Divehi/Maldivian
797 { "dz-", "-Tibt" }, // Dzongkha
798 { "el-", "-Grek" }, // Greek (modern, monotonic)
799 { "en-", "-Latn" }, // English
800 { "eo-", "-Latn" }, // Esperanto
801 { "es-", "-Latn" }, // Spanish
802 { "et-", "-Latn" }, // Estonian
803 { "eu-", "-Latn" }, // Basque
804 { "fa-", "-Arab" }, // Farsi
805 { "fi-", "-Latn" }, // Finnish
806 { "fo-", "-Latn" }, // Faroese
807 { "fr-", "-Latn" }, // French
808 { "ga-", "-Latn" }, // Irish
809 { "gd-", "-Latn" }, // Scottish Gaelic (? not Suppress-Script)
810 { "gl-", "-Latn" }, // Galician
811 { "gn-", "-Latn" }, // Guarani
812 { "gu-", "-Gujr" }, // Gujarati
813 { "gv-", "-Latn" }, // Manx
814 { "haw-", "-Latn" }, // Hawaiian (? not Suppress-Script)
815 { "he-", "-Hebr" }, // Hebrew
816 { "hi-", "-Deva" }, // Hindi
817 { "hr-", "-Latn" }, // Croatian
818 { "hu-", "-Latn" }, // Hungarian
819 { "hy-", "-Armn" }, // Armenian
820 { "id-", "-Latn" }, // Indonesian
821 { "is-", "-Latn" }, // Icelandic
822 { "it-", "-Latn" }, // Italian
823 { "ja-", "-Jpan" }, // Japanese
824 { "ka-", "-Geor" }, // Georgian
825 { "kk-", "-Cyrl" }, // Kazakh
826 { "kl-", "-Latn" }, // Kalaallisut/Greenlandic
827 { "km-", "-Khmr" }, // Central Khmer
828 { "kn-", "-Knda" }, // Kannada
829 { "ko-", "-Hang" }, // Korean (? not Suppress-Script)
830 { "kok-", "-Deva" }, // Konkani
831 { "la-", "-Latn" }, // Latin
832 { "lb-", "-Latn" }, // Luxembourgish
833 { "lo-", "-Laoo" }, // Lao
834 { "lt-", "-Latn" }, // Lithuanian
835 { "lv-", "-Latn" }, // Latvian
836 { "mg-", "-Latn" }, // Malagasy
837 { "mk-", "-Cyrl" }, // Macedonian
838 { "ml-", "-Mlym" }, // Malayalam
839 { "mo-", "-Latn" }, // Moldavian
840 { "mr-", "-Deva" }, // Marathi
841 { "ms-", "-Latn" }, // Malay
842 { "mt-", "-Latn" }, // Maltese
843 { "my-", "-Mymr" }, // Burmese/Myanmar
844 { "nb-", "-Latn" }, // Norwegian Bokmal
845 { "ne-", "-Deva" }, // Nepali
846 { "nl-", "-Latn" }, // Dutch
847 { "nn-", "-Latn" }, // Norwegian Nynorsk
848 { "ny-", "-Latn" }, // Chichewa/Nyanja
849 { "om-", "-Latn" }, // Oromo
850 { "or-", "-Orya" }, // Oriya
851 { "pa-", "-Guru" }, // Punjabi
852 { "pl-", "-Latn" }, // Polish
853 { "ps-", "-Arab" }, // Pushto
854 { "pt-", "-Latn" }, // Portuguese
855 { "qu-", "-Latn" }, // Quechua
856 { "rn-", "-Latn" }, // Rundi
857 { "ro-", "-Latn" }, // Romanian
858 { "ru-", "-Cyrl" }, // Russian
859 { "rw-", "-Latn" }, // Kinyarwanda
860 { "sa-", "-Deva" }, // Sanskrit (? not Suppress-Script)
861 { "se-", "-Latn" }, // Sami (? not Suppress-Script)
862 { "si-", "-Sinh" }, // Sinhala
863 { "sk-", "-Latn" }, // Slovak
864 { "sl-", "-Latn" }, // Slovenian
865 { "so-", "-Latn" }, // Somali
866 { "sq-", "-Latn" }, // Albanian
867 { "sv-", "-Latn" }, // Swedish
868 { "sw-", "-Latn" }, // Swahili
869 { "ta-", "-Taml" }, // Tamil
870 { "te-", "-Telu" }, // Telugu
871 { "th-", "-Thai" }, // Thai
872 { "ti-", "-Ethi" }, // Tigrinya
873 { "tl-", "-Latn" }, // Tagalog
874 { "tn-", "-Latn" }, // Tswana
875 { "to-", "-Latn" }, // Tonga of Tonga Islands
876 { "tr-", "-Latn" }, // Turkish
877 { "uk-", "-Cyrl" }, // Ukrainian
878 { "ur-", "-Arab" }, // Urdu
879 { "vi-", "-Latn" }, // Vietnamese
880 { "wo-", "-Latn" }, // Wolof
881 { "xh-", "-Latn" }, // Xhosa
882 { "yi-", "-Hebr" }, // Yiddish
883 { "zh-", "-Hani" }, // Chinese (? not Suppress-Script)
884 { "zu-", "-Latn" }, // Zulu
885 };
886 enum {
// Number of entries in localeStringPrefixToDefaults, computed at compile time from the array size.
887 kNumLocaleStringPrefixToDefaults = sizeof(localeStringPrefixToDefaults)/sizeof(KeyStringToResultString)
888 };
889
// appleLocaleToLanguageString
// Lookup table keyed by an underscore-separated locale string; the result is the hyphen-separated
// language string to use in its place. The Chinese entries map a region to a script tag
// (Hans = simplified, Hant = traditional) instead of just changing the separator.
890 static const KeyStringToResultString appleLocaleToLanguageString[] = {
891 // Map locale strings that Apple uses as language IDs to real language strings.
892 // Must be sorted according to how strcmp compares the strings in the first column.
893 // Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now
894 // handled in the code. <1.19>
895 //
896 // locale lang [ comment ]
897 // string string
898 // ------- -------
899 { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752]
900 { "zh_CN", "zh-Hans" }, // mainland China => simplified
901 { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used
902 { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used
903 { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used
904 { "zh_TW", "zh-Hant" }, // Taiwan => traditional
905 };
906 enum {
// Number of entries in appleLocaleToLanguageString, computed at compile time from the array size.
907 kNumAppleLocaleToLanguageString = sizeof(appleLocaleToLanguageString)/sizeof(KeyStringToResultString)
908 };
909
// appleLocaleToLanguageStringForCFBundle
// Variant of appleLocaleToLanguageString that still lists explicit ll_RR -> ll-RR entries in
// addition to the POSIX and Chinese entries. Entries that are commented out are kept for
// reference and marked "not currently used".
// NOTE(review): the name suggests this table serves CFBundle lookups - confirm against the caller.
910 static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle[] = {
911 // Map locale strings that Apple uses as language IDs to real language strings.
912 // Must be sorted according to how strcmp compares the strings in the first column.
913 //
914 // locale lang [ comment ]
915 // string string
916 // ------- -------
917 { "de_AT", "de-AT" }, // Austrian German
918 { "de_CH", "de-CH" }, // Swiss German
919 // { "de_DE", "de-DE" }, // German for Germany (default), not currently used
920 { "en_AU", "en-AU" }, // Australian English
921 { "en_CA", "en-CA" }, // Canadian English
922 { "en_GB", "en-GB" }, // British English
923 // { "en_IE", "en-IE" }, // Irish English, not currently used
924 { "en_US", "en-US" }, // U.S. English
925 { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752]
926 // { "fr_BE", "fr-BE" }, // Belgian French, not currently used
927 { "fr_CA", "fr-CA" }, // Canadian French
928 { "fr_CH", "fr-CH" }, // Swiss French
929 // { "fr_FR", "fr-FR" }, // French for France (default), not currently used
930 { "nl_BE", "nl-BE" }, // Flemish = Vlaams, Dutch for Belgium
931 // { "nl_NL", "nl-NL" }, // Dutch for Netherlands (default), not currently used
932 { "pt_BR", "pt-BR" }, // Brazilian Portuguese
933 { "pt_PT", "pt-PT" }, // Portuguese for Portugal
934 { "zh_CN", "zh-Hans" }, // mainland China => simplified
935 { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used
936 { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used
937 { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used
938 { "zh_TW", "zh-Hant" }, // Taiwan => traditional
939 };
940 enum {
// Number of entries in appleLocaleToLanguageStringForCFBundle, computed at compile time from the array size.
941 kNumAppleLocaleToLanguageStringForCFBundle = sizeof(appleLocaleToLanguageStringForCFBundle)/sizeof(KeyStringToResultString)
942 };
943
944
// LocaleToLegacyCodes
// One row of the localeToLegacyCodes table: a reduced locale string together with the legacy
// region code, language code and string encoding listed for it.
945 struct LocaleToLegacyCodes {
946 const char * locale; // reduced to language plus one other component (script, region, variant), separators normalized to '_'
947 RegionCode regCode; // legacy region code (ver* constant); the table uses -1 where it lists no region code
948 LangCode langCode; // legacy language code (lang* constant); the table uses -1 where it lists no language code
949 CFStringEncoding encoding; // legacy string encoding value listed for this locale
950 };
951 typedef struct LocaleToLegacyCodes LocaleToLegacyCodes;
952
// localeToLegacyCodes
// Table giving, for each reduced locale string, its legacy RegionCode, LangCode and CFStringEncoding.
// The numeric values are annotated inline with the name of the legacy constant they stand for;
// -1 marks a column for which no code is listed. A trailing comment such as "Latn;" names the
// script of the entry, and a commented-out region after the locale (e.g. "af"/*ZA*/) names the
// region implied by the bare language code.
// Entries are listed in strcmp order of the locale column ("zh_HK" precedes "zh_Hans" because
// uppercase letters sort before lowercase ones).
// NOTE(review): the ordering is presumably required by a binary search in code outside this
// section - confirm before inserting entries.
953 static const LocaleToLegacyCodes localeToLegacyCodes[] = {
954 // locale RegionCode LangCode CFStringEncoding
955 { "af"/*ZA*/, 102/*verAfrikaans*/, 141/*langAfrikaans*/, 0/*Roman*/ }, // Latn
956 { "am", -1, 85/*langAmharic*/, 28/*Ethiopic*/ }, // Ethi
957 { "ar", 16/*verArabic*/, 12/*langArabic*/, 4/*Arabic*/ }, // Arab;
958 { "as", -1, 68/*langAssamese*/, 13/*Bengali*/ }, // Beng;
959 { "ay", -1, 134/*langAymara*/, 0/*Roman*/ }, // Latn;
960 { "az", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // assume "az" defaults to -Cyrl
961 { "az_Arab", -1, 50/*langAzerbaijanAr*/, 4/*Arabic*/ }, // Arab;
962 { "az_Cyrl", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // Cyrl;
963 { "az_Latn", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // Latn;
964 { "be"/*BY*/, 61/*verBelarus*/, 46/*langBelorussian*/, 7/*Cyrillic*/ }, // Cyrl;
965 { "bg"/*BG*/, 72/*verBulgaria*/, 44/*langBulgarian*/, 7/*Cyrillic*/ }, // Cyrl;
966 { "bn", 60/*verBengali*/, 67/*langBengali*/, 13/*Bengali*/ }, // Beng;
967 { "bo", 105/*verTibetan*/, 63/*langTibetan*/, 26/*Tibetan*/ }, // Tibt;
968 { "br", 77/*verBreton*/, 142/*langBreton*/, 39/*Celtic*/ }, // Latn;
969 { "ca"/*ES*/, 73/*verCatalonia*/, 130/*langCatalan*/, 0/*Roman*/ }, // Latn;
970 { "cs"/*CZ*/, 56/*verCzech*/, 38/*langCzech*/, 29/*CentralEurRoman*/ }, // Latn;
971 { "cy", 79/*verWelsh*/, 128/*langWelsh*/, 39/*Celtic*/ }, // Latn;
972 { "da"/*DK*/, 9/*verDenmark*/, 7/*langDanish*/, 0/*Roman*/ }, // Latn;
973 { "de", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, // assume "de" defaults to verGermany
974 { "de_1996", 70/*verGermanReformed*/, 2/*langGerman*/, 0/*Roman*/ },
975 { "de_AT", 92/*verAustria*/, 2/*langGerman*/, 0/*Roman*/ },
976 { "de_CH", 19/*verGrSwiss*/, 2/*langGerman*/, 0/*Roman*/ },
977 { "de_DE", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ },
978 { "dz"/*BT*/, 83/*verBhutan*/, 137/*langDzongkha*/, 26/*Tibetan*/ }, // Tibt;
979 { "el", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // assume "el" defaults to verGreece
980 { "el_CY", 23/*verCyprus*/, 14/*langGreek*/, 6/*Greek*/ },
981 { "el_GR", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // modern monotonic
982 { "en", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, // "en" defaults to verUS (per Chris Hansten)
983 { "en_001", 37/*verInternational*/, 0/*langEnglish*/, 0/*Roman*/ },
984 { "en_AU", 15/*verAustralia*/, 0/*langEnglish*/, 0/*Roman*/ },
985 { "en_CA", 82/*verEngCanada*/, 0/*langEnglish*/, 0/*Roman*/ },
986 { "en_GB", 2/*verBritain*/, 0/*langEnglish*/, 0/*Roman*/ },
987 { "en_IE", 108/*verIrelandEnglish*/, 0/*langEnglish*/, 0/*Roman*/ },
988 { "en_SG", 100/*verSingapore*/, 0/*langEnglish*/, 0/*Roman*/ },
989 { "en_US", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ },
990 { "eo", 103/*verEsperanto*/, 94/*langEsperanto*/, 0/*Roman*/ }, // Latn;
991 { "es", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, // "es" defaults to verSpain (per Chris Hansten)
992 { "es_419", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, // new BCP 47 tag
993 { "es_ES", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ },
994 { "es_MX", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ },
995 { "es_US", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ },
996 { "et"/*EE*/, 44/*verEstonia*/, 27/*langEstonian*/, 29/*CentralEurRoman*/ },
997 { "eu", -1, 129/*langBasque*/, 0/*Roman*/ }, // Latn;
998 { "fa"/*IR*/, 48/*verIran*/, 31/*langFarsi/Persian*/, 0x8C/*Farsi*/ }, // Arab;
999 { "fi"/*FI*/, 17/*verFinland*/, 13/*langFinnish*/, 0/*Roman*/ },
1000 { "fo"/*FO*/, 47/*verFaroeIsl*/, 30/*langFaroese*/, 37/*Icelandic*/ },
1001 { "fr", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, // "fr" defaults to verFrance (per Chris Hansten)
1002 { "fr_001", 91/*verFrenchUniversal*/, 1/*langFrench*/, 0/*Roman*/ },
1003 { "fr_BE", 98/*verFrBelgium*/, 1/*langFrench*/, 0/*Roman*/ },
1004 { "fr_CA", 11/*verFrCanada*/, 1/*langFrench*/, 0/*Roman*/ },
1005 { "fr_CH", 18/*verFrSwiss*/, 1/*langFrench*/, 0/*Roman*/ },
1006 { "fr_FR", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ },
1007 { "ga"/*IE*/, 50/*verIreland*/, 35/*langIrishGaelic*/, 0/*Roman*/ }, // no dots (h after)
1008 { "ga_Latg"/*IE*/, 81/*verIrishGaelicScrip*/, 146/*langIrishGaelicScript*/, 40/*Gaelic*/ }, // using dots
1009 { "gd", 75/*verScottishGaelic*/, 144/*langScottishGaelic*/, 39/*Celtic*/ },
1010 { "gl", -1, 140/*langGalician*/, 0/*Roman*/ }, // Latn;
1011 { "gn", -1, 133/*langGuarani*/, 0/*Roman*/ }, // Latn;
1012 { "grc", 40/*verGreekAncient*/, 148/*langGreekAncient*/, 6/*Greek*/ }, // polytonic (MacGreek doesn't actually support it)
1013 { "gu"/*IN*/, 94/*verGujarati*/, 69/*langGujarati*/, 11/*Gujarati*/ }, // Gujr;
1014 { "gv", 76/*verManxGaelic*/, 145/*langManxGaelic*/, 39/*Celtic*/ }, // Latn;
1015 { "he"/*IL*/, 13/*verIsrael*/, 10/*langHebrew*/, 5/*Hebrew*/ }, // Hebr;
1016 { "hi"/*IN*/, 33/*verIndiaHindi*/, 21/*langHindi*/, 9/*Devanagari*/ }, // Deva;
1017 { "hr"/*HR*/, 68/*verCroatia*/, 18/*langCroatian*/, 36/*Croatian*/ },
1018 { "hu"/*HU*/, 43/*verHungary*/, 26/*langHungarian*/, 29/*CentralEurRoman*/ },
1019 { "hy"/*AM*/, 84/*verArmenian*/, 51/*langArmenian*/, 24/*Armenian*/ }, // Armn;
1020 { "id", -1, 81/*langIndonesian*/, 0/*Roman*/ }, // Latn;
1021 { "is"/*IS*/, 21/*verIceland*/, 15/*langIcelandic*/, 37/*Icelandic*/ },
1022 { "it", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, // "it" defaults to verItaly
1023 { "it_CH", 36/*verItalianSwiss*/, 3/*langItalian*/, 0/*Roman*/ },
1024 { "it_IT", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ },
1025 { "iu"/*CA*/, 78/*verNunavut*/, 143/*langInuktitut*/, 0xEC/*Inuit*/ }, // Cans;
1026 { "ja"/*JP*/, 14/*verJapan*/, 11/*langJapanese*/, 1/*Japanese*/ }, // Jpan;
1027 { "jv", -1, 138/*langJavaneseRom*/, 0/*Roman*/ }, // Latn;
1028 { "ka"/*GE*/, 85/*verGeorgian*/, 52/*langGeorgian*/, 23/*Georgian*/ }, // Geor;
1029 { "kk", -1, 48/*langKazakh*/, 7/*Cyrillic*/ }, // "kk" defaults to -Cyrl; also have -Latn, -Arab
1030 { "kl", 107/*verGreenland*/, 149/*langGreenlandic*/, 0/*Roman*/ }, // Latn;
1031 { "km", -1, 78/*langKhmer*/, 20/*Khmer*/ }, // Khmr;
1032 { "kn", -1, 73/*langKannada*/, 16/*Kannada*/ }, // Knda;
1033 { "ko"/*KR*/, 51/*verKorea*/, 23/*langKorean*/, 3/*Korean*/ }, // Hang;
1034 { "ks", -1, 61/*langKashmiri*/, 4/*Arabic*/ }, // Arab;
1035 { "ku", -1, 60/*langKurdish*/, 4/*Arabic*/ }, // Arab;
1036 { "ky", -1, 54/*langKirghiz*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1037 { "la", -1, 131/*langLatin*/, 0/*Roman*/ }, // Latn;
1038 { "lo", -1, 79/*langLao*/, 22/*Laotian*/ }, // Laoo;
1039 { "lt"/*LT*/, 41/*verLithuania*/, 24/*langLithuanian*/, 29/*CentralEurRoman*/ },
1040 { "lv"/*LV*/, 45/*verLatvia*/, 28/*langLatvian*/, 29/*CentralEurRoman*/ },
1041 { "mg", -1, 93/*langMalagasy*/, 0/*Roman*/ }, // Latn;
1042 { "mk"/*MK*/, 67/*verMacedonian*/, 43/*langMacedonian*/, 7/*Cyrillic*/ }, // Cyrl;
1043 { "ml", -1, 72/*langMalayalam*/, 17/*Malayalam*/ }, // Mlym;
1044 { "mn", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // "mn" defaults to -Mong
1045 { "mn_Cyrl", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // Cyrl;
1046 { "mn_Mong", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // Mong;
1047 { "mo", -1, 53/*langMoldavian*/, 7/*Cyrillic*/ }, // Cyrl;
1048 { "mr"/*IN*/, 104/*verMarathi*/, 66/*langMarathi*/, 9/*Devanagari*/ }, // Deva;
1049 { "ms", -1, 83/*langMalayRoman*/, 0/*Roman*/ }, // "ms" defaults to -Latn;
1050 { "ms_Arab", -1, 84/*langMalayArabic*/, 4/*Arabic*/ }, // Arab;
1051 { "mt"/*MT*/, 22/*verMalta*/, 16/*langMaltese*/, 0/*Roman*/ }, // Latn;
1052 { "mul", 74/*verMultilingual*/, -1, 0 },
1053 { "my", -1, 77/*langBurmese*/, 19/*Burmese*/ }, // Mymr;
1054 { "nb"/*NO*/, 12/*verNorway*/, 9/*langNorwegian*/, 0/*Roman*/ },
1055 { "ne"/*NP*/, 106/*verNepal*/, 64/*langNepali*/, 9/*Devanagari*/ }, // Deva;
1056 { "nl", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, // "nl" defaults to verNetherlands
1057 { "nl_BE", 6/*verFlemish*/, 34/*langFlemish*/, 0/*Roman*/ },
1058 { "nl_NL", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ },
1059 { "nn"/*NO*/, 101/*verNynorsk*/, 151/*langNynorsk*/, 0/*Roman*/ },
1060 { "ny", -1, 92/*langNyanja/Chewa*/, 0/*Roman*/ }, // Latn;
1061 { "om", -1, 87/*langOromo*/, 28/*Ethiopic*/ }, // Ethi;
1062 { "or", -1, 71/*langOriya*/, 12/*Oriya*/ }, // Orya;
1063 { "pa", 95/*verPunjabi*/, 70/*langPunjabi*/, 10/*Gurmukhi*/ }, // Guru;
1064 { "pl"/*PL*/, 42/*verPoland*/, 25/*langPolish*/, 29/*CentralEurRoman*/ },
1065 { "ps", -1, 59/*langPashto*/, 0x8C/*Farsi*/ }, // Arab;
1066 { "pt", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, // "pt" defaults to verBrazil (per Chris Hansten)
1067 { "pt_BR", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ },
1068 { "pt_PT", 10/*verPortugal*/, 8/*langPortuguese*/, 0/*Roman*/ },
1069 { "qu", -1, 132/*langQuechua*/, 0/*Roman*/ }, // Latn;
1070 { "rn", -1, 91/*langRundi*/, 0/*Roman*/ }, // Latn;
1071 { "ro"/*RO*/, 39/*verRomania*/, 37/*langRomanian*/, 38/*Romanian*/ },
1072 { "ru"/*RU*/, 49/*verRussia*/, 32/*langRussian*/, 7/*Cyrillic*/ }, // Cyrl;
1073 { "rw", -1, 90/*langKinyarwanda*/, 0/*Roman*/ }, // Latn;
1074 { "sa", -1, 65/*langSanskrit*/, 9/*Devanagari*/ }, // Deva;
1075 { "sd", -1, 62/*langSindhi*/, 0x8C/*Farsi*/ }, // Arab;
1076 { "se", 46/*verSami*/, 29/*langSami*/, 0/*Roman*/ },
1077 { "si", -1, 76/*langSinhalese*/, 18/*Sinhalese*/ }, // Sinh;
1078 { "sk"/*SK*/, 57/*verSlovak*/, 39/*langSlovak*/, 29/*CentralEurRoman*/ },
1079 { "sl"/*SI*/, 66/*verSlovenian*/, 40/*langSlovenian*/, 36/*Croatian*/ },
1080 { "so", -1, 88/*langSomali*/, 0/*Roman*/ }, // Latn;
1081 { "sq", -1, 36/*langAlbanian*/, 0/*Roman*/ },
1082 { "sr"/*CS,RS*/, 65/*verSerbian*/, 42/*langSerbian*/, 7/*Cyrillic*/ }, // Cyrl;
1083 { "su", -1, 139/*langSundaneseRom*/, 0/*Roman*/ }, // Latn;
1084 { "sv"/*SE*/, 7/*verSweden*/, 5/*langSwedish*/, 0/*Roman*/ },
1085 { "sw", -1, 89/*langSwahili*/, 0/*Roman*/ }, // Latn;
1086 { "ta", -1, 74/*langTamil*/, 14/*Tamil*/ }, // Taml;
1087 { "te", -1, 75/*langTelugu*/, 15/*Telugu*/ }, // Telu
1088 { "tg", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // "tg" defaults to "Cyrl"
1089 { "tg_Cyrl", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1090 { "th"/*TH*/, 54/*verThailand*/, 22/*langThai*/, 21/*Thai*/ }, // Thai;
1091 { "ti", -1, 86/*langTigrinya*/, 28/*Ethiopic*/ }, // Ethi;
1092 { "tk", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // "tk" defaults to Cyrl
1093 { "tk_Cyrl", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1094 { "tl", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn;
1095 { "to"/*TO*/, 88/*verTonga*/, 147/*langTongan*/, 0/*Roman*/ }, // Latn;
1096 { "tr"/*TR*/, 24/*verTurkey*/, 17/*langTurkish*/, 35/*Turkish*/ }, // Latn;
1097 { "tt", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl;
1098 { "tt_Cyrl", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl;
1099 { "ug", -1, 136/*langUighur*/, 4/*Arabic*/ }, // Arab;
1100 { "uk"/*UA*/, 62/*verUkraine*/, 45/*langUkrainian*/, 7/*Cyrillic*/ }, // Cyrl;
1101 { "und", 55/*verScriptGeneric*/, -1, 0 },
1102 { "ur", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // "ur" defaults to verPakistanUrdu
1103 { "ur_IN", 96/*verIndiaUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab
1104 { "ur_PK", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab
1105 { "uz"/*UZ*/, 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1106 { "uz_Cyrl", 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ },
1107 { "vi"/*VN*/, 97/*verVietnam*/, 80/*langVietnamese*/, 30/*Vietnamese*/ }, // Latn
1108 { "yi", -1, 41/*langYiddish*/, 5/*Hebrew*/ }, // Hebr;
1109 { "zh", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, // "zh" defaults to verChina, langSimpChinese
1110 { "zh_CN", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1111 { "zh_HK", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1112 { "zh_Hans", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1113 { "zh_Hant", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1114 { "zh_MO", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1115 { "zh_SG", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1116 { "zh_TW", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1117 };
1118 enum {
// Number of entries in localeToLegacyCodes, computed at compile time from the array size.
1119 kNumLocaleToLegacyCodes = sizeof(localeToLegacyCodes)/sizeof(localeToLegacyCodes[0])
1120 };
1121
1122 /*
1123 For reference here is a list of ICU locales with variants and how some
1124 of them are canonicalized with the ICU function uloc_canonicalize:
1125
1126 ICU 3.0 has:
1127 en_US_POSIX x no change
1128 hy_AM_REVISED x no change
1129 ja_JP_TRADITIONAL -> ja_JP@calendar=japanese
1130 th_TH_TRADITIONAL -> th_TH@calendar=buddhist
1131
1132 ICU 2.8 also had the following (now obsolete):
1133 ca_ES_PREEURO
1134 de__PHONEBOOK -> de@collation=phonebook
1135 de_AT_PREEURO
1136 de_DE_PREEURO
1137 de_LU_PREEURO
1138 el_GR_PREEURO
1139 en_BE_PREEURO
1140 en_GB_EURO -> en_GB@currency=EUR
1141 en_IE_PREEURO -> en_IE@currency=IEP
1142 es__TRADITIONAL -> es@collation=traditional
1143 es_ES_PREEURO
1144 eu_ES_PREEURO
1145 fi_FI_PREEURO
1146 fr_BE_PREEURO
1147 fr_FR_PREEURO -> fr_FR@currency=FRF
1148 fr_LU_PREEURO
1149 ga_IE_PREEURO
1150 gl_ES_PREEURO
1151 hi__DIRECT -> hi@collation=direct
1152 it_IT_PREEURO
1153 nl_BE_PREEURO
1154 nl_NL_PREEURO
1155 pt_PT_PREEURO
1156 zh__PINYIN -> zh@collation=pinyin
1157 zh_TW_STROKE -> zh_TW@collation=stroke
1158
1159 */
1160
1161 // _CompareTestEntryToTableEntryKey
1162 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1163 // comparison function for bsearch
1164 static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1165 return strcmp( ((const KeyStringToResultString *)testEntryPtr)->key, ((const KeyStringToResultString *)tableEntryKeyPtr)->key );
1166 }
1167
1168 // _CompareTestEntryPrefixToTableEntryKey
1169 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1170 // Comparison function for bsearch. Assumes prefix IS terminated with '-' or '_'.
1171 // Do the following instead of strlen & strncmp so we don't walk tableEntry key twice.
1172 static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1173 const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key;
1174 const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key;
1175
1176 while ( *testPtr == *tablePtr && *tablePtr != 0 ) {
1177 testPtr++; tablePtr++;
1178 }
1179 if ( *tablePtr != 0 ) {
1180 // strings are different, and the string in the table has not run out;
1181 // i.e. the table entry is not a prefix of the text string.
1182 return ( *testPtr < *tablePtr )? -1: 1;
1183 }
1184 return 0;
1185 }
1186
1187 // _CompareLowerTestEntryPrefixToTableEntryKey
1188 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1189 // Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'.
1190 // Lowercases the test string before comparison (the table should already have lowercased entries).
1191 static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1192 const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key;
1193 const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key;
1194 char lowerTestChar;
1195
1196 while ( (lowerTestChar = tolower(*testPtr)) == *tablePtr && *tablePtr != 0 && lowerTestChar != '_' ) { // <1.9>
1197 testPtr++; tablePtr++;
1198 }
1199 if ( *tablePtr != 0 ) {
1200 // strings are different, and the string in the table has not run out;
1201 // i.e. the table entry is not a prefix of the text string.
1202 if (lowerTestChar == '_') // <1.9>
1203 return -1; // <1.9>
1204 return ( lowerTestChar < *tablePtr )? -1: 1;
1205 }
1206 // The string in the table has run out. If the test string char is not alnum,
1207 // then the string matches, else the test string sorts after.
1208 return ( !isalnum(lowerTestChar) )? 0: 1;
1209 }
1210
// _DeleteCharsAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Delete the first _length_ characters of the string at _stringPtr_ by sliding the rest of the
// string, including its terminating NUL, down over them.
// The caller guarantees that the string holds at least _length_ characters.
static void _DeleteCharsAtPointer(char *stringPtr, int length) {
    const char * readPtr = stringPtr + length;  // first character to keep
    char * writePtr = stringPtr;
    char movedChar;

    do {
        movedChar = *readPtr++;
        *writePtr++ = movedChar;
    } while (movedChar != 0);   // stop once the terminator has been moved
}
1220
// _CopyReplacementAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Overwrite the characters at _stringPtr_ with the characters of _replacementPtr_.
// The replacement's terminating NUL is *not* copied, so whatever follows the overwritten
// characters in the destination is left in place.
static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) {
    char nextChar;

    while ((nextChar = *replacementPtr++) != 0) {
        *stringPtr++ = nextChar;
    }
}
1229
1230 // _CheckForTag
1231 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1232 static Boolean _CheckForTag(const char *localeStringPtr, const char *tagPtr, int tagLen) {
1233 return ( strncmp(localeStringPtr, tagPtr, tagLen) == 0 && !isalnum(localeStringPtr[tagLen]) );
1234 }
1235
// _ReplacePrefix
// Move this code from _UpdateFullLocaleString into separate function				// <1.10>
//
// Replace the first oldPrefixLen characters of locString with newPrefix, shifting
// the rest of the string (including its terminating NUL) as needed.
//   locString        NUL-terminated string, modified in place
//   locStringMaxLen  size in bytes of the buffer holding locString
//   oldPrefixLen     number of leading characters to replace
//   newPrefix        NUL-terminated replacement text
// If the result would not fit in locStringMaxLen bytes (including the NUL), or if
// oldPrefixLen does not describe a prefix of locString, the string is left unchanged.
static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) {
    int newPrefixLen = (int)strlen(newPrefix);
    int stringLen = (int)strlen(locString);
    int lengthDelta = newPrefixLen - oldPrefixLen;

    if (oldPrefixLen < 0 || oldPrefixLen > stringLen) {
        // not a prefix of this string; nothing sensible to replace
        return;
    }
    if (lengthDelta > 0 && stringLen + lengthDelta >= locStringMaxLen) {
        // replacement is longer and there is no room, can't do substitution
        return;
    }

    if (lengthDelta != 0) {
        // Shift the tail (everything after the old prefix, plus the NUL) so that it
        // starts right after where the new prefix will end. memmove handles the
        // overlap in either direction without stepping a pointer outside the buffer.
        memmove(locString + newPrefixLen, locString + oldPrefixLen, (size_t)(stringLen - oldPrefixLen) + 1);
    }

    // do the substitution (the replacement's NUL is deliberately not copied)
    memcpy(locString, newPrefix, (size_t)newPrefixLen);
}
1269
1270 // _UpdateFullLocaleString
1271 // Given a locale string that uses standard codes (not a special old-style Apple string),
1272 // update all the language codes and region codes to latest versions, map 3-letter
1273 // language codes to 2-letter codes if possible, and normalize casing. If requested, return
1274 // pointers to a language-region variant subtag (if present) and a region tag (if present).
1275 // (add locStringMaxLen parameter) // <1.10>
1276 static void _UpdateFullLocaleString(char inLocaleString[], int locStringMaxLen,
1277 char **langRegSubtagRef, char **regionTagRef,
1278 char varKeyValueString[]) // <1.17>
1279 {
1280 KeyStringToResultString testEntry;
1281 KeyStringToResultString * foundEntry;
1282 const SpecialCaseUpdates * specialCasePtr;
1283 char * inLocalePtr;
1284 char * subtagPtr;
1285 char * langRegSubtag = NULL;
1286 char * regionTag = NULL;
1287 char * variantTag = NULL;
1288 Boolean subtagHasDigits, pastPrimarySubtag, hadRegion;
1289
1290 // 1. First replace any non-canonical prefix (case insensitive) with canonical
1291 // (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.)
1292
1293 testEntry.key = inLocaleString;
1294 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToCanonical, kNumLocaleStringPrefixToCanonical,
1295 sizeof(KeyStringToResultString), _CompareLowerTestEntryPrefixToTableEntryKey );
1296 if (foundEntry) {
1297 // replace key (at beginning of string) with result
1298 _ReplacePrefix(inLocaleString, locStringMaxLen, strlen(foundEntry->key), foundEntry->result); // <1.10>
1299 }
1300
1301 // 2. Walk through input string, normalizing case & marking use of ISO 3166 codes
1302
1303 inLocalePtr = inLocaleString;
1304 subtagPtr = inLocaleString;
1305 subtagHasDigits = false;
1306 pastPrimarySubtag = false;
1307 hadRegion = false;
1308
1309 while ( true ) {
1310 if ( isalpha(*inLocalePtr) ) {
1311 // if not past a region tag, then lowercase, else uppercase
1312 *inLocalePtr = (!hadRegion)? tolower(*inLocalePtr): toupper(*inLocalePtr);
1313 } else if ( isdigit(*inLocalePtr) ) {
1314 subtagHasDigits = true;
1315 } else {
1316
1317 if (!pastPrimarySubtag) {
1318 // may have a NULL primary subtag
1319 if (subtagHasDigits) {
1320 break;
1321 }
1322 pastPrimarySubtag = true;
1323 } else if (!hadRegion) {
1324 // We are after any primary language subtag, but not past any region tag.
1325 // This subtag is preceded by '-' or '_'.
1326 int subtagLength = inLocalePtr - subtagPtr; // includes leading '-' or '_'
1327
1328 if (subtagLength == 3 && !subtagHasDigits) {
1329 // potential ISO 3166 code for region or language variant; if so, needs uppercasing
1330 if (*subtagPtr == '_') {
1331 regionTag = subtagPtr;
1332 hadRegion = true;
1333 subtagPtr[1] = toupper(subtagPtr[1]);
1334 subtagPtr[2] = toupper(subtagPtr[2]);
1335 } else if (langRegSubtag == NULL) {
1336 langRegSubtag = subtagPtr;
1337 subtagPtr[1] = toupper(subtagPtr[1]);
1338 subtagPtr[2] = toupper(subtagPtr[2]);
1339 }
1340 } else if (subtagLength == 4 && subtagHasDigits) {
1341 // potential UN M.49 region code
1342 if (*subtagPtr == '_') {
1343 regionTag = subtagPtr;
1344 hadRegion = true;
1345 } else if (langRegSubtag == NULL) {
1346 langRegSubtag = subtagPtr;
1347 }
1348 } else if (subtagLength == 5 && !subtagHasDigits) {
1349 // ISO 15924 script code, uppercase just the first letter
1350 subtagPtr[1] = toupper(subtagPtr[1]);
1351 } else if (subtagLength == 1 && *subtagPtr == '_') { // <1.17>
1352 hadRegion = true;
1353 }
1354
1355 if (!hadRegion) {
1356 // convert improper '_' to '-'
1357 *subtagPtr = '-';
1358 }
1359 } else {
1360 variantTag = subtagPtr; // <1.17>
1361 }
1362
1363 if (*inLocalePtr == '-' || *inLocalePtr == '_') {
1364 subtagPtr = inLocalePtr;
1365 subtagHasDigits = false;
1366 } else {
1367 break;
1368 }
1369 }
1370
1371 inLocalePtr++;
1372 }
1373
1374 // 3 If there is a variant tag, see if ICU canonicalizes it to keywords. // <1.17> [3577669]
1375 // If so, copy the keywords to varKeyValueString and delete the variant tag
1376 // from the original string (but don't otherwise use the ICU canonicalization).
1377 varKeyValueString[0] = 0;
1378 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
1379 if (variantTag) {
1380 UErrorCode icuStatus;
1381 int icuCanonStringLen;
1382 char * varKeyValueStringPtr = varKeyValueString;
1383
1384 icuStatus = U_ZERO_ERROR;
1385 icuCanonStringLen = uloc_canonicalize( inLocaleString, varKeyValueString, locStringMaxLen, &icuStatus );
1386 if ( U_SUCCESS(icuStatus) ) {
1387 char * icuCanonStringPtr = varKeyValueString;
1388
1389 if (icuCanonStringLen >= locStringMaxLen)
1390 icuCanonStringLen = locStringMaxLen - 1;
1391 varKeyValueString[icuCanonStringLen] = 0;
1392 while (*icuCanonStringPtr != 0 && *icuCanonStringPtr != ULOC_KEYWORD_SEPARATOR)
1393 ++icuCanonStringPtr;
1394 if (*icuCanonStringPtr != 0) {
1395 // the canonicalized string has keywords
1396 // delete the variant tag in the original string (and other trailing '_' or '-')
1397 *variantTag-- = 0;
1398 while (*variantTag == '_')
1399 *variantTag-- = 0;
1400 // delete all of the canonicalized string except the keywords
1401 while (*icuCanonStringPtr != 0)
1402 *varKeyValueStringPtr++ = *icuCanonStringPtr++;
1403 }
1404 *varKeyValueStringPtr = 0;
1405 }
1406 }
1407 #endif
1408
1409 // 4. Handle special cases of updating region codes, or updating language codes based on
1410 // region code.
1411 for (specialCasePtr = specialCases; specialCasePtr->reg1 != NULL; specialCasePtr++) {
1412 if ( specialCasePtr->lang == NULL || _CheckForTag(inLocaleString, specialCasePtr->lang, 2) ) {
1413 // OK, we matched any language specified. Now what needs updating?
1414 char * foundTag;
1415
1416 if ( isupper(specialCasePtr->update1[0]) ) {
1417 // updating a region code
1418 if ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) {
1419 _CopyReplacementAtPointer(foundTag+1, specialCasePtr->update1);
1420 }
1421 if ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) {
1422 _CopyReplacementAtPointer(regionTag+1, specialCasePtr->update1);
1423 }
1424
1425 } else {
1426 // updating the language, there will be two choices based on region
1427 if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) ||
1428 ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) ) {
1429 _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update1);
1430 } else if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg2 + 1, 2) ) ||
1431 ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg2) ) && !isalnum(foundTag[3]) ) ) {
1432 _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update2);
1433 }
1434 }
1435 }
1436 }
1437
1438 // 5. return pointers if requested.
1439 if (langRegSubtagRef != NULL) {
1440 *langRegSubtagRef = langRegSubtag;
1441 }
1442 if (regionTagRef != NULL) {
1443 *regionTagRef = regionTag;
1444 }
1445 }
1446
1447
1448 // _RemoveSubstringsIfPresent
1449 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1450 // substringList is a list of space-separated substrings to strip if found in localeString
1451 static void _RemoveSubstringsIfPresent(char *localeString, const char *substringList) {
1452 while (*substringList != 0) {
1453 char currentSubstring[kLocaleIdentifierCStringMax];
1454 int substringLength = 0;
1455 char * foundSubstring;
1456
1457 // copy current substring & get its length
1458 while ( isgraph(*substringList) ) {
1459 currentSubstring[substringLength++] = *substringList++;
1460 }
1461 // move to next substring
1462 while ( isspace(*substringList) ) {
1463 substringList++;
1464 }
1465
1466 // search for current substring in locale string
1467 if (substringLength == 0)
1468 continue;
1469 currentSubstring[substringLength] = 0;
1470 foundSubstring = strstr(localeString, currentSubstring);
1471
1472 // if substring is found, delete it
1473 if (foundSubstring) {
1474 _DeleteCharsAtPointer(foundSubstring, substringLength);
1475 }
1476 }
1477 }
1478
1479
1480 // _GetKeyValueString // <1.10>
1481 // Removes any key-value string from inLocaleString, puts canonized version in keyValueString
1482
1483 static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) {
1484 char * inLocalePtr = inLocaleString;
1485
1486 while (*inLocalePtr != 0 && *inLocalePtr != ULOC_KEYWORD_SEPARATOR) {
1487 inLocalePtr++;
1488 }
1489 if (*inLocalePtr != 0) { // we found a key-value section
1490 char * keyValuePtr = keyValueString;
1491
1492 *keyValuePtr = *inLocalePtr;
1493 *inLocalePtr = 0;
1494 do {
1495 if ( *(++inLocalePtr) != ' ' ) {
1496 *(++keyValuePtr) = *inLocalePtr; // remove "tolower() for *inLocalePtr" // <1.11>
1497 }
1498 } while (*inLocalePtr != 0);
1499 } else {
1500 keyValueString[0] = 0;
1501 }
1502 }
1503
// _AppendKeyValueString
// For every keyword found in keyValueString, set that keyword and its value on
// inLocaleString via ICU (uloc_setKeywordValue), using locStringMaxLen as the
// capacity of inLocaleString. Does nothing if keyValueString is empty, or on
// deployment targets for which the ICU code below is compiled out.
static void _AppendKeyValueString(char inLocaleString[], int locStringMaxLen, char keyValueString[]) {
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
    if (keyValueString[0] != 0) {
        UErrorCode uerr = U_ZERO_ERROR;
        UEnumeration * uenum = uloc_openKeywords(keyValueString, &uerr);
        if ( uenum != NULL ) {
            const char *    keyword;
            int32_t         length;
            // One byte more than the capacity handed to ICU: when a value exactly fills
            // the capacity ICU reports success (U_STRING_NOT_TERMINATED_WARNING) without
            // writing a NUL, so room is kept to terminate it here.
            char            value[ULOC_KEYWORDS_CAPACITY + 1];	// use as max for keyword value
            while ( U_SUCCESS(uerr) ) {
                keyword = uenum_next(uenum, &length, &uerr);
                if ( keyword == NULL ) {
                    break;
                }
                length = uloc_getKeywordValue( keyValueString, keyword, value, ULOC_KEYWORDS_CAPACITY, &uerr );
                if ( U_FAILURE(uerr) || length < 0 || length > ULOC_KEYWORDS_CAPACITY ) {
                    // value could not be retrieved; stop (ICU would ignore further calls anyway
                    // once the status is a failure)
                    break;
                }
                value[length] = 0;
                (void)uloc_setKeywordValue( keyword, value, inLocaleString, locStringMaxLen, &uerr );
            }
            uenum_close(uenum);
        }
    }
#endif
}
1526
1527 // __private_extern__ CFStringRef _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator, CFStringRef localeIdentifier) {}
1528
1529 CFStringRef CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) {
1530 char inLocaleString[kLocaleIdentifierCStringMax];
1531 CFStringRef outStringRef = NULL;
1532
1533 if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) {
1534 KeyStringToResultString testEntry;
1535 KeyStringToResultString * foundEntry;
1536 char keyValueString[sizeof(inLocaleString)]; // <1.10>
1537 char varKeyValueString[sizeof(inLocaleString)]; // <1.17>
1538
1539 _GetKeyValueString(inLocaleString, keyValueString); // <1.10>
1540 testEntry.result = NULL;
1541
1542 // A. First check if input string matches an old-style string that has a replacement
1543 // (do this before case normalization)
1544 testEntry.key = inLocaleString;
1545 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical,
1546 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1547 if (foundEntry) {
1548 // It does match, so replace old string with new
1549 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1550 varKeyValueString[0] = 0;
1551 } else {
1552 char * langRegSubtag = NULL;
1553 char * regionTag = NULL;
1554
1555 // B. No match with an old-style string, use input string but update codes, normalize case, etc.
1556 _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, &regionTag, varKeyValueString); // <1.10><1.17><1.19>
1557
1558 // if the language part already includes a regional variant, then delete any region tag. <1.19>
1559 if (langRegSubtag && regionTag)
1560 *regionTag = 0;
1561 }
1562
1563 // C. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string
1564
1565 // 1. Strip defaults in input string based on initial part of locale string
1566 // (mainly to strip default script tag for a language)
1567 testEntry.key = inLocaleString;
1568 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults,
1569 sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey );
1570 if (foundEntry) {
1571 // The input string begins with a character sequence for which
1572 // there are default substrings which should be stripped if present
1573 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1574 }
1575
1576 // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string
1577 testEntry.key = inLocaleString;
1578 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageString, kNumAppleLocaleToLanguageString,
1579 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1580 if (foundEntry) {
1581 // it does match
1582 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1583 } else {
1584 // skip to any region tag or java-type variant
1585 char * inLocalePtr = inLocaleString;
1586 while (*inLocalePtr != 0 && *inLocalePtr != '_') {
1587 inLocalePtr++;
1588 }
1589 // if there is still a region tag, turn it into a language variant <1.19>
1590 if (*inLocalePtr == '_') {
1591 // handle 3-digit regions in addition to 2-letter ones
1592 char * regionTag = inLocalePtr++;
1593 long expectedLength = 0;
1594 if ( isalpha(*inLocalePtr) ) {
1595 while ( isalpha(*(++inLocalePtr)) )
1596 ;
1597 expectedLength = 3;
1598 } else if ( isdigit(*inLocalePtr) ) {
1599 while ( isdigit(*(++inLocalePtr)) )
1600 ;
1601 expectedLength = 4;
1602 }
1603 *regionTag = (inLocalePtr - regionTag == expectedLength)? '-': 0;
1604 }
1605 // anything else at/after '_' just gets deleted
1606 *inLocalePtr = 0;
1607 }
1608
1609 // D. Re-append any key-value strings, now canonical // <1.10><1.17>
1610 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString );
1611 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString );
1612
1613 // All done, return what we came up with.
1614 outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII);
1615 }
1616
1617 return outStringRef;
1618 }
1619
1620
1621 CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) {
1622 char inLocaleString[kLocaleIdentifierCStringMax];
1623 CFStringRef outStringRef = NULL;
1624
1625 if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) {
1626 KeyStringToResultString testEntry;
1627 KeyStringToResultString * foundEntry;
1628 char keyValueString[sizeof(inLocaleString)]; // <1.10>
1629 char varKeyValueString[sizeof(inLocaleString)]; // <1.17>
1630
1631 _GetKeyValueString(inLocaleString, keyValueString); // <1.10>
1632 testEntry.result = NULL;
1633
1634 // A. First check if input string matches an old-style Apple string that has a replacement
1635 // (do this before case normalization)
1636 testEntry.key = inLocaleString;
1637 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical,
1638 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1639 if (foundEntry) {
1640 // It does match, so replace old string with new // <1.10>
1641 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1642 varKeyValueString[0] = 0;
1643 } else {
1644 char * langRegSubtag = NULL;
1645 char * regionTag = NULL;
1646
1647 // B. No match with an old-style string, use input string but update codes, normalize case, etc.
1648 _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, &regionTag, varKeyValueString); // <1.10><1.17>
1649
1650
1651 // C. Now strip defaults that are implied by other fields.
1652
1653 // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter.
1654 if ( langRegSubtag && regionTag && strncmp(langRegSubtag+1, regionTag+1, 2) == 0 ) {
1655 _DeleteCharsAtPointer(langRegSubtag, 3);
1656 }
1657
1658 // 2. Strip defaults in input string based on final region tag in locale string
1659 // (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO)
1660 if ( regionTag ) {
1661 testEntry.key = regionTag;
1662 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringRegionToDefaults, kNumLocaleStringRegionToDefaults,
1663 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1664 if (foundEntry) {
1665 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1666 }
1667 }
1668
1669 // 3. Strip defaults in input string based on initial part of locale string
1670 // (mainly to strip default script tag for a language)
1671 testEntry.key = inLocaleString;
1672 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults,
1673 sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey );
1674 if (foundEntry) {
1675 // The input string begins with a character sequence for which
1676 // there are default substrings which should be stripped if present
1677 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1678 }
1679 }
1680
1681 // D. Re-append any key-value strings, now canonical // <1.10><1.17>
1682 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString );
1683 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString );
1684
1685 // Now create the CFString (even if empty!)
1686 outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII);
1687 }
1688
1689 return outStringRef;
1690 }
1691
1692 // CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on
1693 // the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c
1694 CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator, LangCode lcode, RegionCode rcode) {
1695 CFStringRef result = NULL;
1696 if (0 <= rcode && rcode < kNumRegionCodeToLocaleString) {
1697 const char *localeString = regionCodeToLocaleString[rcode];
1698 if (localeString != NULL && *localeString != '\0') {
1699 result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull);
1700 }
1701 }
1702 if (result) return result;
1703 if (0 <= lcode && lcode < kNumLangCodeToLocaleString) {
1704 const char *localeString = langCodeToLocaleString[lcode];
1705 if (localeString != NULL && *localeString != '\0') {
1706 result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull);
1707 }
1708 }
1709 return result;
1710 }
1711
1712
1713 /*
1714 SPI: CFLocaleGetLanguageRegionEncodingForLocaleIdentifier gets the appropriate language and region codes,
1715 and the default legacy script code and encoding, for the specified locale (or language) string.
1716 Returns false if CFLocale has no information about the given locale (in which case none of the by-reference return values are set);
1717 otherwise may set *langCode and/or *regCode to -1 if there is no appropriate legacy value for the locale.
1718 This is a replacement for the CFBundle SPI CFBundleGetLocalizationInfoForLocalization (which was intended to be temporary and transitional);
1719 this function is more up-to-date in its handling of locale strings, and is in CFLocale where this functionality should belong. Compared
1720 to CFBundleGetLocalizationInfoForLocalization, this function does not specially interpret a NULL localeIdentifier to mean use the single most
1721 preferred localization in the current context (this function returns NO for a NULL localeIdentifier); and in this function
1722 langCode, regCode, and scriptCode are all SInt16* (not SInt32* like the equivalent parameters in CFBundleGetLocalizationInfoForLocalization).
1723 */
1724 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
1725 static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 );
1726 #endif
1727
1728 Boolean CFLocaleGetLanguageRegionEncodingForLocaleIdentifier(CFStringRef localeIdentifier, LangCode *langCode, RegionCode *regCode, ScriptCode *scriptCode, CFStringEncoding *stringEncoding) {
1729 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
1730 Boolean returnValue = false;
1731 CFStringRef canonicalIdentifier = CFLocaleCreateCanonicalLocaleIdentifierFromString(NULL, localeIdentifier);
1732 if (canonicalIdentifier) {
1733 char localeCString[kLocaleIdentifierCStringMax];
1734 if ( CFStringGetCString(canonicalIdentifier, localeCString, sizeof(localeCString), kCFStringEncodingASCII) ) {
1735 UErrorCode icuStatus = U_ZERO_ERROR;
1736 int32_t languagelength;
1737 char searchString[ULOC_LANG_CAPACITY + ULOC_FULLNAME_CAPACITY];
1738
1739 languagelength = uloc_getLanguage( localeCString, searchString, ULOC_LANG_CAPACITY, &icuStatus );
1740 if ( U_SUCCESS(icuStatus) && languagelength > 0 ) {
1741 // OK, here we have at least a language code, check for other components in order
1742 LocaleToLegacyCodes searchEntry = { (const char *)searchString, 0, 0, 0 };
1743 const LocaleToLegacyCodes * foundEntryPtr;
1744 int32_t componentLength;
1745 char componentString[ULOC_FULLNAME_CAPACITY];
1746
1747 languagelength = strlen(searchString); // in case it got truncated
1748 icuStatus = U_ZERO_ERROR;
1749 componentLength = uloc_getScript( localeCString, componentString, sizeof(componentString), &icuStatus );
1750 if ( U_FAILURE(icuStatus) || componentLength == 0 ) {
1751 icuStatus = U_ZERO_ERROR;
1752 componentLength = uloc_getCountry( localeCString, componentString, sizeof(componentString), &icuStatus );
1753 if ( U_FAILURE(icuStatus) || componentLength == 0 ) {
1754 icuStatus = U_ZERO_ERROR;
1755 componentLength = uloc_getVariant( localeCString, componentString, sizeof(componentString), &icuStatus );
1756 if ( U_FAILURE(icuStatus) ) {
1757 componentLength = 0;
1758 }
1759 }
1760 }
1761
1762 // Append whichever other component we first found
1763 if (componentLength > 0) {
1764 strlcat(searchString, "_", sizeof(searchString));
1765 strlcat(searchString, componentString, sizeof(searchString));
1766 }
1767
1768 // Search
1769 foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries );
1770 if (foundEntryPtr == NULL && (int32_t) strlen(searchString) > languagelength) {
1771 // truncate to language al;one and try again
1772 searchString[languagelength] = 0;
1773 foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries );
1774 }
1775
1776 // If found a matching entry, return requested values
1777 if (foundEntryPtr) {
1778 returnValue = true;
1779 if (langCode) *langCode = foundEntryPtr->langCode;
1780 if (regCode) *regCode = foundEntryPtr->regCode;
1781 if (stringEncoding) *stringEncoding = foundEntryPtr->encoding;
1782 if (scriptCode) {
1783 // map CFStringEncoding to ScriptCode
1784 if (foundEntryPtr->encoding < 33/*kCFStringEncodingMacSymbol*/) {
1785 *scriptCode = foundEntryPtr->encoding;
1786 } else {
1787 switch (foundEntryPtr->encoding) {
1788 case 0x8C/*kCFStringEncodingMacFarsi*/: *scriptCode = 4/*smArabic*/; break;
1789 case 0x98/*kCFStringEncodingMacUkrainian*/: *scriptCode = 7/*smCyrillic*/; break;
1790 case 0xEC/*kCFStringEncodingMacInuit*/: *scriptCode = 28/*smEthiopic*/; break;
1791 case 0xFC/*kCFStringEncodingMacVT100*/: *scriptCode = 32/*smUninterp*/; break;
1792 default: *scriptCode = 0/*smRoman*/; break;
1793 }
1794 }
1795 }
1796 }
1797 }
1798 }
1799 CFRelease(canonicalIdentifier);
1800 }
1801 return returnValue;
1802 #else
1803 return false;
1804 #endif
1805 }
1806
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
// bsearch() comparison callback for LocaleToLegacyCodes entries:
// orders two entries by strcmp() of their locale strings.
static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ) {
    const LocaleToLegacyCodes * firstEntry = (const LocaleToLegacyCodes *)entry1;
    const LocaleToLegacyCodes * secondEntry = (const LocaleToLegacyCodes *)entry2;

    return strcmp(firstEntry->locale, secondEntry->locale);
}
#endif
1814
1815 CFDictionaryRef CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator, CFStringRef localeID) {
1816 CFMutableDictionaryRef working = CFDictionaryCreateMutable(allocator, 10, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1817 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
1818 char cLocaleID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY];
1819 char buffer[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY];
1820
1821 UErrorCode icuStatus = U_ZERO_ERROR;
1822 int32_t length = 0;
1823
1824 if (!localeID) goto out;
1825
1826 // Extract the C string locale ID, for ICU
1827 CFIndex outBytes = 0;
1828 CFStringGetBytes(localeID, CFRangeMake(0, CFStringGetLength(localeID)), kCFStringEncodingASCII, (UInt8) '?', true, (unsigned char *)cLocaleID, sizeof(cLocaleID)/sizeof(char) - 1, &outBytes);
1829 cLocaleID[outBytes] = '\0';
1830
1831 // Get the components
1832 length = uloc_getLanguage(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
1833 if (U_SUCCESS(icuStatus) && length > 0)
1834 {
1835 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
1836 CFDictionaryAddValue(working, kCFLocaleLanguageCodeKey, string);
1837 CFRelease(string);
1838 }
1839 icuStatus = U_ZERO_ERROR;
1840
1841 length = uloc_getScript(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
1842 if (U_SUCCESS(icuStatus) && length > 0)
1843 {
1844 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
1845 CFDictionaryAddValue(working, kCFLocaleScriptCodeKey, string);
1846 CFRelease(string);
1847 }
1848 icuStatus = U_ZERO_ERROR;
1849
1850 length = uloc_getCountry(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
1851 if (U_SUCCESS(icuStatus) && length > 0)
1852 {
1853 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
1854 CFDictionaryAddValue(working, kCFLocaleCountryCodeKey, string);
1855 CFRelease(string);
1856 }
1857 icuStatus = U_ZERO_ERROR;
1858
1859 length = uloc_getVariant(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
1860 if (U_SUCCESS(icuStatus) && length > 0)
1861 {
1862 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
1863 CFDictionaryAddValue(working, kCFLocaleVariantCodeKey, string);
1864 CFRelease(string);
1865 }
1866 icuStatus = U_ZERO_ERROR;
1867
1868 // Now get the keywords; open an enumerator on them
1869 UEnumeration *iter = uloc_openKeywords(cLocaleID, &icuStatus);
1870 const char *locKey = NULL;
1871 int32_t locKeyLen = 0;
1872 while ((locKey = uenum_next(iter, &locKeyLen, &icuStatus)) && U_SUCCESS(icuStatus))
1873 {
1874 char locValue[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1875
1876 // Get the value for this keyword
1877 if (uloc_getKeywordValue(cLocaleID, locKey, locValue, sizeof(locValue)/sizeof(char), &icuStatus) > 0
1878 && U_SUCCESS(icuStatus))
1879 {
1880 CFStringRef key = CFStringCreateWithBytes(allocator, (UInt8 *)locKey, strlen(locKey), kCFStringEncodingASCII, true);
1881 CFStringRef value = CFStringCreateWithBytes(allocator, (UInt8 *)locValue, strlen(locValue), kCFStringEncodingASCII, true);
1882 if (key && value)
1883 CFDictionaryAddValue(working, key, value);
1884 if (key)
1885 CFRelease(key);
1886 if (value)
1887 CFRelease(value);
1888 }
1889 }
1890 uenum_close(iter);
1891
1892 out:;
1893 #endif
1894 // Convert to an immutable dictionary and return
1895 CFDictionaryRef result = CFDictionaryCreateCopy(allocator, working);
1896 CFRelease(working);
1897 return result;
1898 }
1899
1900 static char *__CStringFromString(CFStringRef str) {
1901 if (!str) return NULL;
1902 CFRange rg = CFRangeMake(0, CFStringGetLength(str));
1903 CFIndex neededLength = 0;
1904 CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, NULL, 0, &neededLength);
1905 char *buf = (char *)malloc(neededLength + 1);
1906 CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, (uint8_t *)buf, neededLength, &neededLength);
1907 buf[neededLength] = '\0';
1908 return buf;
1909 }
1910
1911 CFStringRef CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator, CFDictionaryRef dictionary) {
1912 if (!dictionary) return NULL;
1913
1914 CFIndex cnt = CFDictionaryGetCount(dictionary);
1915 STACK_BUFFER_DECL(CFStringRef, values, cnt);
1916 STACK_BUFFER_DECL(CFStringRef, keys, cnt);
1917 CFDictionaryGetKeysAndValues(dictionary, (const void **)keys, (const void **)values);
1918
1919 char *language = NULL, *script = NULL, *country = NULL, *variant = NULL;
1920 for (CFIndex idx = 0; idx < cnt; idx++) {
1921 if (CFEqual(kCFLocaleLanguageCodeKey, keys[idx])) {
1922 language = __CStringFromString(values[idx]);
1923 keys[idx] = NULL;
1924 } else if (CFEqual(kCFLocaleScriptCodeKey, keys[idx])) {
1925 script = __CStringFromString(values[idx]);
1926 keys[idx] = NULL;
1927 } else if (CFEqual(kCFLocaleCountryCodeKey, keys[idx])) {
1928 country = __CStringFromString(values[idx]);
1929 keys[idx] = NULL;
1930 } else if (CFEqual(kCFLocaleVariantCodeKey, keys[idx])) {
1931 variant = __CStringFromString(values[idx]);
1932 keys[idx] = NULL;
1933 }
1934 }
1935
1936 char *buf1 = NULL; // (|L)(|_S)(|_C|_C_V|__V)
1937 asprintf(&buf1, "%s%s%s%s%s%s%s", language ? language : "", script ? "_" : "", script ? script : "", (country || variant ? "_" : ""), country ? country : "", variant ? "_" : "", variant ? variant : "");
1938
1939 char cLocaleID[2 * ULOC_FULLNAME_CAPACITY + 2 * ULOC_KEYWORD_AND_VALUES_CAPACITY];
1940 strlcpy(cLocaleID, buf1, sizeof(cLocaleID));
1941 free(language);
1942 free(script);
1943 free(country);
1944 free(variant);
1945 free(buf1);
1946
1947 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
1948 for (CFIndex idx = 0; idx < cnt; idx++) {
1949 if (keys[idx]) {
1950 char *key = __CStringFromString(keys[idx]);
1951 char *value;
1952 if (0 == strcmp(key, "kCFLocaleCalendarKey")) {
1953 // For interchangeability convenience, we alternatively allow a
1954 // calendar object to be passed in, with the alternate key, and
1955 // we'll extract the identifier.
1956 CFCalendarRef cal = (CFCalendarRef)values[idx];
1957 CFStringRef ident = CFCalendarGetIdentifier(cal);
1958 value = __CStringFromString(ident);
1959 char *oldkey = key;
1960 key = strdup("calendar");
1961 free(oldkey);
1962 } else {
1963 value = __CStringFromString(values[idx]);
1964 }
1965 UErrorCode status = U_ZERO_ERROR;
1966 uloc_setKeywordValue(key, value, cLocaleID, sizeof(cLocaleID), &status);
1967 free(key);
1968 free(value);
1969 }
1970 }
1971 #endif
1972
1973 return CFStringCreateWithCString(allocator, cLocaleID, kCFStringEncodingASCII);
1974 }
1975