]> git.saurik.com Git - apple/cf.git/blame - CFLocaleIdentifier.c
CF-1153.18.tar.gz
[apple/cf.git] / CFLocaleIdentifier.c
CommitLineData
bd5b749c 1/*
e29e285d 2 * Copyright (c) 2015 Apple Inc. All rights reserved.
bd5b749c
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
d7384798 5 *
bd5b749c
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
d7384798 12 *
bd5b749c
A
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
d7384798 20 *
bd5b749c
A
21 * @APPLE_LICENSE_HEADER_END@
22 */
f64f9b69 23
bd5b749c
A
24/*
25 CFLocaleIdentifier.c
d7384798 26 Copyright (c) 2002-2014, Apple Inc. All rights reserved.
8ca704e1 27 Responsibility: David Smith
bd5b749c
A
28
29 CFLocaleIdentifier.c defines
30 - enum value kLocaleIdentifierCStringMax
31 - structs KeyStringToResultString, SpecialCaseUpdates
32 and provides the following data for the functions
33 CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes,
34 CFLocaleCreateCanonicalLocaleIdentifierFromString
35 CFLocaleCreateCanonicalLanguageIdentifierFromString
36
37 1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString;
38 map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string
39
40 2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString;
41 map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string
42
43 3. static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical;
44 map old Apple string oldAppleLocaleToCanonical[n].key
45 to canonical locale string oldAppleLocaleToCanonical[n].result
46 for n = 0..kNumOldAppleLocaleToCanonical-1
47
48 4. static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical;
49 map non-canonical language prefix (3-letter, obsolete) localeStringPrefixToCanonical[n].key
50 to updated replacement localeStringPrefixToCanonical[n].result
51 for n = 0..kNumLocaleStringPrefixToCanonical-1
52
53 5. static const SpecialCaseUpdates specialCases[];
54 various special cases for updating region codes, or for updating language codes based on region codes
55
56 6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults;
57 map locale string region tag localeStringRegionToDefaults[n].key
58 to default substrings to delete localeStringRegionToDefaults[n].result
59 for n = 0..kNumLocaleStringRegionToDefaults-1
60
61 7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults;
62 map locale string initial part localeStringPrefixToDefaults[n].key
63 to default substrings to delete localeStringPrefixToDefaults[n].result
64 for n = 0..kNumLocaleStringPrefixToDefaults-1
65
66 8. static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString;
67 map Apple locale string appleLocaleToLanguageString[n].key
68 to equivalent language string appleLocaleToLanguageString[n].result
69 for n = 0..kNumAppleLocaleToLanguageString-1
70
71*/
72
73#include <CoreFoundation/CFString.h>
8ca704e1 74#include <CoreFoundation/CFCalendar.h>
bd5b749c
A
75#include <ctype.h>
76#include <string.h>
77#include <stdlib.h>
8ca704e1 78#include <stdio.h>
856091c5 79#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
bd5b749c 80#include <unicode/uloc.h>
856091c5
A
81#else
82#define ULOC_KEYWORD_SEPARATOR '@'
83#define ULOC_FULLNAME_CAPACITY 56
84#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
85#endif
cf7d2af9
A
86#include "CFInternal.h"
87#include "CFLocaleInternal.h"
bd5b749c
A
88
89// Max byte length of locale identifier (ASCII) as C string, including terminating null byte
90enum {
91 kLocaleIdentifierCStringMax = ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY // currently 56 + 100
92};
93
// One row of a string-to-string lookup table, as used by the data tables for
// CFLocaleCreateCanonicalLocaleIdentifierFromString: `key` is the string to
// match, `result` is the string it maps to.
typedef struct KeyStringToResultString {
    const char *key;     // string to look up
    const char *result;  // value associated with key
} KeyStringToResultString;
100
// One row of the special-case table used by the data tables for
// CFLocaleCreateCanonicalLocaleIdentifierFromString. The file header describes
// these rows as special cases for updating region codes, or for updating
// language codes based on region codes.
// NOTE(review): presumably update1 applies when the region matches reg1 and
// update2 when it matches reg2 -- confirm against specialCases[] and its consumer.
typedef struct SpecialCaseUpdates {
    const char *lang;     // language the special case applies to
    const char *reg1;     // first region string
    const char *update1;  // update paired with reg1
    const char *reg2;     // second region string
    const char *update2;  // update paired with reg2
} SpecialCaseUpdates;
110
111
// Maps a Script Manager RegionCode (used directly as the array index) to its
// canonical locale string. NULL marks a region code that is unassigned/unused.
//
// Per-entry comment layout:
//   index and region code name; index and language code name; [remark]
//   [ # __CFBundleLocaleAbbreviationsArray string, if different ]
// "* one-way mapping" marks codes that map to a locale string shared with
// another region code.
static const char * const regionCodeToLocaleString[] = {
    "en_US",        //   0 verUS;               0 langEnglish;
    "fr_FR",        //   1 verFrance;           1 langFrench;
    "en_GB",        //   2 verBritain;          0 langEnglish;
    "de_DE",        //   3 verGermany;          2 langGerman;
    "it_IT",        //   4 verItaly;            3 langItalian;
    "nl_NL",        //   5 verNetherlands;      4 langDutch;
    "nl_BE",        //   6 verFlemish;         34 langFlemish (redundant, =Dutch);
    "sv_SE",        //   7 verSweden;           5 langSwedish;
    "es_ES",        //   8 verSpain;            6 langSpanish;
    "da_DK",        //   9 verDenmark;          7 langDanish;
    "pt_PT",        //  10 verPortugal;         8 langPortuguese;
    "fr_CA",        //  11 verFrCanada;         1 langFrench;
    "nb_NO",        //  12 verNorway;           9 langNorwegian (Bokmal);  # "no_NO"
    "he_IL",        //  13 verIsrael;          10 langHebrew;
    "ja_JP",        //  14 verJapan;           11 langJapanese;
    "en_AU",        //  15 verAustralia;        0 langEnglish;
    "ar",           //  16 verArabic;          12 langArabic;
    "fi_FI",        //  17 verFinland;         13 langFinnish;
    "fr_CH",        //  18 verFrSwiss;          1 langFrench;
    "de_CH",        //  19 verGrSwiss;          2 langGerman;
    "el_GR",        //  20 verGreece;          14 langGreek (modern)-Grek-mono;
    "is_IS",        //  21 verIceland;         15 langIcelandic;
    "mt_MT",        //  22 verMalta;           16 langMaltese;
    "el_CY",        //  23 verCyprus;          14 langGreek?; el or tr? guess el  # ""
    "tr_TR",        //  24 verTurkey;          17 langTurkish;
    "hr_HR",        //  25 verYugoCroatian;    18 langCroatian; * one-way mapping -> verCroatia
    "nl_NL",        //  26 KCHR, Netherlands;   4 langDutch; * one-way mapping
    "nl_BE",        //  27 KCHR, verFlemish;   34 langFlemish; * one-way mapping
    "_CA",          //  28 KCHR, Canada-en/fr?; -1 none; * one-way mapping  # "en_CA"
    "_CA",          //  29 KCHR, Canada-en/fr?; -1 none; * one-way mapping  # "en_CA"
    "pt_PT",        //  30 KCHR, Portugal;      8 langPortuguese; * one-way mapping
    "nb_NO",        //  31 KCHR, Norway;        9 langNorwegian (Bokmal); * one-way mapping  # "no_NO"
    "da_DK",        //  32 KCHR, Denmark;       7 langDanish; * one-way mapping
    "hi_IN",        //  33 verIndiaHindi;      21 langHindi;
    "ur_PK",        //  34 verPakistanUrdu;    20 langUrdu;
    "tr_TR",        //  35 verTurkishModified; 17 langTurkish; * one-way mapping
    "it_CH",        //  36 verItalianSwiss;     3 langItalian;
    "en_001",       //  37 verInternational;    0 langEnglish; ASCII only  # "en"
    NULL,           //  38 *unassigned;        -1 none; * one-way mapping  # ""
    "ro_RO",        //  39 verRomania;         37 langRomanian;
    "grc",          //  40 verGreekAncient;   148 langGreekAncient -Grek-poly;  # "el_GR"
    "lt_LT",        //  41 verLithuania;       24 langLithuanian;
    "pl_PL",        //  42 verPoland;          25 langPolish;
    "hu_HU",        //  43 verHungary;         26 langHungarian;
    "et_EE",        //  44 verEstonia;         27 langEstonian;
    "lv_LV",        //  45 verLatvia;          28 langLatvian;
    "se",           //  46 verSami;            29 langSami;
    "fo_FO",        //  47 verFaroeIsl;        30 langFaroese;
    "fa_IR",        //  48 verIran;            31 langFarsi/Persian;
    "ru_RU",        //  49 verRussia;          32 langRussian;
    "ga_IE",        //  50 verIreland;         35 langIrishGaelic (no dots);
    "ko_KR",        //  51 verKorea;           23 langKorean;
    "zh_CN",        //  52 verChina;           33 langSimpChinese;
    "zh_TW",        //  53 verTaiwan;          19 langTradChinese;
    "th_TH",        //  54 verThailand;        22 langThai;
    "und",          //  55 verScriptGeneric;   -1 none;  # ""  // <1.9>
    "cs_CZ",        //  56 verCzech;           38 langCzech;
    "sk_SK",        //  57 verSlovak;          39 langSlovak;
    "und",          //  58 verEastAsiaGeneric; -1 none; * one-way mapping  # ""  // <1.9>
    "hu_HU",        //  59 verMagyar;          26 langHungarian; * one-way mapping -> verHungary
    "bn",           //  60 verBengali;         67 langBengali; _IN or _BD? guess generic
    "be_BY",        //  61 verBelarus;         46 langBelorussian;
    "uk_UA",        //  62 verUkraine;         45 langUkrainian;
    NULL,           //  63 *unused;            -1 none; * one-way mapping  # ""
    "el_GR",        //  64 verGreeceAlt;       14 langGreek (modern)-Grek-mono; * one-way mapping
    "sr_RS",        //  65 verSerbian;         42 langSerbian -Cyrl;  // <1.18>
    "sl_SI",        //  66 verSlovenian;       40 langSlovenian;
    "mk_MK",        //  67 verMacedonian;      43 langMacedonian;
    "hr_HR",        //  68 verCroatia;         18 langCroatian;
    NULL,           //  69 *unused;            -1 none; * one-way mapping  # ""
    "de-1996",      //  70 verGermanReformed;   2 langGerman; 1996 orthogr.  # "de_DE"
    "pt_BR",        //  71 verBrazil;           8 langPortuguese;
    "bg_BG",        //  72 verBulgaria;        44 langBulgarian;
    "ca_ES",        //  73 verCatalonia;      130 langCatalan;
    "mul",          //  74 verMultilingual;    -1 none;  # ""
    "gd",           //  75 verScottishGaelic; 144 langScottishGaelic;
    "gv",           //  76 verManxGaelic;     145 langManxGaelic;
    "br",           //  77 verBreton;         142 langBreton;
    "iu_CA",        //  78 verNunavut;        143 langInuktitut -Cans;
    "cy",           //  79 verWelsh;          128 langWelsh;
    "_CA",          //  80 KCHR, Canada-en/fr?; -1 none; * one-way mapping  # "en_CA"
    "ga-Latg_IE",   //  81 verIrishGaelicScrip; 146 langIrishGaelicScript -dots;  # "ga_IE"  // <xx>
    "en_CA",        //  82 verEngCanada;        0 langEnglish;
    "dz_BT",        //  83 verBhutan;         137 langDzongkha;
    "hy_AM",        //  84 verArmenian;        51 langArmenian;
    "ka_GE",        //  85 verGeorgian;        52 langGeorgian;
    "es_419",       //  86 verSpLatinAmerica;   6 langSpanish;  # "es"
    "es_ES",        //  87 KCHR, Spain;         6 langSpanish; * one-way mapping
    "to_TO",        //  88 verTonga;          147 langTongan;
    "pl_PL",        //  89 KCHR, Poland;       25 langPolish; * one-way mapping
    "ca_ES",        //  90 KCHR, Catalonia;   130 langCatalan; * one-way mapping
    "fr_001",       //  91 verFrenchUniversal;  1 langFrench;
    "de_AT",        //  92 verAustria;          2 langGerman;
    "es_419",       //  93 > verSpLatinAmerica; 6 langSpanish; * one-way mapping  # "es"
    "gu_IN",        //  94 verGujarati;        69 langGujarati;
    "pa",           //  95 verPunjabi;         70 langPunjabi; _IN or _PK? guess generic
    "ur_IN",        //  96 verIndiaUrdu;       20 langUrdu;
    "vi_VN",        //  97 verVietnam;         80 langVietnamese;
    "fr_BE",        //  98 verFrBelgium;        1 langFrench;
    "uz_UZ",        //  99 verUzbek;           47 langUzbek;
    "en_SG",        // 100 verSingapore;        0 langEnglish?; en, zh, or ms? guess en  # ""
    "nn_NO",        // 101 verNynorsk;        151 langNynorsk;  # ""
    "af_ZA",        // 102 verAfrikaans;      141 langAfrikaans;
    "eo",           // 103 verEsperanto;       94 langEsperanto;
    "mr_IN",        // 104 verMarathi;         66 langMarathi;
    "bo",           // 105 verTibetan;         63 langTibetan;
    "ne_NP",        // 106 verNepal;           64 langNepali;
    "kl",           // 107 verGreenland;      149 langGreenlandic;
    "en_IE",        // 108 verIrelandEnglish;   0 langEnglish;  # (no entry)
};

// Number of entries in regionCodeToLocaleString; valid RegionCode indices are
// 0..kNumRegionCodeToLocaleString-1.
enum {
    kNumRegionCodeToLocaleString = sizeof(regionCodeToLocaleString) / sizeof(regionCodeToLocaleString[0])
};
230
// Maps a Script Manager LangCode (used directly as the array index) to its
// canonical locale (language) string. NULL fills the unassigned range
// 95..127 so that later codes keep their own index.
//
// Per-entry comment layout:
//   index and language code name; [remark]
//   [ # __CFBundleLanguageAbbreviationsArray string, if different ]
static const char * const langCodeToLocaleString[] = {
    "en",       //   0 langEnglish;
    "fr",       //   1 langFrench;
    "de",       //   2 langGerman;
    "it",       //   3 langItalian;
    "nl",       //   4 langDutch;
    "sv",       //   5 langSwedish;
    "es",       //   6 langSpanish;
    "da",       //   7 langDanish;
    "pt",       //   8 langPortuguese;
    "nb",       //   9 langNorwegian (Bokmal);  # "no"
    "he",       //  10 langHebrew -Hebr;
    "ja",       //  11 langJapanese -Jpan;
    "ar",       //  12 langArabic -Arab;
    "fi",       //  13 langFinnish;
    "el",       //  14 langGreek (modern)-Grek-mono;
    "is",       //  15 langIcelandic;
    "mt",       //  16 langMaltese -Latn;
    "tr",       //  17 langTurkish -Latn;
    "hr",       //  18 langCroatian;
    "zh-Hant",  //  19 langTradChinese;  # "zh"
    "ur",       //  20 langUrdu -Arab;
    "hi",       //  21 langHindi -Deva;
    "th",       //  22 langThai -Thai;
    "ko",       //  23 langKorean -Hang;
    "lt",       //  24 langLithuanian;
    "pl",       //  25 langPolish;
    "hu",       //  26 langHungarian;
    "et",       //  27 langEstonian;
    "lv",       //  28 langLatvian;
    "se",       //  29 langSami;
    "fo",       //  30 langFaroese;
    "fa",       //  31 langFarsi/Persian -Arab;
    "ru",       //  32 langRussian -Cyrl;
    "zh-Hans",  //  33 langSimpChinese;  # "zh"
    "nl-BE",    //  34 langFlemish (redundant, =Dutch);  # "nl"
    "ga",       //  35 langIrishGaelic (no dots);
    "sq",       //  36 langAlbanian; no region codes
    "ro",       //  37 langRomanian;
    "cs",       //  38 langCzech;
    "sk",       //  39 langSlovak;
    "sl",       //  40 langSlovenian;
    "yi",       //  41 langYiddish -Hebr; no region codes
    "sr",       //  42 langSerbian -Cyrl;
    "mk",       //  43 langMacedonian -Cyrl;
    "bg",       //  44 langBulgarian -Cyrl;
    "uk",       //  45 langUkrainian -Cyrl;
    "be",       //  46 langBelorussian -Cyrl;
    "uz",       //  47 langUzbek -Cyrl; also -Latn, -Arab
    "kk",       //  48 langKazakh -Cyrl; no region codes; also -Latn, -Arab
    "az-Cyrl",  //  49 langAzerbaijani -Cyrl; no region codes  # "az"
    "az-Arab",  //  50 langAzerbaijanAr -Arab; no region codes  # "az"
    "hy",       //  51 langArmenian -Armn;
    "ka",       //  52 langGeorgian -Geor;
    "mo",       //  53 langMoldavian -Cyrl; no region codes
    "ky",       //  54 langKirghiz -Cyrl; no region codes; also -Latn, -Arab
    "tg",       //  55 langTajiki -Cyrl; no region codes; also -Latn, -Arab
    "tk-Cyrl",  //  56 langTurkmen -Cyrl; no region codes; also -Latn, -Arab
    "mn-Mong",  //  57 langMongolian -Mong; no region codes  # "mn"
    "mn",       //  58 langMongolianCyr -Cyrl; no region codes  # "mn"
    "ps",       //  59 langPashto -Arab; no region codes
    "ku",       //  60 langKurdish -Arab; no region codes
    "ks",       //  61 langKashmiri -Arab; no region codes
    "sd",       //  62 langSindhi -Arab; no region codes
    "bo",       //  63 langTibetan -Tibt;
    "ne",       //  64 langNepali -Deva;
    "sa",       //  65 langSanskrit -Deva; no region codes
    "mr",       //  66 langMarathi -Deva;
    "bn",       //  67 langBengali -Beng;
    "as",       //  68 langAssamese -Beng; no region codes
    "gu",       //  69 langGujarati -Gujr;
    "pa",       //  70 langPunjabi -Guru;
    "or",       //  71 langOriya -Orya; no region codes
    "ml",       //  72 langMalayalam -Mlym; no region codes
    "kn",       //  73 langKannada -Knda; no region codes
    "ta",       //  74 langTamil -Taml; no region codes
    "te",       //  75 langTelugu -Telu; no region codes
    "si",       //  76 langSinhalese -Sinh; no region codes
    "my",       //  77 langBurmese -Mymr; no region codes
    "km",       //  78 langKhmer -Khmr; no region codes
    "lo",       //  79 langLao -Laoo; no region codes
    "vi",       //  80 langVietnamese -Latn;
    "id",       //  81 langIndonesian -Latn; no region codes
    "fil",      //  82 langTagalog -Latn; no region codes
    "ms",       //  83 langMalayRoman -Latn; no region codes  # "ms"
    "ms-Arab",  //  84 langMalayArabic -Arab; no region codes  # "ms"
    "am",       //  85 langAmharic -Ethi; no region codes
    "ti",       //  86 langTigrinya -Ethi; no region codes
    "om",       //  87 langOromo -Ethi; no region codes
    "so",       //  88 langSomali -Latn; no region codes
    "sw",       //  89 langSwahili -Latn; no region codes
    "rw",       //  90 langKinyarwanda -Latn; no region codes
    "rn",       //  91 langRundi -Latn; no region codes
    "ny",       //  92 langNyanja/Chewa -Latn; no region codes  # ""
    "mg",       //  93 langMalagasy -Latn; no region codes
    "eo",       //  94 langEsperanto -Latn;
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,  //  95 to 105 (gap)
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,  // 106 to 116 (gap)
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,  // 117 to 127 (gap)
    "cy",       // 128 langWelsh -Latn;
    "eu",       // 129 langBasque -Latn; no region codes
    "ca",       // 130 langCatalan -Latn;
    "la",       // 131 langLatin -Latn; no region codes
    "qu",       // 132 langQuechua -Latn; no region codes
    "gn",       // 133 langGuarani -Latn; no region codes
    "ay",       // 134 langAymara -Latn; no region codes
    "tt-Cyrl",  // 135 langTatar -Cyrl; no region codes
    "ug",       // 136 langUighur -Arab; no region codes
    "dz",       // 137 langDzongkha -Tibt;
    "jv",       // 138 langJavaneseRom -Latn; no region codes
    "su",       // 139 langSundaneseRom -Latn; no region codes
    "gl",       // 140 langGalician -Latn; no region codes
    "af",       // 141 langAfrikaans -Latn;
    "br",       // 142 langBreton -Latn;
    "iu",       // 143 langInuktitut -Cans;
    "gd",       // 144 langScottishGaelic;
    "gv",       // 145 langManxGaelic -Latn;
    "ga-Latg",  // 146 langIrishGaelicScript -Latn-dots;  # "ga"  // <xx>
    "to",       // 147 langTongan -Latn;
    "grc",      // 148 langGreekAncient -Grek-poly;  # "el"
    "kl",       // 149 langGreenlandic -Latn;
    "az",       // 150 langAzerbaijanRoman -Latn; no region codes  # "az"
    "nn",       // 151 langNynorsk -Latn;  # (no entry)
};

// Number of entries in langCodeToLocaleString; valid LangCode indices are
// 0..kNumLangCodeToLocaleString-1.
enum {
    kNumLangCodeToLocaleString = sizeof(langCodeToLocaleString) / sizeof(langCodeToLocaleString[0])
};
362
363static const KeyStringToResultString oldAppleLocaleToCanonical[] = {
364// Map obsolete/old-style Apple strings to canonical
365// Must be sorted according to how strcmp compares the strings in the first column
366//
367// non-canonical canonical [ comment ] # source/reason for non-canonical string
368// string string
369// ------------- ---------
370 { "Afrikaans", "af" }, // # __CFBundleLanguageNamesArray
371 { "Albanian", "sq" }, // # __CFBundleLanguageNamesArray
372 { "Amharic", "am" }, // # __CFBundleLanguageNamesArray
373 { "Arabic", "ar" }, // # __CFBundleLanguageNamesArray
374 { "Armenian", "hy" }, // # __CFBundleLanguageNamesArray
375 { "Assamese", "as" }, // # __CFBundleLanguageNamesArray
376 { "Aymara", "ay" }, // # __CFBundleLanguageNamesArray
377 { "Azerbaijani", "az" }, // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn")
378 { "Basque", "eu" }, // # __CFBundleLanguageNamesArray
379 { "Belarusian", "be" }, // # handle other names
380 { "Belorussian", "be" }, // # handle other names
381 { "Bengali", "bn" }, // # __CFBundleLanguageNamesArray
382 { "Brazilian Portugese", "pt-BR" }, // # from Installer.app Info.plist IFLanguages key, misspelled
383 { "Brazilian Portuguese", "pt-BR" }, // # correct spelling for above
384 { "Breton", "br" }, // # __CFBundleLanguageNamesArray
385 { "Bulgarian", "bg" }, // # __CFBundleLanguageNamesArray
386 { "Burmese", "my" }, // # __CFBundleLanguageNamesArray
387 { "Byelorussian", "be" }, // # __CFBundleLanguageNamesArray
388 { "Catalan", "ca" }, // # __CFBundleLanguageNamesArray
389 { "Chewa", "ny" }, // # handle other names
390 { "Chichewa", "ny" }, // # handle other names
391 { "Chinese", "zh" }, // -Hans,-Hant? # __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans")
392 { "Chinese, Simplified", "zh-Hans" }, // # from Installer.app Info.plist IFLanguages key
393 { "Chinese, Traditional", "zh-Hant" }, // # correct spelling for below
394 { "Chinese, Tradtional", "zh-Hant" }, // # from Installer.app Info.plist IFLanguages key, misspelled
395 { "Croatian", "hr" }, // # __CFBundleLanguageNamesArray
396 { "Czech", "cs" }, // # __CFBundleLanguageNamesArray
397 { "Danish", "da" }, // # __CFBundleLanguageNamesArray
398 { "Dutch", "nl" }, // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE")
399 { "Dzongkha", "dz" }, // # __CFBundleLanguageNamesArray
400 { "English", "en" }, // # __CFBundleLanguageNamesArray
401 { "Esperanto", "eo" }, // # __CFBundleLanguageNamesArray
402 { "Estonian", "et" }, // # __CFBundleLanguageNamesArray
403 { "Faroese", "fo" }, // # __CFBundleLanguageNamesArray
404 { "Farsi", "fa" }, // # __CFBundleLanguageNamesArray
405 { "Finnish", "fi" }, // # __CFBundleLanguageNamesArray
406 { "Flemish", "nl-BE" }, // # handle other names
407 { "French", "fr" }, // # __CFBundleLanguageNamesArray
408 { "Galician", "gl" }, // # __CFBundleLanguageNamesArray
409 { "Gallegan", "gl" }, // # handle other names
410 { "Georgian", "ka" }, // # __CFBundleLanguageNamesArray
411 { "German", "de" }, // # __CFBundleLanguageNamesArray
412 { "Greek", "el" }, // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc")
413 { "Greenlandic", "kl" }, // # __CFBundleLanguageNamesArray
414 { "Guarani", "gn" }, // # __CFBundleLanguageNamesArray
415 { "Gujarati", "gu" }, // # __CFBundleLanguageNamesArray
416 { "Hawaiian", "haw" }, // # handle new languages
417 { "Hebrew", "he" }, // # __CFBundleLanguageNamesArray
418 { "Hindi", "hi" }, // # __CFBundleLanguageNamesArray
419 { "Hungarian", "hu" }, // # __CFBundleLanguageNamesArray
420 { "Icelandic", "is" }, // # __CFBundleLanguageNamesArray
421 { "Indonesian", "id" }, // # __CFBundleLanguageNamesArray
422 { "Inuktitut", "iu" }, // # __CFBundleLanguageNamesArray
423 { "Irish", "ga" }, // # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots")
424 { "Italian", "it" }, // # __CFBundleLanguageNamesArray
425 { "Japanese", "ja" }, // # __CFBundleLanguageNamesArray
426 { "Javanese", "jv" }, // # __CFBundleLanguageNamesArray
427 { "Kalaallisut", "kl" }, // # handle other names
428 { "Kannada", "kn" }, // # __CFBundleLanguageNamesArray
429 { "Kashmiri", "ks" }, // # __CFBundleLanguageNamesArray
430 { "Kazakh", "kk" }, // # __CFBundleLanguageNamesArray
431 { "Khmer", "km" }, // # __CFBundleLanguageNamesArray
432 { "Kinyarwanda", "rw" }, // # __CFBundleLanguageNamesArray
433 { "Kirghiz", "ky" }, // # __CFBundleLanguageNamesArray
434 { "Korean", "ko" }, // # __CFBundleLanguageNamesArray
435 { "Kurdish", "ku" }, // # __CFBundleLanguageNamesArray
436 { "Lao", "lo" }, // # __CFBundleLanguageNamesArray
437 { "Latin", "la" }, // # __CFBundleLanguageNamesArray
438 { "Latvian", "lv" }, // # __CFBundleLanguageNamesArray
439 { "Lithuanian", "lt" }, // # __CFBundleLanguageNamesArray
440 { "Macedonian", "mk" }, // # __CFBundleLanguageNamesArray
441 { "Malagasy", "mg" }, // # __CFBundleLanguageNamesArray
442 { "Malay", "ms" }, // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab")
443 { "Malayalam", "ml" }, // # __CFBundleLanguageNamesArray
444 { "Maltese", "mt" }, // # __CFBundleLanguageNamesArray
445 { "Manx", "gv" }, // # __CFBundleLanguageNamesArray
446 { "Marathi", "mr" }, // # __CFBundleLanguageNamesArray
447 { "Moldavian", "mo" }, // # __CFBundleLanguageNamesArray
448 { "Mongolian", "mn" }, // -Mong,-Cyrl? # __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl")
449 { "Nepali", "ne" }, // # __CFBundleLanguageNamesArray
450 { "Norwegian", "nb" }, // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no")
451 { "Nyanja", "ny" }, // # __CFBundleLanguageNamesArray
452 { "Nynorsk", "nn" }, // # handle other names (no entry in __CFBundleLanguageNamesArray)
453 { "Oriya", "or" }, // # __CFBundleLanguageNamesArray
454 { "Oromo", "om" }, // # __CFBundleLanguageNamesArray
455 { "Panjabi", "pa" }, // # handle other names
456 { "Pashto", "ps" }, // # __CFBundleLanguageNamesArray
457 { "Persian", "fa" }, // # handle other names
458 { "Polish", "pl" }, // # __CFBundleLanguageNamesArray
459 { "Portuguese", "pt" }, // # __CFBundleLanguageNamesArray
460 { "Portuguese, Brazilian", "pt-BR" }, // # handle other names
461 { "Punjabi", "pa" }, // # __CFBundleLanguageNamesArray
462 { "Pushto", "ps" }, // # handle other names
463 { "Quechua", "qu" }, // # __CFBundleLanguageNamesArray
464 { "Romanian", "ro" }, // # __CFBundleLanguageNamesArray
465 { "Ruanda", "rw" }, // # handle other names
466 { "Rundi", "rn" }, // # __CFBundleLanguageNamesArray
467 { "Russian", "ru" }, // # __CFBundleLanguageNamesArray
468 { "Sami", "se" }, // # __CFBundleLanguageNamesArray
469 { "Sanskrit", "sa" }, // # __CFBundleLanguageNamesArray
470 { "Scottish", "gd" }, // # __CFBundleLanguageNamesArray
471 { "Serbian", "sr" }, // # __CFBundleLanguageNamesArray
472 { "Simplified Chinese", "zh-Hans" }, // # handle other names
473 { "Sindhi", "sd" }, // # __CFBundleLanguageNamesArray
474 { "Sinhalese", "si" }, // # __CFBundleLanguageNamesArray
475 { "Slovak", "sk" }, // # __CFBundleLanguageNamesArray
476 { "Slovenian", "sl" }, // # __CFBundleLanguageNamesArray
477 { "Somali", "so" }, // # __CFBundleLanguageNamesArray
478 { "Spanish", "es" }, // # __CFBundleLanguageNamesArray
479 { "Sundanese", "su" }, // # __CFBundleLanguageNamesArray
480 { "Swahili", "sw" }, // # __CFBundleLanguageNamesArray
481 { "Swedish", "sv" }, // # __CFBundleLanguageNamesArray
9f29f3f8 482 { "Tagalog", "fil" }, // # __CFBundleLanguageNamesArray
bd5b749c
A
483 { "Tajik", "tg" }, // # handle other names
484 { "Tajiki", "tg" }, // # __CFBundleLanguageNamesArray
485 { "Tamil", "ta" }, // # __CFBundleLanguageNamesArray
486 { "Tatar", "tt" }, // # __CFBundleLanguageNamesArray
487 { "Telugu", "te" }, // # __CFBundleLanguageNamesArray
488 { "Thai", "th" }, // # __CFBundleLanguageNamesArray
489 { "Tibetan", "bo" }, // # __CFBundleLanguageNamesArray
490 { "Tigrinya", "ti" }, // # __CFBundleLanguageNamesArray
491 { "Tongan", "to" }, // # __CFBundleLanguageNamesArray
492 { "Traditional Chinese", "zh-Hant" }, // # handle other names
493 { "Turkish", "tr" }, // # __CFBundleLanguageNamesArray
494 { "Turkmen", "tk" }, // # __CFBundleLanguageNamesArray
495 { "Uighur", "ug" }, // # __CFBundleLanguageNamesArray
496 { "Ukrainian", "uk" }, // # __CFBundleLanguageNamesArray
497 { "Urdu", "ur" }, // # __CFBundleLanguageNamesArray
498 { "Uzbek", "uz" }, // # __CFBundleLanguageNamesArray
499 { "Vietnamese", "vi" }, // # __CFBundleLanguageNamesArray
500 { "Welsh", "cy" }, // # __CFBundleLanguageNamesArray
501 { "Yiddish", "yi" }, // # __CFBundleLanguageNamesArray
502 { "ar_??", "ar" }, // # from old MapScriptInfoAndISOCodes
503 { "az.Ar", "az-Arab" }, // # from old LocaleRefGetPartString
504 { "az.Cy", "az-Cyrl" }, // # from old LocaleRefGetPartString
9f29f3f8 505 { "az.La", "az" }, // # from old LocaleRefGetPartString
bd5b749c
A
506 { "be_??", "be_BY" }, // # from old MapScriptInfoAndISOCodes
507 { "bn_??", "bn" }, // # from old LocaleRefGetPartString
508 { "bo_??", "bo" }, // # from old MapScriptInfoAndISOCodes
509 { "br_??", "br" }, // # from old MapScriptInfoAndISOCodes
510 { "cy_??", "cy" }, // # from old MapScriptInfoAndISOCodes
511 { "de-96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9>
512 { "de_96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9>
513 { "de_??", "de-1996" }, // # from old MapScriptInfoAndISOCodes
514 { "el.El-P", "grc" }, // # from old LocaleRefGetPartString
515 { "en-ascii", "en_001" }, // # from earlier version of tables in this file!
516 { "en_??", "en_001" }, // # from old MapScriptInfoAndISOCodes
517 { "eo_??", "eo" }, // # from old MapScriptInfoAndISOCodes
518 { "es_??", "es_419" }, // # from old MapScriptInfoAndISOCodes
519 { "es_XL", "es_419" }, // # from earlier version of tables in this file!
520 { "fr_??", "fr_001" }, // # from old MapScriptInfoAndISOCodes
521 { "ga-dots", "ga-Latg" }, // # from earlier version of tables in this file! // <1.8>
522 { "ga-dots_IE", "ga-Latg_IE" }, // # from earlier version of tables in this file! // <1.8>
523 { "ga.Lg", "ga-Latg" }, // # from old LocaleRefGetPartString // <1.8>
524 { "ga.Lg_IE", "ga-Latg_IE" }, // # from old LocaleRefGetPartString // <1.8>
525 { "gd_??", "gd" }, // # from old MapScriptInfoAndISOCodes
526 { "gv_??", "gv" }, // # from old MapScriptInfoAndISOCodes
527 { "jv.La", "jv" }, // # logical extension // <1.9>
528 { "jw.La", "jv" }, // # from old LocaleRefGetPartString
529 { "kk.Cy", "kk" }, // # from old LocaleRefGetPartString
530 { "kl.La", "kl" }, // # from old LocaleRefGetPartString
531 { "kl.La_GL", "kl_GL" }, // # from old LocaleRefGetPartString // <1.9>
532 { "lp_??", "se" }, // # from old MapScriptInfoAndISOCodes
533 { "mk_??", "mk_MK" }, // # from old MapScriptInfoAndISOCodes
9f29f3f8 534 { "mn.Cy", "mn" }, // # from old LocaleRefGetPartString
bd5b749c
A
535 { "mn.Mn", "mn-Mong" }, // # from old LocaleRefGetPartString
536 { "ms.Ar", "ms-Arab" }, // # from old LocaleRefGetPartString
537 { "ms.La", "ms" }, // # from old LocaleRefGetPartString
538 { "nl-be", "nl-BE" }, // # from old LocaleRefGetPartString
539 { "nl-be_BE", "nl_BE" }, // # from old LocaleRefGetPartString
cf7d2af9
A
540 { "no-NO", "nb-NO" }, // # not handled by localeStringPrefixToCanonical
541 { "no-NO_NO", "nb-NO_NO" }, // # not handled by localeStringPrefixToCanonical
bd5b749c
A
542// { "no-bok_NO", "nb_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
543// { "no-nyn_NO", "nn_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
544// { "nya", "ny" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
545 { "pa_??", "pa" }, // # from old LocaleRefGetPartString
546 { "sa.Dv", "sa" }, // # from old LocaleRefGetPartString
547 { "sl_??", "sl_SI" }, // # from old MapScriptInfoAndISOCodes
cf7d2af9 548 { "sr_??", "sr_RS" }, // # from old MapScriptInfoAndISOCodes // <1.18>
bd5b749c
A
549 { "su.La", "su" }, // # from old LocaleRefGetPartString
550 { "yi.He", "yi" }, // # from old LocaleRefGetPartString
551 { "zh-simp", "zh-Hans" }, // # from earlier version of tables in this file!
552 { "zh-trad", "zh-Hant" }, // # from earlier version of tables in this file!
553 { "zh.Ha-S", "zh-Hans" }, // # from old LocaleRefGetPartString
554 { "zh.Ha-S_CN", "zh_CN" }, // # from old LocaleRefGetPartString
555 { "zh.Ha-T", "zh-Hant" }, // # from old LocaleRefGetPartString
556 { "zh.Ha-T_TW", "zh_TW" }, // # from old LocaleRefGetPartString
557};
558enum {
559 kNumOldAppleLocaleToCanonical = sizeof(oldAppleLocaleToCanonical)/sizeof(KeyStringToResultString)
560};
561
562static const KeyStringToResultString localeStringPrefixToCanonical[] = {
563// Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to 2-letter ISO 639 code.
564// (special cases for 'sh' handled separately)
565// First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column.
566//
567// non-canonical canonical [ comment ] # source/reason for non-canonical string
568// prefix prefix
569// ------------- ---------
570
9f29f3f8
A
571 { "aar", "aa" }, // Afar
572 // { "aa_SAAHO", "ssy" }, // Saho # deprecated/grandfathered, handled as a special case
573 { "abk", "ab" }, // Abkhazian
bd5b749c 574 { "afr", "af" }, // Afrikaans
9f29f3f8
A
575 { "aju", "jrb" }, // Moroccan Judeo-Arabic -> Judeo-Arabic (macrolang.)
576 { "aka", "ak" }, // Akan
bd5b749c 577 { "alb", "sq" }, // Albanian
9f29f3f8 578 { "als", "sq" }, // Tosk Albanian -> Albanian (macrolang.)
bd5b749c
A
579 { "amh", "am" }, // Amharic
580 { "ara", "ar" }, // Arabic
9f29f3f8
A
581 { "arb", "ar" }, // Std Arabic -> Arabic (macrolang.)
582 { "arg", "an" }, // Aragonese
bd5b749c 583 { "arm", "hy" }, // Armenian
9f29f3f8 584 { "art-lojban", "jbo" }, // Lojban # deprecated/grandfathered
bd5b749c 585 { "asm", "as" }, // Assamese
9f29f3f8
A
586 { "ava", "av" }, // Avaric
587 { "ave", "ae" }, // Avestan
bd5b749c 588 { "aym", "ay" }, // Aymara
9f29f3f8 589 { "ayr", "ay" }, // Central Aymara -> Aymara (macrolang.)
bd5b749c 590 { "aze", "az" }, // Azerbaijani
9f29f3f8
A
591 { "azj", "az" }, // N.Azerbaijani -> Azerbaijani (macrolang.)
592 { "bak", "ba" }, // Bashkir
593 { "bam", "bm" }, // Bambara
bd5b749c 594 { "baq", "eu" }, // Basque
9f29f3f8
A
595 { "bcc", "bal" }, // Balochi, Southern -> Baluchi (macrolang.)
596 { "bcl", "bik" }, // Bicolano, Central -> Bikol (macrolang.)
bd5b749c
A
597 { "bel", "be" }, // Belarusian
598 { "ben", "bn" }, // Bengali
599 { "bih", "bh" }, // Bihari
9f29f3f8 600 { "bis", "bi" }, // Bislama
bd5b749c
A
601 { "bod", "bo" }, // Tibetan
602 { "bos", "bs" }, // Bosnian
603 { "bre", "br" }, // Breton
604 { "bul", "bg" }, // Bulgarian
605 { "bur", "my" }, // Burmese
9f29f3f8
A
606 { "bxk", "luy" }, // Lubukusu -> Luyia (macrolang.)
607 { "bxr", "bua" }, // Buriat, Russia -> Buriat (macrolang.)
bd5b749c
A
608 { "cat", "ca" }, // Catalan
609 { "ces", "cs" }, // Czech
9f29f3f8 610 { "cha", "ch" }, // Chamorro
bd5b749c
A
611 { "che", "ce" }, // Chechen
612 { "chi", "zh" }, // Chinese
9f29f3f8
A
613 { "chu", "cu" }, // Church Slavic, Church Slavonic, Old Bulgarian, Old Church Slavonic, Old Slavonic
614 { "chv", "cv" }, // Chuvash
615 { "cld", "syr" }, // Chaldean Neo-Aramaic -> Syriac (macrolang.)
616 { "cmn", "zh" }, // Mandarin -> Chinese (macrolang.)
bd5b749c
A
617 { "cor", "kw" }, // Cornish
618 { "cos", "co" }, // Corsican
9f29f3f8
A
619 { "cre", "cr" }, // Cree
620 { "cwd", "cr" }, // Cree, Woods -> Cree (macrolang.)
bd5b749c
A
621 { "cym", "cy" }, // Welsh
622 { "cze", "cs" }, // Czech
623 { "dan", "da" }, // Danish
624 { "deu", "de" }, // German
9f29f3f8
A
625 { "dgo", "doi" }, // Dogri -> Dogri (macrolang.)
626 { "dhd", "mwr" }, // Dhundari -> Marwari (macrolang.)
627 { "dik", "din" }, // Southwestern Dinka -> Dinka (macrolang.)
628 { "diq", "zza" }, // Dimli -> Zaza (macrolang.)
629 { "div", "dv" }, // Dhivehi, Divehi, Maldivian
bd5b749c
A
630 { "dut", "nl" }, // Dutch
631 { "dzo", "dz" }, // Dzongkha
9f29f3f8 632 { "ekk", "et" }, // Std Estonian -> Estonian (macrolang.)
bd5b749c 633 { "ell", "el" }, // Greek, Modern (1453-)
9f29f3f8 634 { "emk", "man" }, // Maninkakan, Eastern -> Mandingo (macrolang.)
bd5b749c
A
635 { "eng", "en" }, // English
636 { "epo", "eo" }, // Esperanto
9f29f3f8 637 { "esk", "ik" }, // Northwest Alaska Inupiatun -> Inupiaq (macrolang.)
bd5b749c
A
638 { "est", "et" }, // Estonian
639 { "eus", "eu" }, // Basque
9f29f3f8 640 { "ewe", "ee" }, // Ewe
bd5b749c
A
641 { "fao", "fo" }, // Faroese
642 { "fas", "fa" }, // Persian
9f29f3f8
A
643 { "fat", "ak" }, // Fanti -> Akan (macrolang.)
644 { "fij", "fj" }, // Fijian
bd5b749c
A
645 { "fin", "fi" }, // Finnish
646 { "fra", "fr" }, // French
647 { "fre", "fr" }, // French
9f29f3f8
A
648 { "fry", "fy" }, // Western Frisian
649 { "fuc", "ff" }, // Pular -> Fulah (macrolang.)
650 { "ful", "ff" }, // Fulah
651 { "gaz", "om" }, // W.Central Oromo -> Oromo (macrolang.)
652 { "gbo", "grb" }, // Northern Grebo -> Grebo (macrolang.)
bd5b749c
A
653 { "geo", "ka" }, // Georgian
654 { "ger", "de" }, // German
655 { "gla", "gd" }, // Gaelic,Scottish
656 { "gle", "ga" }, // Irish
657 { "glg", "gl" }, // Gallegan
658 { "glv", "gv" }, // Manx
9f29f3f8 659 { "gno", "gon" }, // Northern Gondi -> Gondi (macrolang.)
bd5b749c
A
660 { "gre", "el" }, // Greek, Modern (1453-)
661 { "grn", "gn" }, // Guarani
9f29f3f8 662 { "gug", "gn" }, // Paraguayan Guarani -> Guarani (macrolang.)
bd5b749c 663 { "guj", "gu" }, // Gujarati
9f29f3f8
A
664 { "gya", "gba" }, // Northwest Gbaya -> Gbaya (Cent. Afr. Rep.) (macrolang.)
665 { "hat", "ht" }, // Haitian, Haitian Creole
666 { "hau", "ha" }, // Hausa
667 { "hbs", "sr_Latn" }, // Serbo-Croatian
668 { "hdn", "hai" }, // Northern Haida -> Haida (macrolang.)
669 { "hea", "hmn" }, // Northern Qiandong Miao -> Hmong (macrolang.)
bd5b749c 670 { "heb", "he" }, // Hebrew
9f29f3f8
A
671 { "her", "hz" }, // Herero
672 { "him", "srx" }, // Himachali -> Sirmauri (= Pahari, Himachali) (macrolang.)
bd5b749c 673 { "hin", "hi" }, // Hindi
9f29f3f8 674 { "hmo", "ho" }, // Hiri Motu
bd5b749c
A
675 { "hrv", "hr" }, // Croatian
676 { "hun", "hu" }, // Hungarian
677 { "hye", "hy" }, // Armenian
9f29f3f8
A
678 { "i-ami", "ami" }, // Amis # deprecated/grandfathered
679 { "i-bnn", "bnn" }, // Bunun # deprecated/grandfathered
680 { "i-hak", "hak" }, // Hakka # deprecated RFC 3066
681 { "i-klingon", "tlh" }, // Klingon # deprecated/grandfathered
bd5b749c
A
682 { "i-lux", "lb" }, // Luxembourgish # deprecated RFC 3066
683 { "i-navajo", "nv" }, // Navajo # deprecated RFC 3066
9f29f3f8
A
684 { "i-pwn", "pwn" }, // Paiwan # deprecated/grandfathered
685 { "i-tao", "tao" }, // Tao # deprecated/grandfathered
686 { "i-tay", "tay" }, // Tayal # deprecated/grandfathered
687 { "i-tsu", "tsu" }, // Tsou # deprecated/grandfathered
688 { "ibo", "ig" }, // Igbo
bd5b749c 689 { "ice", "is" }, // Icelandic
9f29f3f8
A
690 { "ido", "io" }, // Ido
691 { "iii", "ii" }, // Sichuan Yi, Nuosu
692 { "ike", "iu" }, // E.Canada Inuktitut -> Inuktitut (macrolang.)
bd5b749c
A
693 { "iku", "iu" }, // Inuktitut
694 { "ile", "ie" }, // Interlingue
695 { "in", "id" }, // Indonesian # deprecated 639 code in -> id (1989)
696 { "ina", "ia" }, // Interlingua
697 { "ind", "id" }, // Indonesian
9f29f3f8 698 { "ipk", "ik" }, // Inupiaq
bd5b749c
A
699 { "isl", "is" }, // Icelandic
700 { "ita", "it" }, // Italian
701 { "iw", "he" }, // Hebrew # deprecated 639 code iw -> he (1989)
702 { "jav", "jv" }, // Javanese
703 { "jaw", "jv" }, // Javanese # deprecated 639 code jaw -> jv (2001)
704 { "ji", "yi" }, // Yiddish # deprecated 639 code ji -> yi (1989)
705 { "jpn", "ja" }, // Japanese
9f29f3f8 706 { "jw", "jv" }, // Javanese # deprecated
bd5b749c
A
707 { "kal", "kl" }, // Kalaallisut
708 { "kan", "kn" }, // Kannada
709 { "kas", "ks" }, // Kashmiri
710 { "kat", "ka" }, // Georgian
9f29f3f8 711 { "kau", "kr" }, // Kanuri
bd5b749c 712 { "kaz", "kk" }, // Kazakh
9f29f3f8 713 { "khk", "mn" }, // Halh Mongolian [mainly Cyrl] -> Mongolian (macrolang.)
bd5b749c 714 { "khm", "km" }, // Khmer
9f29f3f8 715 { "kik", "ki" }, // Kikuyu, Gikuyu
bd5b749c
A
716 { "kin", "rw" }, // Kinyarwanda
717 { "kir", "ky" }, // Kirghiz
9f29f3f8
A
718 { "kmr", "ku" }, // Northern Kurdish -> Kurdish (macrolang.)
719 { "knc", "kr" }, // Central Kanuri -> Kanuri (macrolang.)
720 { "kng", "kg" }, // Koongo -> Kongo (macrolang.)
721 { "knn", "kok" }, // Konkani (individ.lang) -> Konkani (macrolang.)
722 { "kom", "kv" }, // Komi
723 { "kon", "kg" }, // Kongo
bd5b749c 724 { "kor", "ko" }, // Korean
9f29f3f8
A
725 { "kpv", "kv" }, // Komi-Zyrian -> Komi (macrolang.)
726 { "kua", "kj" }, // Kuanyama, Kwanyama
bd5b749c
A
727 { "kur", "ku" }, // Kurdish
728 { "lao", "lo" }, // Lao
729 { "lat", "la" }, // Latin
730 { "lav", "lv" }, // Latvian
9f29f3f8
A
731 { "lbk", "bnc" }, // Central Bontok -> Bontok (macrolang.)
732 { "lim", "li" }, // Limburgan, Limburger, Limburgish
733 { "lin", "ln" }, // Lingala
bd5b749c
A
734 { "lit", "lt" }, // Lithuanian
735 { "ltz", "lb" }, // Letzeburgesch
9f29f3f8
A
736 { "lub", "lu" }, // Luba-Katanga
737 { "lug", "lg" }, // Ganda
738 { "lvs", "lv" }, // Std Latvian -> Latvian (macrolang.)
bd5b749c
A
739 { "mac", "mk" }, // Macedonian
740 { "mal", "ml" }, // Malayalam
741 { "mar", "mr" }, // Marathi
742 { "may", "ms" }, // Malay
9f29f3f8 743 { "mhr", "chm" }, // Mari, Eastern -> Mari (Russia) (macrolang.)
bd5b749c
A
744 { "mkd", "mk" }, // Macedonian
745 { "mlg", "mg" }, // Malagasy
746 { "mlt", "mt" }, // Maltese
747 { "mol", "mo" }, // Moldavian
748 { "mon", "mn" }, // Mongolian
749 { "msa", "ms" }, // Malay
9f29f3f8 750 { "mup", "raj" }, // Malvi -> Rajasthani (macrolang.)
bd5b749c 751 { "mya", "my" }, // Burmese
9f29f3f8
A
752 { "nau", "na" }, // Nauru
753 { "nav", "nv" }, // Navajo, Navaho
754 { "nbl", "nr" }, // South Ndebele
755 { "nde", "nd" }, // North Ndebele
756 { "ndo", "ng" }, // Ndonga
bd5b749c
A
757 { "nep", "ne" }, // Nepali
758 { "nld", "nl" }, // Dutch
759 { "nno", "nn" }, // Norwegian Nynorsk
760 { "no", "nb" }, // Norwegian generic # ambiguous 639 code no -> nb
761 { "no-bok", "nb" }, // Norwegian Bokmal # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
762 { "no-nyn", "nn" }, // Norwegian Nynorsk # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
763 { "nob", "nb" }, // Norwegian Bokmal
764 { "nor", "nb" }, // Norwegian generic # ambiguous 639 code nor -> nb
9f29f3f8
A
765 // { "no_BOKMAL", "nb" }, // Norwegian Bokmal # deprecated/grandfathered, handled as a special case
766 // { "no_NYNORSK", "nn" }, // Norwegian Nynorsk # deprecated/grandfathered, handled as a special case
bd5b749c
A
767 { "nya", "ny" }, // Nyanja/Chewa/Chichewa # 3-letter code used in old LocaleRefGetPartString
768 { "oci", "oc" }, // Occitan/Provencal
9f29f3f8
A
769 { "ojg", "oj" }, // Ojibwa, Eastern -> Ojibwa (macrolang.)
770 { "oji", "oj" }, // Ojibwa
bd5b749c
A
771 { "ori", "or" }, // Oriya
772 { "orm", "om" }, // Oromo,Galla
9f29f3f8 773 { "oss", "os" }, // Ossetian, Ossetic
bd5b749c 774 { "pan", "pa" }, // Panjabi
9f29f3f8 775 { "pbu", "ps" }, // N.Pashto, -> Pushto (macrolang.)
bd5b749c 776 { "per", "fa" }, // Persian
9f29f3f8
A
777 { "pes", "fa" }, // W.Farsi -> Persian (macrolang.)
778 { "pli", "pi" }, // Pali
779 { "plt", "mg" }, // Plateau Malagasy -> Malagasy (macrolang.)
780 { "pnb", "lah" }, // W.Panjabi -> Lahnda (macrolang.)
bd5b749c
A
781 { "pol", "pl" }, // Polish
782 { "por", "pt" }, // Portuguese
783 { "pus", "ps" }, // Pushto
784 { "que", "qu" }, // Quechua
9f29f3f8
A
785 { "qxp", "qu" }, // Puno Quechua -> Quechua (macrolang.)
786 { "rmy", "rom" }, // Vlax Romani -> Romany (macrolang.)
bd5b749c
A
787 { "roh", "rm" }, // Raeto-Romance
788 { "ron", "ro" }, // Romanian
789 { "rum", "ro" }, // Romanian
790 { "run", "rn" }, // Rundi
791 { "rus", "ru" }, // Russian
9f29f3f8 792 { "sag", "sg" }, // Sango
bd5b749c
A
793 { "san", "sa" }, // Sanskrit
794 { "scc", "sr" }, // Serbian
795 { "scr", "hr" }, // Croatian
9f29f3f8
A
796 { "sgn-be-fr", "sfb" }, // Belgian-French Sign Lang. # deprecated/grandfathered
797 { "sgn-be-nl", "vgt" }, // Belgian-Flemish Sign Lang. # deprecated/grandfathered
798 { "sgn-ch-de", "sgg" }, // Swiss German Sign Lang. # deprecated/grandfathered
bd5b749c
A
799 { "sin", "si" }, // Sinhalese
800 { "slk", "sk" }, // Slovak
801 { "slo", "sk" }, // Slovak
802 { "slv", "sl" }, // Slovenian
803 { "sme", "se" }, // Sami,Northern
9f29f3f8
A
804 { "smo", "sm" }, // Samoan
805 { "sna", "sn" }, // Shona
bd5b749c
A
806 { "snd", "sd" }, // Sindhi
807 { "som", "so" }, // Somali
9f29f3f8 808 { "sot", "st" }, // Southern Sotho
bd5b749c 809 { "spa", "es" }, // Spanish
9f29f3f8 810 { "spy", "kln" }, // Sabaot -> Kalenjin (macrolang.)
bd5b749c 811 { "sqi", "sq" }, // Albanian
9f29f3f8
A
812 { "src", "sc" }, // Sardinian, Logudorese -> Sardinian (macrolang.)
813 { "srd", "sc" }, // Sardinian
bd5b749c 814 { "srp", "sr" }, // Serbian
9f29f3f8 815 { "ssw", "ss" }, // Swati
bd5b749c
A
816 { "sun", "su" }, // Sundanese
817 { "swa", "sw" }, // Swahili
818 { "swe", "sv" }, // Swedish
9f29f3f8
A
819 { "swh", "sw" }, // Swahili (individ.lang) -> Swahili (macrolang.)
820 { "tah", "ty" }, // Tahitian
bd5b749c
A
821 { "tam", "ta" }, // Tamil
822 { "tat", "tt" }, // Tatar
823 { "tel", "te" }, // Telugu
824 { "tgk", "tg" }, // Tajik
d7384798 825 { "tgl", "fil" }, // Tagalog
bd5b749c
A
826 { "tha", "th" }, // Thai
827 { "tib", "bo" }, // Tibetan
828 { "tir", "ti" }, // Tigrinya
9f29f3f8 829 { "tl", "fil" }, // Tagalog # legacy
bd5b749c 830 { "ton", "to" }, // Tongan
9f29f3f8
A
831 { "tsn", "tn" }, // Tswana
832 { "tso", "ts" }, // Tsonga
833 { "ttq", "tmh" }, // Tamajaq, Tawallammat -> Tamashek (macrolang.)
bd5b749c
A
834 { "tuk", "tk" }, // Turkmen
835 { "tur", "tr" }, // Turkish
9f29f3f8
A
836 { "tw", "ak" }, // Twi -> Akan (macrolang.)
837 { "twi", "ak" }, // Twi
bd5b749c
A
838 { "uig", "ug" }, // Uighur
839 { "ukr", "uk" }, // Ukrainian
9f29f3f8 840 { "umu", "del" }, // Munsee -> Delaware (macrolang.)
bd5b749c
A
841 { "urd", "ur" }, // Urdu
842 { "uzb", "uz" }, // Uzbek
9f29f3f8
A
843 { "uzn", "uz" }, // N. Uzbek -> Uzbek (macrolang.)
844 { "ven", "ve" }, // Venda
bd5b749c 845 { "vie", "vi" }, // Vietnamese
9f29f3f8 846 { "vol", "vo" }, // Volapük
bd5b749c 847 { "wel", "cy" }, // Welsh
9f29f3f8
A
848 { "wln", "wa" }, // Walloon
849 { "wol", "wo" }, // Wolof
850 { "xho", "xh" }, // Xhosa
851 { "xpe", "kpe" }, // Kpelle, Liberia -> Kpelle (macrolang.)
852 { "xsl", "den" }, // Slavey, South -> Slave (Athapascan) (macrolang.)
853 { "ydd", "yi" }, // Yiddish,E. -> Yiddish (macrolang.)
bd5b749c 854 { "yid", "yi" }, // Yiddish
9f29f3f8
A
855 { "yor", "yo" }, // Yoruba
856 { "zai", "zap" }, // Zapotec, Isthmus -> Zapotec (macrolang.)
857 { "zh-cdo", "cdo" }, // Chinese, Min Dong # extlang
858 { "zh-cjy", "cjy" }, // Chinese, Jinyu # extlang
859 { "zh-cmn", "zh" }, // Chinese, Mandarin # extlang
860 { "zh-cpx", "cpx" }, // Chinese, Pu-Xian # extlang
861 { "zh-czh", "czh" }, // Chinese, Huizhou # extlang
862 { "zh-czo", "czo" }, // Chinese, Min Zhong # extlang
863 { "zh-gan", "gan" }, // Chinese, Gan # extlang
864 { "zh-guoyu", "zh" }, // Mandarin/Std Chinese # deprecated
865 { "zh-hak", "hak" }, // Chinese, Hakka # extlang
866 { "zh-hakka", "hak" }, // Hakka # deprecated
867 { "zh-hsn", "hsn" }, // Chinese, Xiang # extlang
868 { "zh-min-nan", "nan" }, // Minnan,Hokkien,Taiwanese,So. Fujian # deprecated
869 { "zh-mnp", "mnp" }, // Chinese, Min Bei # extlang
870 { "zh-nan", "nan" }, // Chinese, Min Nan # extlang
871 { "zh-wuu", "wuu" }, // Chinese, Wu # extlang
872 { "zh-xiang", "hsn" }, // Xiang/Hunanese # deprecated
873 { "zh-yue", "yue" }, // Chinese, Yue # extlang
874 { "zha", "za" }, // Zhuang, Chuang
bd5b749c 875 { "zho", "zh" }, // Chinese
9f29f3f8
A
876 { "zsm", "ms" }, // Std Malay -> Malay (macrolang.)
877 { "zul", "zu" }, // Zulu
878 { "zyb", "za" }, // Yongbei Zhuang -> Zhuang (macrolang.)
bd5b749c
A
879};
880enum {
881 kNumLocaleStringPrefixToCanonical = sizeof(localeStringPrefixToCanonical)/sizeof(KeyStringToResultString)
882};
883
884
885static const SpecialCaseUpdates specialCases[] = {
886// Data for special cases
887// a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was
888// replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after
cf7d2af9
A
889// the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! Then after
890// Serbia and Montenegro split, the code CS was replaced in 2006-09 with separate codes RS and ME. If we
891// see CS but a language of cs or sk, we change CS to CZ or SK. Otherwise, we change CS (and old YU) to RS.
bd5b749c
A
892// b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and
893// deprecated in 2000. We guess which one to map it to as follows: If there is a region tag of HR we use
cf7d2af9 894// hr; if there is a region tag of (now) RS we use sr; else we do not change it (not enough info).
bd5b749c
A
895// c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the
896// "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB).
897 { NULL, "-UK", "GB", NULL, NULL }, // always change UK to GB (UK is "exceptionally reserved" to mean GB)
898 { NULL, "-TP", "TL", NULL, NULL }, // always change TP to TL (East Timor, code changed 2002-05)
899 { "cs", "-CS", "CZ", NULL, NULL }, // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic)
900 { "sk", "-CS", "SK", NULL, NULL }, // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia)
cf7d2af9
A
901 { NULL, "-CS", "RS", NULL, NULL }, // otherwise map CS (assume Serbia+Montenegro) to RS (Serbia)
902 { NULL, "-YU", "RS", NULL, NULL }, // also map old YU (assume Serbia+Montenegro) to RS (Serbia)
903 { "sh", "-HR", "hr", "-RS", "sr" }, // then if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian)
904 // if we find HR (Croatia) or to 'sr' (Serbian) if we find RS (Serbia).
905 // Note: Do this after changing YU/CS toRS as above.
bd5b749c
A
906 { NULL, NULL, NULL, NULL, NULL } // terminator
907};
908
909
910static const KeyStringToResultString localeStringRegionToDefaults[] = {
911// For some region-code suffixes, there are default substrings to strip off for canonical string.
912// Must be sorted according to how strcmp compares the strings in the first column
913//
914// region default writing
915// suffix system tags, strip comment
916// -------- ------------- ---------
917 { "_CN", "-Hans" }, // mainland China, default is simplified
918 { "_HK", "-Hant" }, // Hong Kong, default is traditional
919 { "_MO", "-Hant" }, // Macao, default is traditional
920 { "_SG", "-Hans" }, // Singapore, default is simplified
921 { "_TW", "-Hant" }, // Taiwan, default is traditional
922};
923enum {
924 kNumLocaleStringRegionToDefaults = sizeof(localeStringRegionToDefaults)/sizeof(KeyStringToResultString)
925};
926
927static const KeyStringToResultString localeStringPrefixToDefaults[] = {
928// For some initial portions of language tag, there are default substrings to strip off for canonical string.
929// Must be sorted according to how strcmp compares the strings in the first column
930//
931// language default writing
932// tag prefix system tags, strip comment
933// -------- ------------- ---------
934 { "ab-", "-Cyrl" }, // Abkhazian
935 { "af-", "-Latn" }, // Afrikaans
9f29f3f8
A
936 { "agq-", "-Latn" }, // Aghem
937 { "ak-", "-Latn" }, // Akan
bd5b749c
A
938 { "am-", "-Ethi" }, // Amharic
939 { "ar-", "-Arab" }, // Arabic
940 { "as-", "-Beng" }, // Assamese
9f29f3f8 941 { "asa-", "-Latn" }, // Asu
bd5b749c 942 { "ay-", "-Latn" }, // Aymara
9f29f3f8
A
943 { "az-", "-Latn" }, // Azerbaijani
944 { "bas-", "-Latn" }, // Basaa
bd5b749c 945 { "be-", "-Cyrl" }, // Belarusian
9f29f3f8
A
946 { "bem-", "-Latn" }, // Bemba
947 { "bez-", "-Latn" }, // Bena
bd5b749c 948 { "bg-", "-Cyrl" }, // Bulgarian
9f29f3f8 949 { "bm-", "-Latn" }, // Bambara
bd5b749c
A
950 { "bn-", "-Beng" }, // Bengali
951 { "bo-", "-Tibt" }, // Tibetan (? not Suppress-Script)
952 { "br-", "-Latn" }, // Breton (? not Suppress-Script)
9f29f3f8 953 { "brx-", "-Deva" }, // Bodo
bd5b749c
A
954 { "bs-", "-Latn" }, // Bosnian
955 { "ca-", "-Latn" }, // Catalan
9f29f3f8
A
956 { "cgg-", "-Latn" }, // Chiga
957 { "chr-", "-Cher" }, // Cherokee
bd5b749c
A
958 { "cs-", "-Latn" }, // Czech
959 { "cy-", "-Latn" }, // Welsh
960 { "da-", "-Latn" }, // Danish
9f29f3f8 961 { "dav-", "-Latn" }, // Taita
bd5b749c 962 { "de-", "-Latn -1901" }, // German, traditional orthography
9f29f3f8
A
963 { "dje-", "-Latn" }, // Zarma
964 { "dua-", "-Latn" }, // Duala
bd5b749c 965 { "dv-", "-Thaa" }, // Divehi/Maldivian
9f29f3f8 966 { "dyo-", "-Latn" }, // Jola-Fonyi
bd5b749c 967 { "dz-", "-Tibt" }, // Dzongkha
9f29f3f8
A
968 { "ebu-", "-Latn" }, // Embu
969 { "ee-", "-Latn" }, // Ewe
bd5b749c
A
970 { "el-", "-Grek" }, // Greek (modern, monotonic)
971 { "en-", "-Latn" }, // English
972 { "eo-", "-Latn" }, // Esperanto
973 { "es-", "-Latn" }, // Spanish
974 { "et-", "-Latn" }, // Estonian
975 { "eu-", "-Latn" }, // Basque
9f29f3f8 976 { "ewo-", "-Latn" }, // Ewondo
bd5b749c 977 { "fa-", "-Arab" }, // Farsi
9f29f3f8 978 { "ff-", "-Latn" }, // Fulah
bd5b749c 979 { "fi-", "-Latn" }, // Finnish
9f29f3f8 980 { "fil-", "-Latn" }, // Tagalog
bd5b749c
A
981 { "fo-", "-Latn" }, // Faroese
982 { "fr-", "-Latn" }, // French
983 { "ga-", "-Latn" }, // Irish
984 { "gd-", "-Latn" }, // Scottish Gaelic (? not Suppress-Script)
985 { "gl-", "-Latn" }, // Galician
986 { "gn-", "-Latn" }, // Guarani
9f29f3f8 987 { "gsw-", "-Latn" }, // Swiss German
bd5b749c 988 { "gu-", "-Gujr" }, // Gujarati
9f29f3f8 989 { "guz-", "-Latn" }, // Gusii
bd5b749c 990 { "gv-", "-Latn" }, // Manx
9f29f3f8 991 { "ha-", "-Latn" }, // Hausa
bd5b749c
A
992 { "haw-", "-Latn" }, // Hawaiian (? not Suppress-Script)
993 { "he-", "-Hebr" }, // Hebrew
994 { "hi-", "-Deva" }, // Hindi
995 { "hr-", "-Latn" }, // Croatian
996 { "hu-", "-Latn" }, // Hungarian
997 { "hy-", "-Armn" }, // Armenian
998 { "id-", "-Latn" }, // Indonesian
9f29f3f8
A
999 { "ig-", "-Latn" }, // Igbo
1000 { "ii-", "-Yiii" }, // Sichuan Yi
bd5b749c
A
1001 { "is-", "-Latn" }, // Icelandic
1002 { "it-", "-Latn" }, // Italian
1003 { "ja-", "-Jpan" }, // Japanese
9f29f3f8 1004 { "jmc-", "-Latn" }, // Machame
bd5b749c 1005 { "ka-", "-Geor" }, // Georgian
9f29f3f8
A
1006 { "kab-", "-Latn" }, // Kabyle
1007 { "kam-", "-Latn" }, // Kamba
1008 { "kde-", "-Latn" }, // Makonde
1009 { "kea-", "-Latn" }, // Kabuverdianu
1010 { "khq-", "-Latn" }, // Koyra Chiini
1011 { "ki-", "-Latn" }, // Kikuyu
bd5b749c
A
1012 { "kk-", "-Cyrl" }, // Kazakh
1013 { "kl-", "-Latn" }, // Kalaallisut/Greenlandic
1014 { "km-", "-Khmr" }, // Central Khmer
1015 { "kn-", "-Knda" }, // Kannada
1016 { "ko-", "-Hang" }, // Korean (? not Suppress-Script)
1017 { "kok-", "-Deva" }, // Konkani
9f29f3f8
A
1018 { "ksb-", "-Latn" }, // Shambala
1019 { "ksf-", "-Latn" }, // Bafia
1020 { "kw-", "-Latn" }, // Cornish
1021 { "ky-", "-Cyrl" }, // Kirghiz
bd5b749c 1022 { "la-", "-Latn" }, // Latin
9f29f3f8 1023 { "lag-", "-Latn" }, // Langi
bd5b749c 1024 { "lb-", "-Latn" }, // Luxembourgish
9f29f3f8
A
1025 { "lg-", "-Latn" }, // Ganda
1026 { "ln-", "-Latn" }, // Lingala
bd5b749c
A
1027 { "lo-", "-Laoo" }, // Lao
1028 { "lt-", "-Latn" }, // Lithuanian
9f29f3f8
A
1029 { "lu-", "-Latn" }, // Luba-Katanga
1030 { "luo-", "-Latn" }, // Luo
1031 { "luy-", "-Latn" }, // Luyia
bd5b749c 1032 { "lv-", "-Latn" }, // Latvian
9f29f3f8
A
1033 { "mas-", "-Latn" }, // Masai
1034 { "mer-", "-Latn" }, // Meru
1035 { "mfe-", "-Latn" }, // Morisyen
bd5b749c 1036 { "mg-", "-Latn" }, // Malagasy
9f29f3f8 1037 { "mgh-", "-Latn" }, // Makhuwa-Meetto
bd5b749c
A
1038 { "mk-", "-Cyrl" }, // Macedonian
1039 { "ml-", "-Mlym" }, // Malayalam
9f29f3f8 1040 { "mn-", "-Cyrl" }, // Mongolian
bd5b749c
A
1041 { "mo-", "-Latn" }, // Moldavian
1042 { "mr-", "-Deva" }, // Marathi
1043 { "ms-", "-Latn" }, // Malay
1044 { "mt-", "-Latn" }, // Maltese
9f29f3f8 1045 { "mua-", "-Latn" }, // Mundang
bd5b749c 1046 { "my-", "-Mymr" }, // Burmese/Myanmar
9f29f3f8 1047 { "naq-", "-Latn" }, // Nama
bd5b749c 1048 { "nb-", "-Latn" }, // Norwegian Bokmal
9f29f3f8 1049 { "nd-", "-Latn" }, // North Ndebele
bd5b749c
A
1050 { "ne-", "-Deva" }, // Nepali
1051 { "nl-", "-Latn" }, // Dutch
9f29f3f8 1052 { "nmg-", "-Latn" }, // Kwasio
bd5b749c 1053 { "nn-", "-Latn" }, // Norwegian Nynorsk
9f29f3f8 1054 { "nus-", "-Latn" }, // Nuer
bd5b749c 1055 { "ny-", "-Latn" }, // Chichewa/Nyanja
9f29f3f8 1056 { "nyn-", "-Latn" }, // Nyankole
bd5b749c
A
1057 { "om-", "-Latn" }, // Oromo
1058 { "or-", "-Orya" }, // Oriya
1059 { "pa-", "-Guru" }, // Punjabi
1060 { "pl-", "-Latn" }, // Polish
1061 { "ps-", "-Arab" }, // Pushto
1062 { "pt-", "-Latn" }, // Portuguese
1063 { "qu-", "-Latn" }, // Quechua
9f29f3f8 1064 { "rm-", "-Latn" }, // Romansh
bd5b749c
A
1065 { "rn-", "-Latn" }, // Rundi
1066 { "ro-", "-Latn" }, // Romanian
9f29f3f8 1067 { "rof-", "-Latn" }, // Rombo
bd5b749c
A
1068 { "ru-", "-Cyrl" }, // Russian
1069 { "rw-", "-Latn" }, // Kinyarwanda
9f29f3f8 1070 { "rwk-", "-Latn" }, // Rwa
bd5b749c 1071 { "sa-", "-Deva" }, // Sanskrit (? not Suppress-Script)
9f29f3f8
A
1072 { "saq-", "-Latn" }, // Samburu
1073 { "sbp-", "-Latn" }, // Sangu
bd5b749c 1074 { "se-", "-Latn" }, // Sami (? not Suppress-Script)
9f29f3f8
A
1075 { "seh-", "-Latn" }, // Sena
1076 { "ses-", "-Latn" }, // Koyraboro Senni
1077 { "sg-", "-Latn" }, // Sango
1078 { "shi-", "-Latn" }, // Tachelhit
bd5b749c
A
1079 { "si-", "-Sinh" }, // Sinhala
1080 { "sk-", "-Latn" }, // Slovak
1081 { "sl-", "-Latn" }, // Slovenian
9f29f3f8 1082 { "sn-", "-Latn" }, // Shona
bd5b749c
A
1083 { "so-", "-Latn" }, // Somali
1084 { "sq-", "-Latn" }, // Albanian
9f29f3f8 1085 { "sr-", "-Cyrl" }, // Serbian
bd5b749c
A
1086 { "sv-", "-Latn" }, // Swedish
1087 { "sw-", "-Latn" }, // Swahili
9f29f3f8 1088 { "swc-", "-Latn" }, // Congo Swahili
bd5b749c
A
1089 { "ta-", "-Taml" }, // Tamil
1090 { "te-", "-Telu" }, // Telugu
9f29f3f8
A
1091 { "teo-", "-Latn" }, // Teso
1092 { "tg-", "-Cyrl" }, // Tajik
bd5b749c
A
1093 { "th-", "-Thai" }, // Thai
1094 { "ti-", "-Ethi" }, // Tigrinya
9f29f3f8 1095 { "tk-", "-Latn" }, // Turkmen
bd5b749c
A
1096 { "tn-", "-Latn" }, // Tswana
1097 { "to-", "-Latn" }, // Tonga of Tonga Islands
1098 { "tr-", "-Latn" }, // Turkish
9f29f3f8
A
1099 { "twq-", "-Latn" }, // Tasawaq
1100 { "tzm-", "-Latn" }, // Central Morocco Tamazight
bd5b749c
A
1101 { "uk-", "-Cyrl" }, // Ukrainian
1102 { "ur-", "-Arab" }, // Urdu
9f29f3f8
A
1103 { "uz-", "-Cyrl" }, // Uzbek
1104 { "vai-", "-Vaii" }, // Vai
bd5b749c 1105 { "vi-", "-Latn" }, // Vietnamese
9f29f3f8 1106 { "vun-", "-Latn" }, // Vunjo
bd5b749c
A
1107 { "wo-", "-Latn" }, // Wolof
1108 { "xh-", "-Latn" }, // Xhosa
9f29f3f8
A
1109 { "xog-", "-Latn" }, // Soga
1110 { "yav-", "-Latn" }, // Yangben
bd5b749c 1111 { "yi-", "-Hebr" }, // Yiddish
9f29f3f8 1112 { "yo-", "-Latn" }, // Yoruba
bd5b749c
A
1113 { "zh-", "-Hani" }, // Chinese (? not Suppress-Script)
1114 { "zu-", "-Latn" }, // Zulu
1115};
1116enum {
1117 kNumLocaleStringPrefixToDefaults = sizeof(localeStringPrefixToDefaults)/sizeof(KeyStringToResultString)
1118};
1119
1120static const KeyStringToResultString appleLocaleToLanguageString[] = {
1121// Map locale strings that Apple uses as language IDs to real language strings.
1122// Must be sorted according to how strcmp compares the strings in the first column.
1123// Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now
1124// handled in the code. <1.19>
1125//
1126// locale lang [ comment ]
1127// string string
1128// ------- -------
1129 { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752]
1130 { "zh_CN", "zh-Hans" }, // mainland China => simplified
d7384798
A
1131 { "zh_HK", "zh-HK" }, // Hong Kong => traditional, not currently used
1132 { "zh_MO", "zh-MO" }, // Macao => traditional, not currently used
1133 { "zh_SG", "zh-SG" }, // Singapore => simplified, not currently used
bd5b749c
A
1134 { "zh_TW", "zh-Hant" }, // Taiwan => traditional
1135};
1136enum {
1137 kNumAppleLocaleToLanguageString = sizeof(appleLocaleToLanguageString)/sizeof(KeyStringToResultString)
1138};
1139
d7384798 1140/*
bd5b749c
A
1141static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle[] = {
1142// Map locale strings that Apple uses as language IDs to real language strings.
1143// Must be sorted according to how strcmp compares the strings in the first column.
1144//
1145// locale lang [ comment ]
1146// string string
1147// ------- -------
1148 { "de_AT", "de-AT" }, // Austrian German
1149 { "de_CH", "de-CH" }, // Swiss German
1150// { "de_DE", "de-DE" }, // German for Germany (default), not currently used
1151 { "en_AU", "en-AU" }, // Australian English
1152 { "en_CA", "en-CA" }, // Canadian English
1153 { "en_GB", "en-GB" }, // British English
1154// { "en_IE", "en-IE" }, // Irish English, not currently used
1155 { "en_US", "en-US" }, // U.S. English
1156 { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752]
1157// { "fr_BE", "fr-BE" }, // Belgian French, not currently used
1158 { "fr_CA", "fr-CA" }, // Canadian French
1159 { "fr_CH", "fr-CH" }, // Swiss French
1160// { "fr_FR", "fr-FR" }, // French for France (default), not currently used
1161 { "nl_BE", "nl-BE" }, // Flemish = Vlaams, Dutch for Belgium
1162// { "nl_NL", "nl-NL" }, // Dutch for Netherlands (default), not currently used
1163 { "pt_BR", "pt-BR" }, // Brazilian Portuguese
1164 { "pt_PT", "pt-PT" }, // Portuguese for Portugal
1165 { "zh_CN", "zh-Hans" }, // mainland China => simplified
1166 { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used
1167 { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used
1168 { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used
1169 { "zh_TW", "zh-Hant" }, // Taiwan => traditional
1170};
1171enum {
1172 kNumAppleLocaleToLanguageStringForCFBundle = sizeof(appleLocaleToLanguageStringForCFBundle)/sizeof(KeyStringToResultString)
1173};
d7384798 1174 */
bd5b749c 1175
d7384798 1176#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
bd5b749c
A
1177
1178struct LocaleToLegacyCodes {
1179 const char * locale; // reduced to language plus one other component (script, region, variant), separators normalized to'_'
1180 RegionCode regCode;
1181 LangCode langCode;
1182 CFStringEncoding encoding;
1183};
1184typedef struct LocaleToLegacyCodes LocaleToLegacyCodes;
1185
1186static const LocaleToLegacyCodes localeToLegacyCodes[] = {
1187 // locale RegionCode LangCode CFStringEncoding
1188 { "af"/*ZA*/, 102/*verAfrikaans*/, 141/*langAfrikaans*/, 0/*Roman*/ }, // Latn
1189 { "am", -1, 85/*langAmharic*/, 28/*Ethiopic*/ }, // Ethi
1190 { "ar", 16/*verArabic*/, 12/*langArabic*/, 4/*Arabic*/ }, // Arab;
1191 { "as", -1, 68/*langAssamese*/, 13/*Bengali*/ }, // Beng;
1192 { "ay", -1, 134/*langAymara*/, 0/*Roman*/ }, // Latn;
9f29f3f8 1193 { "az", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // "az" defaults to -Latn
bd5b749c
A
1194 { "az_Arab", -1, 50/*langAzerbaijanAr*/, 4/*Arabic*/ }, // Arab;
1195 { "az_Cyrl", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // Cyrl;
1196 { "az_Latn", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // Latn;
1197 { "be"/*BY*/, 61/*verBelarus*/, 46/*langBelorussian*/, 7/*Cyrillic*/ }, // Cyrl;
1198 { "bg"/*BG*/, 72/*verBulgaria*/, 44/*langBulgarian*/, 7/*Cyrillic*/ }, // Cyrl;
1199 { "bn", 60/*verBengali*/, 67/*langBengali*/, 13/*Bengali*/ }, // Beng;
1200 { "bo", 105/*verTibetan*/, 63/*langTibetan*/, 26/*Tibetan*/ }, // Tibt;
1201 { "br", 77/*verBreton*/, 142/*langBreton*/, 39/*Celtic*/ }, // Latn;
1202 { "ca"/*ES*/, 73/*verCatalonia*/, 130/*langCatalan*/, 0/*Roman*/ }, // Latn;
1203 { "cs"/*CZ*/, 56/*verCzech*/, 38/*langCzech*/, 29/*CentralEurRoman*/ }, // Latn;
1204 { "cy", 79/*verWelsh*/, 128/*langWelsh*/, 39/*Celtic*/ }, // Latn;
1205 { "da"/*DK*/, 9/*verDenmark*/, 7/*langDanish*/, 0/*Roman*/ }, // Latn;
1206 { "de", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, // assume "de" defaults to verGermany
1207 { "de_1996", 70/*verGermanReformed*/, 2/*langGerman*/, 0/*Roman*/ },
1208 { "de_AT", 92/*verAustria*/, 2/*langGerman*/, 0/*Roman*/ },
1209 { "de_CH", 19/*verGrSwiss*/, 2/*langGerman*/, 0/*Roman*/ },
1210 { "de_DE", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ },
1211 { "dz"/*BT*/, 83/*verBhutan*/, 137/*langDzongkha*/, 26/*Tibetan*/ }, // Tibt;
1212 { "el", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // assume "el" defaults to verGreece
1213 { "el_CY", 23/*verCyprus*/, 14/*langGreek*/, 6/*Greek*/ },
1214 { "el_GR", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // modern monotonic
1215 { "en", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, // "en" defaults to verUS (per Chris Hansten)
1216 { "en_001", 37/*verInternational*/, 0/*langEnglish*/, 0/*Roman*/ },
1217 { "en_AU", 15/*verAustralia*/, 0/*langEnglish*/, 0/*Roman*/ },
1218 { "en_CA", 82/*verEngCanada*/, 0/*langEnglish*/, 0/*Roman*/ },
1219 { "en_GB", 2/*verBritain*/, 0/*langEnglish*/, 0/*Roman*/ },
1220 { "en_IE", 108/*verIrelandEnglish*/, 0/*langEnglish*/, 0/*Roman*/ },
1221 { "en_SG", 100/*verSingapore*/, 0/*langEnglish*/, 0/*Roman*/ },
1222 { "en_US", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ },
1223 { "eo", 103/*verEsperanto*/, 94/*langEsperanto*/, 0/*Roman*/ }, // Latn;
1224 { "es", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, // "es" defaults to verSpain (per Chris Hansten)
1225 { "es_419", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, // new BCP 47 tag
1226 { "es_ES", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ },
1227 { "es_MX", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ },
1228 { "es_US", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ },
1229 { "et"/*EE*/, 44/*verEstonia*/, 27/*langEstonian*/, 29/*CentralEurRoman*/ },
1230 { "eu", -1, 129/*langBasque*/, 0/*Roman*/ }, // Latn;
1231 { "fa"/*IR*/, 48/*verIran*/, 31/*langFarsi/Persian*/, 0x8C/*Farsi*/ }, // Arab;
1232 { "fi"/*FI*/, 17/*verFinland*/, 13/*langFinnish*/, 0/*Roman*/ },
9f29f3f8 1233 { "fil", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn;
bd5b749c
A
1234 { "fo"/*FO*/, 47/*verFaroeIsl*/, 30/*langFaroese*/, 37/*Icelandic*/ },
1235 { "fr", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, // "fr" defaults to verFrance (per Chris Hansten)
1236 { "fr_001", 91/*verFrenchUniversal*/, 1/*langFrench*/, 0/*Roman*/ },
1237 { "fr_BE", 98/*verFrBelgium*/, 1/*langFrench*/, 0/*Roman*/ },
1238 { "fr_CA", 11/*verFrCanada*/, 1/*langFrench*/, 0/*Roman*/ },
1239 { "fr_CH", 18/*verFrSwiss*/, 1/*langFrench*/, 0/*Roman*/ },
1240 { "fr_FR", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ },
1241 { "ga"/*IE*/, 50/*verIreland*/, 35/*langIrishGaelic*/, 0/*Roman*/ }, // no dots (h after)
1242 { "ga_Latg"/*IE*/, 81/*verIrishGaelicScrip*/, 146/*langIrishGaelicScript*/, 40/*Gaelic*/ }, // using dots
1243 { "gd", 75/*verScottishGaelic*/, 144/*langScottishGaelic*/, 39/*Celtic*/ },
1244 { "gl", -1, 140/*langGalician*/, 0/*Roman*/ }, // Latn;
1245 { "gn", -1, 133/*langGuarani*/, 0/*Roman*/ }, // Latn;
1246 { "grc", 40/*verGreekAncient*/, 148/*langGreekAncient*/, 6/*Greek*/ }, // polytonic (MacGreek doesn't actually support it)
1247 { "gu"/*IN*/, 94/*verGujarati*/, 69/*langGujarati*/, 11/*Gujarati*/ }, // Gujr;
1248 { "gv", 76/*verManxGaelic*/, 145/*langManxGaelic*/, 39/*Celtic*/ }, // Latn;
1249 { "he"/*IL*/, 13/*verIsrael*/, 10/*langHebrew*/, 5/*Hebrew*/ }, // Hebr;
1250 { "hi"/*IN*/, 33/*verIndiaHindi*/, 21/*langHindi*/, 9/*Devanagari*/ }, // Deva;
1251 { "hr"/*HR*/, 68/*verCroatia*/, 18/*langCroatian*/, 36/*Croatian*/ },
1252 { "hu"/*HU*/, 43/*verHungary*/, 26/*langHungarian*/, 29/*CentralEurRoman*/ },
1253 { "hy"/*AM*/, 84/*verArmenian*/, 51/*langArmenian*/, 24/*Armenian*/ }, // Armn;
1254 { "id", -1, 81/*langIndonesian*/, 0/*Roman*/ }, // Latn;
1255 { "is"/*IS*/, 21/*verIceland*/, 15/*langIcelandic*/, 37/*Icelandic*/ },
1256 { "it", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, // "it" defaults to verItaly
1257 { "it_CH", 36/*verItalianSwiss*/, 3/*langItalian*/, 0/*Roman*/ },
1258 { "it_IT", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ },
1259 { "iu"/*CA*/, 78/*verNunavut*/, 143/*langInuktitut*/, 0xEC/*Inuit*/ }, // Cans;
1260 { "ja"/*JP*/, 14/*verJapan*/, 11/*langJapanese*/, 1/*Japanese*/ }, // Jpan;
1261 { "jv", -1, 138/*langJavaneseRom*/, 0/*Roman*/ }, // Latn;
1262 { "ka"/*GE*/, 85/*verGeorgian*/, 52/*langGeorgian*/, 23/*Georgian*/ }, // Geor;
1263 { "kk", -1, 48/*langKazakh*/, 7/*Cyrillic*/ }, // "kk" defaults to -Cyrl; also have -Latn, -Arab
1264 { "kl", 107/*verGreenland*/, 149/*langGreenlandic*/, 0/*Roman*/ }, // Latn;
1265 { "km", -1, 78/*langKhmer*/, 20/*Khmer*/ }, // Khmr;
1266 { "kn", -1, 73/*langKannada*/, 16/*Kannada*/ }, // Knda;
1267 { "ko"/*KR*/, 51/*verKorea*/, 23/*langKorean*/, 3/*Korean*/ }, // Hang;
1268 { "ks", -1, 61/*langKashmiri*/, 4/*Arabic*/ }, // Arab;
1269 { "ku", -1, 60/*langKurdish*/, 4/*Arabic*/ }, // Arab;
1270 { "ky", -1, 54/*langKirghiz*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1271 { "la", -1, 131/*langLatin*/, 0/*Roman*/ }, // Latn;
1272 { "lo", -1, 79/*langLao*/, 22/*Laotian*/ }, // Laoo;
1273 { "lt"/*LT*/, 41/*verLithuania*/, 24/*langLithuanian*/, 29/*CentralEurRoman*/ },
1274 { "lv"/*LV*/, 45/*verLatvia*/, 28/*langLatvian*/, 29/*CentralEurRoman*/ },
1275 { "mg", -1, 93/*langMalagasy*/, 0/*Roman*/ }, // Latn;
1276 { "mk"/*MK*/, 67/*verMacedonian*/, 43/*langMacedonian*/, 7/*Cyrillic*/ }, // Cyrl;
1277 { "ml", -1, 72/*langMalayalam*/, 17/*Malayalam*/ }, // Mlym;
9f29f3f8 1278 { "mn", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // "mn" defaults to -Cyrl
bd5b749c
A
1279 { "mn_Cyrl", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // Cyrl;
1280 { "mn_Mong", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // Mong;
1281 { "mo", -1, 53/*langMoldavian*/, 7/*Cyrillic*/ }, // Cyrl;
1282 { "mr"/*IN*/, 104/*verMarathi*/, 66/*langMarathi*/, 9/*Devanagari*/ }, // Deva;
1283 { "ms", -1, 83/*langMalayRoman*/, 0/*Roman*/ }, // "ms" defaults to -Latn;
1284 { "ms_Arab", -1, 84/*langMalayArabic*/, 4/*Arabic*/ }, // Arab;
1285 { "mt"/*MT*/, 22/*verMalta*/, 16/*langMaltese*/, 0/*Roman*/ }, // Latn;
1286 { "mul", 74/*verMultilingual*/, -1, 0 },
1287 { "my", -1, 77/*langBurmese*/, 19/*Burmese*/ }, // Mymr;
1288 { "nb"/*NO*/, 12/*verNorway*/, 9/*langNorwegian*/, 0/*Roman*/ },
1289 { "ne"/*NP*/, 106/*verNepal*/, 64/*langNepali*/, 9/*Devanagari*/ }, // Deva;
1290 { "nl", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, // "nl" defaults to verNetherlands
1291 { "nl_BE", 6/*verFlemish*/, 34/*langFlemish*/, 0/*Roman*/ },
1292 { "nl_NL", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ },
1293 { "nn"/*NO*/, 101/*verNynorsk*/, 151/*langNynorsk*/, 0/*Roman*/ },
1294 { "ny", -1, 92/*langNyanja/Chewa*/, 0/*Roman*/ }, // Latn;
1295 { "om", -1, 87/*langOromo*/, 28/*Ethiopic*/ }, // Ethi;
1296 { "or", -1, 71/*langOriya*/, 12/*Oriya*/ }, // Orya;
1297 { "pa", 95/*verPunjabi*/, 70/*langPunjabi*/, 10/*Gurmukhi*/ }, // Guru;
1298 { "pl"/*PL*/, 42/*verPoland*/, 25/*langPolish*/, 29/*CentralEurRoman*/ },
1299 { "ps", -1, 59/*langPashto*/, 0x8C/*Farsi*/ }, // Arab;
1300 { "pt", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, // "pt" defaults to verBrazil (per Chris Hansten)
1301 { "pt_BR", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ },
1302 { "pt_PT", 10/*verPortugal*/, 8/*langPortuguese*/, 0/*Roman*/ },
1303 { "qu", -1, 132/*langQuechua*/, 0/*Roman*/ }, // Latn;
1304 { "rn", -1, 91/*langRundi*/, 0/*Roman*/ }, // Latn;
1305 { "ro"/*RO*/, 39/*verRomania*/, 37/*langRomanian*/, 38/*Romanian*/ },
1306 { "ru"/*RU*/, 49/*verRussia*/, 32/*langRussian*/, 7/*Cyrillic*/ }, // Cyrl;
1307 { "rw", -1, 90/*langKinyarwanda*/, 0/*Roman*/ }, // Latn;
1308 { "sa", -1, 65/*langSanskrit*/, 9/*Devanagari*/ }, // Deva;
1309 { "sd", -1, 62/*langSindhi*/, 0x8C/*Farsi*/ }, // Arab;
1310 { "se", 46/*verSami*/, 29/*langSami*/, 0/*Roman*/ },
1311 { "si", -1, 76/*langSinhalese*/, 18/*Sinhalese*/ }, // Sinh;
1312 { "sk"/*SK*/, 57/*verSlovak*/, 39/*langSlovak*/, 29/*CentralEurRoman*/ },
1313 { "sl"/*SI*/, 66/*verSlovenian*/, 40/*langSlovenian*/, 36/*Croatian*/ },
1314 { "so", -1, 88/*langSomali*/, 0/*Roman*/ }, // Latn;
1315 { "sq", -1, 36/*langAlbanian*/, 0/*Roman*/ },
1316 { "sr"/*CS,RS*/, 65/*verSerbian*/, 42/*langSerbian*/, 7/*Cyrillic*/ }, // Cyrl;
1317 { "su", -1, 139/*langSundaneseRom*/, 0/*Roman*/ }, // Latn;
1318 { "sv"/*SE*/, 7/*verSweden*/, 5/*langSwedish*/, 0/*Roman*/ },
1319 { "sw", -1, 89/*langSwahili*/, 0/*Roman*/ }, // Latn;
1320 { "ta", -1, 74/*langTamil*/, 14/*Tamil*/ }, // Taml;
1321 { "te", -1, 75/*langTelugu*/, 15/*Telugu*/ }, // Telu
1322 { "tg", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // "tg" defaults to "Cyrl"
1323 { "tg_Cyrl", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1324 { "th"/*TH*/, 54/*verThailand*/, 22/*langThai*/, 21/*Thai*/ }, // Thai;
1325 { "ti", -1, 86/*langTigrinya*/, 28/*Ethiopic*/ }, // Ethi;
1326 { "tk", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // "tk" defaults to Cyrl
1327 { "tk_Cyrl", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1328 { "tl", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn;
1329 { "to"/*TO*/, 88/*verTonga*/, 147/*langTongan*/, 0/*Roman*/ }, // Latn;
1330 { "tr"/*TR*/, 24/*verTurkey*/, 17/*langTurkish*/, 35/*Turkish*/ }, // Latn;
1331 { "tt", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl;
1332 { "tt_Cyrl", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl;
1333 { "ug", -1, 136/*langUighur*/, 4/*Arabic*/ }, // Arab;
1334 { "uk"/*UA*/, 62/*verUkraine*/, 45/*langUkrainian*/, 7/*Cyrillic*/ }, // Cyrl;
1335 { "und", 55/*verScriptGeneric*/, -1, 0 },
1336 { "ur", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // "ur" defaults to verPakistanUrdu
1337 { "ur_IN", 96/*verIndiaUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab
1338 { "ur_PK", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab
1339 { "uz"/*UZ*/, 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab
1340 { "uz_Cyrl", 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ },
1341 { "vi"/*VN*/, 97/*verVietnam*/, 80/*langVietnamese*/, 30/*Vietnamese*/ }, // Latn
1342 { "yi", -1, 41/*langYiddish*/, 5/*Hebrew*/ }, // Hebr;
1343 { "zh", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, // "zh" defaults to verChina, langSimpChinese
1344 { "zh_CN", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1345 { "zh_HK", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1346 { "zh_Hans", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1347 { "zh_Hant", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1348 { "zh_MO", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1349 { "zh_SG", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ },
1350 { "zh_TW", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ },
1351};
1352enum {
1353 kNumLocaleToLegacyCodes = sizeof(localeToLegacyCodes)/sizeof(localeToLegacyCodes[0])
1354};
1355
d7384798
A
1356#endif
1357
bd5b749c
A
1358/*
1359 For reference here is a list of ICU locales with variants and how some
1360 of them are canonicalized with the ICU function uloc_canonicalize:
1361
1362 ICU 3.0 has:
1363 en_US_POSIX x no change
1364 hy_AM_REVISED x no change
1365 ja_JP_TRADITIONAL -> ja_JP@calendar=japanese
1366 th_TH_TRADITIONAL -> th_TH@calendar=buddhist
1367
1368 ICU 2.8 also had the following (now obsolete):
1369 ca_ES_PREEURO
1370 de__PHONEBOOK -> de@collation=phonebook
1371 de_AT_PREEURO
1372 de_DE_PREEURO
1373 de_LU_PREEURO
1374 el_GR_PREEURO
1375 en_BE_PREEURO
1376 en_GB_EURO -> en_GB@currency=EUR
1377 en_IE_PREEURO -> en_IE@currency=IEP
1378 es__TRADITIONAL -> es@collation=traditional
1379 es_ES_PREEURO
1380 eu_ES_PREEURO
1381 fi_FI_PREEURO
1382 fr_BE_PREEURO
1383 fr_FR_PREEURO -> fr_FR@currency=FRF
1384 fr_LU_PREEURO
1385 ga_IE_PREEURO
1386 gl_ES_PREEURO
1387 hi__DIRECT -> hi@collation=direct
1388 it_IT_PREEURO
1389 nl_BE_PREEURO
1390 nl_NL_PREEURO
1391 pt_PT_PREEURO
1392 zh__PINYIN -> zh@collation=pinyin
1393 zh_TW_STROKE -> zh_TW@collation=stroke
1394
1395*/
1396
1397// _CompareTestEntryToTableEntryKey
1398// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1399// comparison function for bsearch
1400static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1401 return strcmp( ((const KeyStringToResultString *)testEntryPtr)->key, ((const KeyStringToResultString *)tableEntryKeyPtr)->key );
1402}
1403
1404// _CompareTestEntryPrefixToTableEntryKey
1405// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1406// Comparison function for bsearch. Assumes prefix IS terminated with '-' or '_'.
1407// Do the following instead of strlen & strncmp so we don't walk tableEntry key twice.
1408static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1409 const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key;
1410 const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key;
1411
1412 while ( *testPtr == *tablePtr && *tablePtr != 0 ) {
1413 testPtr++; tablePtr++;
1414 }
1415 if ( *tablePtr != 0 ) {
1416 // strings are different, and the string in the table has not run out;
1417 // i.e. the table entry is not a prefix of the text string.
1418 return ( *testPtr < *tablePtr )? -1: 1;
1419 }
1420 return 0;
1421}
1422
1423// _CompareLowerTestEntryPrefixToTableEntryKey
1424// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1425// Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'.
1426// Lowercases the test string before comparison (the table should already have lowercased entries).
1427static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1428 const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key;
1429 const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key;
1430 char lowerTestChar;
1431
1432 while ( (lowerTestChar = tolower(*testPtr)) == *tablePtr && *tablePtr != 0 && lowerTestChar != '_' ) { // <1.9>
1433 testPtr++; tablePtr++;
1434 }
1435 if ( *tablePtr != 0 ) {
1436 // strings are different, and the string in the table has not run out;
1437 // i.e. the table entry is not a prefix of the text string.
1438 if (lowerTestChar == '_') // <1.9>
1439 return -1; // <1.9>
1440 return ( lowerTestChar < *tablePtr )? -1: 1;
1441 }
1442 // The string in the table has run out. If the test string char is not alnum,
1443 // then the string matches, else the test string sorts after.
1444 return ( !isalnum(lowerTestChar) )? 0: 1;
1445}
1446
// _DeleteCharsAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Remove _length_ characters from the beginning of the NUL-terminated string at
// _stringPtr_ by shifting the remainder (including the terminator) down in place.
//
// Callers are expected to pass a length no greater than the string length, but the
// function is defensive: a NULL pointer or non-positive length is a no-op, and a
// length larger than the string simply truncates it to the empty string instead of
// reading past the terminator.
static void _DeleteCharsAtPointer(char *stringPtr, int length) {
    size_t  available;
    size_t  deleteCount;

    if (stringPtr == NULL || length <= 0) {
        return;
    }
    available = strlen(stringPtr);
    deleteCount = ((size_t)length < available)? (size_t)length: available;

    // regions overlap, so memmove (not memcpy); "+ 1" carries the terminating NUL along
    memmove(stringPtr, stringPtr + deleteCount, available - deleteCount + 1);
}
1456
// _CopyReplacementAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Overwrite the characters at stringPtr with the characters of replacementPtr.
// The replacement's terminating NUL is *not* copied, so whatever follows the
// overwritten span in the destination is left intact.
static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) {
    int offset;

    for (offset = 0; replacementPtr[offset] != 0; offset++) {
        stringPtr[offset] = replacementPtr[offset];
    }
}
1465
1466// _CheckForTag
1467// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1468static Boolean _CheckForTag(const char *localeStringPtr, const char *tagPtr, int tagLen) {
1469 return ( strncmp(localeStringPtr, tagPtr, tagLen) == 0 && !isalnum(localeStringPtr[tagLen]) );
1470}
1471
// _ReplacePrefix
// Move this code from _UpdateFullLocaleString into separate function                       // <1.10>
//
// Replace the first oldPrefixLen characters of the NUL-terminated string locString
// with newPrefix, shifting the remainder of the string as needed.
//
//   locString        buffer holding the string, modified in place
//   locStringMaxLen  total capacity of locString in bytes (including the terminator)
//   oldPrefixLen     number of leading characters to replace
//   newPrefix        NUL-terminated replacement text
//
// If the replacement is longer than the old prefix and the result (plus terminator)
// would not fit in locStringMaxLen bytes, the string is left unchanged. The string is
// also left unchanged for invalid arguments (NULL pointers, negative oldPrefixLen, or
// oldPrefixLen greater than the current string length).
static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) {
    size_t  stringLen;
    size_t  oldLen;
    size_t  newLen;

    if (locString == NULL || newPrefix == NULL || oldPrefixLen < 0) {
        return;
    }
    stringLen = strlen(locString);
    oldLen = (size_t)oldPrefixLen;
    newLen = strlen(newPrefix);
    if (oldLen > stringLen) {
        // the claimed prefix is longer than the string itself; nothing sensible to do
        return;
    }

    if (newLen > oldLen) {
        // replacement is longer: the grown string plus its terminator must fit
        if (locStringMaxLen <= 0 || stringLen + (newLen - oldLen) >= (size_t)locStringMaxLen) {
            // no room, can't do substitution
            return;
        }
    }

    if (newLen != oldLen) {
        // shift the tail (including the terminating NUL) to its new position;
        // source and destination overlap, hence memmove
        memmove(locString + newLen, locString + oldLen, stringLen - oldLen + 1);
    }

    // do the substitution (the replacement's own terminator is not copied)
    memcpy(locString, newPrefix, newLen);
}
1505
1506// _UpdateFullLocaleString
1507// Given a locale string that uses standard codes (not a special old-style Apple string),
1508// update all the language codes and region codes to latest versions, map 3-letter
1509// language codes to 2-letter codes if possible, and normalize casing. If requested, return
1510// pointers to a language-region variant subtag (if present) and a region tag (if present).
1511// (add locStringMaxLen parameter) // <1.10>
1512static void _UpdateFullLocaleString(char inLocaleString[], int locStringMaxLen,
1513 char **langRegSubtagRef, char **regionTagRef,
1514 char varKeyValueString[]) // <1.17>
1515{
1516 KeyStringToResultString testEntry;
1517 KeyStringToResultString * foundEntry;
1518 const SpecialCaseUpdates * specialCasePtr;
1519 char * inLocalePtr;
1520 char * subtagPtr;
1521 char * langRegSubtag = NULL;
1522 char * regionTag = NULL;
1523 char * variantTag = NULL;
1524 Boolean subtagHasDigits, pastPrimarySubtag, hadRegion;
1525
1526 // 1. First replace any non-canonical prefix (case insensitive) with canonical
1527 // (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.)
1528
1529 testEntry.key = inLocaleString;
1530 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToCanonical, kNumLocaleStringPrefixToCanonical,
1531 sizeof(KeyStringToResultString), _CompareLowerTestEntryPrefixToTableEntryKey );
1532 if (foundEntry) {
1533 // replace key (at beginning of string) with result
1534 _ReplacePrefix(inLocaleString, locStringMaxLen, strlen(foundEntry->key), foundEntry->result); // <1.10>
1535 }
1536
1537 // 2. Walk through input string, normalizing case & marking use of ISO 3166 codes
1538
1539 inLocalePtr = inLocaleString;
1540 subtagPtr = inLocaleString;
1541 subtagHasDigits = false;
1542 pastPrimarySubtag = false;
1543 hadRegion = false;
1544
1545 while ( true ) {
1546 if ( isalpha(*inLocalePtr) ) {
1547 // if not past a region tag, then lowercase, else uppercase
1548 *inLocalePtr = (!hadRegion)? tolower(*inLocalePtr): toupper(*inLocalePtr);
1549 } else if ( isdigit(*inLocalePtr) ) {
1550 subtagHasDigits = true;
1551 } else {
1552
1553 if (!pastPrimarySubtag) {
1554 // may have a NULL primary subtag
1555 if (subtagHasDigits) {
1556 break;
1557 }
1558 pastPrimarySubtag = true;
1559 } else if (!hadRegion) {
1560 // We are after any primary language subtag, but not past any region tag.
1561 // This subtag is preceded by '-' or '_'.
1562 int subtagLength = inLocalePtr - subtagPtr; // includes leading '-' or '_'
1563
1564 if (subtagLength == 3 && !subtagHasDigits) {
1565 // potential ISO 3166 code for region or language variant; if so, needs uppercasing
1566 if (*subtagPtr == '_') {
1567 regionTag = subtagPtr;
1568 hadRegion = true;
1569 subtagPtr[1] = toupper(subtagPtr[1]);
1570 subtagPtr[2] = toupper(subtagPtr[2]);
1571 } else if (langRegSubtag == NULL) {
1572 langRegSubtag = subtagPtr;
1573 subtagPtr[1] = toupper(subtagPtr[1]);
1574 subtagPtr[2] = toupper(subtagPtr[2]);
1575 }
1576 } else if (subtagLength == 4 && subtagHasDigits) {
1577 // potential UN M.49 region code
1578 if (*subtagPtr == '_') {
1579 regionTag = subtagPtr;
1580 hadRegion = true;
1581 } else if (langRegSubtag == NULL) {
1582 langRegSubtag = subtagPtr;
1583 }
1584 } else if (subtagLength == 5 && !subtagHasDigits) {
1585 // ISO 15924 script code, uppercase just the first letter
1586 subtagPtr[1] = toupper(subtagPtr[1]);
1587 } else if (subtagLength == 1 && *subtagPtr == '_') { // <1.17>
1588 hadRegion = true;
1589 }
1590
1591 if (!hadRegion) {
1592 // convert improper '_' to '-'
1593 *subtagPtr = '-';
1594 }
1595 } else {
1596 variantTag = subtagPtr; // <1.17>
1597 }
1598
1599 if (*inLocalePtr == '-' || *inLocalePtr == '_') {
1600 subtagPtr = inLocalePtr;
1601 subtagHasDigits = false;
1602 } else {
1603 break;
1604 }
1605 }
1606
1607 inLocalePtr++;
1608 }
1609
1610 // 3 If there is a variant tag, see if ICU canonicalizes it to keywords. // <1.17> [3577669]
1611 // If so, copy the keywords to varKeyValueString and delete the variant tag
1612 // from the original string (but don't otherwise use the ICU canonicalization).
1613 varKeyValueString[0] = 0;
856091c5 1614#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
bd5b749c
A
1615 if (variantTag) {
1616 UErrorCode icuStatus;
1617 int icuCanonStringLen;
1618 char * varKeyValueStringPtr = varKeyValueString;
1619
1620 icuStatus = U_ZERO_ERROR;
1621 icuCanonStringLen = uloc_canonicalize( inLocaleString, varKeyValueString, locStringMaxLen, &icuStatus );
1622 if ( U_SUCCESS(icuStatus) ) {
1623 char * icuCanonStringPtr = varKeyValueString;
1624
1625 if (icuCanonStringLen >= locStringMaxLen)
1626 icuCanonStringLen = locStringMaxLen - 1;
1627 varKeyValueString[icuCanonStringLen] = 0;
1628 while (*icuCanonStringPtr != 0 && *icuCanonStringPtr != ULOC_KEYWORD_SEPARATOR)
1629 ++icuCanonStringPtr;
1630 if (*icuCanonStringPtr != 0) {
1631 // the canonicalized string has keywords
1632 // delete the variant tag in the original string (and other trailing '_' or '-')
1633 *variantTag-- = 0;
1634 while (*variantTag == '_')
1635 *variantTag-- = 0;
1636 // delete all of the canonicalized string except the keywords
1637 while (*icuCanonStringPtr != 0)
1638 *varKeyValueStringPtr++ = *icuCanonStringPtr++;
1639 }
1640 *varKeyValueStringPtr = 0;
1641 }
1642 }
856091c5 1643#endif
bd5b749c
A
1644
1645 // 4. Handle special cases of updating region codes, or updating language codes based on
1646 // region code.
1647 for (specialCasePtr = specialCases; specialCasePtr->reg1 != NULL; specialCasePtr++) {
1648 if ( specialCasePtr->lang == NULL || _CheckForTag(inLocaleString, specialCasePtr->lang, 2) ) {
1649 // OK, we matched any language specified. Now what needs updating?
1650 char * foundTag;
1651
1652 if ( isupper(specialCasePtr->update1[0]) ) {
1653 // updating a region code
1654 if ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) {
1655 _CopyReplacementAtPointer(foundTag+1, specialCasePtr->update1);
1656 }
1657 if ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) {
1658 _CopyReplacementAtPointer(regionTag+1, specialCasePtr->update1);
1659 }
1660
1661 } else {
1662 // updating the language, there will be two choices based on region
1663 if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) ||
1664 ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) ) {
1665 _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update1);
1666 } else if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg2 + 1, 2) ) ||
1667 ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg2) ) && !isalnum(foundTag[3]) ) ) {
1668 _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update2);
1669 }
1670 }
1671 }
1672 }
1673
1674 // 5. return pointers if requested.
1675 if (langRegSubtagRef != NULL) {
1676 *langRegSubtagRef = langRegSubtag;
1677 }
1678 if (regionTagRef != NULL) {
1679 *regionTagRef = regionTag;
1680 }
1681}
1682
1683
// _RemoveSubstringsIfPresent
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// substringList is a list of whitespace-separated substrings to strip if found in localeString.
// The substrings are processed in list order; for each one, only its first occurrence in
// localeString (as it stands at that point) is deleted, in place.
//
// Each substring is matched directly out of substringList, so no fixed-size scratch buffer
// is needed and an over-long list entry cannot overflow anything. Any character that is not
// graphic (whitespace, control characters) is treated as a separator, which guarantees the
// scan always advances.
static void _RemoveSubstringsIfPresent(char *localeString, const char *substringList) {
    const char * listPtr = substringList;

    while (*listPtr != 0) {
        const char *    substringStart;
        size_t          substringLength;
        size_t          localeLength;
        size_t          offset;

        // skip separators in front of the next substring
        while ( *listPtr != 0 && !isgraph((unsigned char)*listPtr) ) {
            listPtr++;
        }
        // measure the current substring (isgraph is false for NUL, so this stops at the end)
        substringStart = listPtr;
        while ( isgraph((unsigned char)*listPtr) ) {
            listPtr++;
        }
        substringLength = (size_t)(listPtr - substringStart);
        if (substringLength == 0) {
            // only possible once the end of the list has been reached
            break;
        }

        // search for current substring in locale string
        localeLength = strlen(localeString);
        if (substringLength > localeLength) {
            continue;
        }
        for (offset = 0; offset + substringLength <= localeLength; offset++) {
            if ( memcmp(localeString + offset, substringStart, substringLength) == 0 ) {
                // found: delete it by shifting the tail (including the terminator) down
                memmove(localeString + offset, localeString + offset + substringLength,
                        localeLength - offset - substringLength + 1);
                break;
            }
        }
    }
}
1714
1715
1716// _GetKeyValueString // <1.10>
1717// Removes any key-value string from inLocaleString, puts canonized version in keyValueString
1718
1719static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) {
1720 char * inLocalePtr = inLocaleString;
1721
1722 while (*inLocalePtr != 0 && *inLocalePtr != ULOC_KEYWORD_SEPARATOR) {
1723 inLocalePtr++;
1724 }
1725 if (*inLocalePtr != 0) { // we found a key-value section
1726 char * keyValuePtr = keyValueString;
1727
1728 *keyValuePtr = *inLocalePtr;
1729 *inLocalePtr = 0;
1730 do {
1731 if ( *(++inLocalePtr) != ' ' ) {
1732 *(++keyValuePtr) = *inLocalePtr; // remove "tolower() for *inLocalePtr" // <1.11>
1733 }
1734 } while (*inLocalePtr != 0);
1735 } else {
1736 keyValueString[0] = 0;
1737 }
1738}
1739
// _AppendKeyValueString
// For every keyword found in keyValueString (an "@key=value;..." keyword string), set that
// keyword and its value on inLocaleString via uloc_setKeywordValue. locStringMaxLen is the
// capacity of inLocaleString. Does nothing if keyValueString is empty, or on platforms
// where the ICU-based code is compiled out.
//
// uloc_getKeywordValue does not NUL-terminate its output when the value exactly fills the
// supplied capacity (it reports U_STRING_NOT_TERMINATED_WARNING, which still counts as
// success). The value buffer therefore has one spare byte that is always set to NUL before
// the value is used as a C string.
static void _AppendKeyValueString(char inLocaleString[], int locStringMaxLen, char keyValueString[]) {
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
    if (keyValueString[0] != 0) {
        UErrorCode uerr = U_ZERO_ERROR;
        UEnumeration * uenum = uloc_openKeywords(keyValueString, &uerr);
        if ( uenum != NULL ) {
            const char *    keyword;
            int32_t         length;
            char            value[ULOC_KEYWORDS_CAPACITY + 1];  // max keyword value plus guaranteed terminator

            value[0] = 0;
            while ( U_SUCCESS(uerr) ) {
                keyword = uenum_next(uenum, &length, &uerr);
                if ( keyword == NULL ) {
                    break;
                }
                (void)uloc_getKeywordValue( keyValueString, keyword, value, ULOC_KEYWORDS_CAPACITY, &uerr );
                if ( U_FAILURE(uerr) ) {
                    // value is not usable (e.g. too long for the buffer)
                    break;
                }
                value[ULOC_KEYWORDS_CAPACITY] = 0;  // terminate even if the value filled the capacity exactly
                (void)uloc_setKeywordValue( keyword, value, inLocaleString, locStringMaxLen, &uerr );
            }
            uenum_close(uenum);
        }
    }
#endif
}
1762
cf7d2af9 1763// __private_extern__ CFStringRef _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator, CFStringRef localeIdentifier) {}
bd5b749c
A
1764
1765CFStringRef CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) {
1766 char inLocaleString[kLocaleIdentifierCStringMax];
1767 CFStringRef outStringRef = NULL;
1768
1769 if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) {
1770 KeyStringToResultString testEntry;
1771 KeyStringToResultString * foundEntry;
1772 char keyValueString[sizeof(inLocaleString)]; // <1.10>
1773 char varKeyValueString[sizeof(inLocaleString)]; // <1.17>
1774
1775 _GetKeyValueString(inLocaleString, keyValueString); // <1.10>
1776 testEntry.result = NULL;
1777
9f29f3f8
A
1778 // A. Special case aa_SAAHO, no_BOKMAL, and no_NYNORSK since they are legacy identifiers that don't follow the normal rules (http://unicode.org/cldr/trac/browser/trunk/common/supplemental/supplementalMetadata.xml)
1779
bd5b749c 1780 testEntry.key = inLocaleString;
9f29f3f8
A
1781 KeyStringToResultString specialCase = testEntry;
1782 foundEntry = &specialCase;
1783
1784 if (strncmp("aa_SAAHO", testEntry.key, strlen("aa_SAAHO")) == 0) {
1785 foundEntry->result = "ssy";
1786 } else if (strncmp("no_BOKMAL", testEntry.key, strlen("no_BOKMAL")) == 0) {
1787 foundEntry->result = "nb";
1788 } else if (strncmp("no_NYNORSK", testEntry.key, strlen("no_NYNORSK")) == 0) {
1789 foundEntry->result = "nn";
1790 } else {
1791 // B. First check if input string matches an old-style string that has a replacement
1792 // (do this before case normalization)
1793 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical,
1794 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1795 }
bd5b749c
A
1796 if (foundEntry) {
1797 // It does match, so replace old string with new
1798 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1799 varKeyValueString[0] = 0;
1800 } else {
1801 char * langRegSubtag = NULL;
1802 char * regionTag = NULL;
1803
9f29f3f8 1804 // C. No match with an old-style string, use input string but update codes, normalize case, etc.
bd5b749c
A
1805 _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, &regionTag, varKeyValueString); // <1.10><1.17><1.19>
1806
1807 // if the language part already includes a regional variant, then delete any region tag. <1.19>
1808 if (langRegSubtag && regionTag)
1809 *regionTag = 0;
1810 }
1811
9f29f3f8 1812 // D. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string
bd5b749c
A
1813
1814 // 1. Strip defaults in input string based on initial part of locale string
1815 // (mainly to strip default script tag for a language)
1816 testEntry.key = inLocaleString;
1817 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults,
1818 sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey );
1819 if (foundEntry) {
1820 // The input string begins with a character sequence for which
1821 // there are default substrings which should be stripped if present
1822 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1823 }
1824
1825 // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string
1826 testEntry.key = inLocaleString;
1827 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageString, kNumAppleLocaleToLanguageString,
1828 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1829 if (foundEntry) {
1830 // it does match
1831 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1832 } else {
1833 // skip to any region tag or java-type variant
1834 char * inLocalePtr = inLocaleString;
1835 while (*inLocalePtr != 0 && *inLocalePtr != '_') {
1836 inLocalePtr++;
1837 }
1838 // if there is still a region tag, turn it into a language variant <1.19>
1839 if (*inLocalePtr == '_') {
1840 // handle 3-digit regions in addition to 2-letter ones
1841 char * regionTag = inLocalePtr++;
1842 long expectedLength = 0;
1843 if ( isalpha(*inLocalePtr) ) {
1844 while ( isalpha(*(++inLocalePtr)) )
1845 ;
1846 expectedLength = 3;
1847 } else if ( isdigit(*inLocalePtr) ) {
1848 while ( isdigit(*(++inLocalePtr)) )
1849 ;
1850 expectedLength = 4;
1851 }
1852 *regionTag = (inLocalePtr - regionTag == expectedLength)? '-': 0;
1853 }
1854 // anything else at/after '_' just gets deleted
1855 *inLocalePtr = 0;
1856 }
1857
9f29f3f8 1858 // E. Re-append any key-value strings, now canonical // <1.10><1.17>
bd5b749c
A
1859 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString );
1860 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString );
1861
1862 // All done, return what we came up with.
1863 outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII);
1864 }
1865
1866 return outStringRef;
1867}
1868
1869
1870CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) {
1871 char inLocaleString[kLocaleIdentifierCStringMax];
1872 CFStringRef outStringRef = NULL;
1873
1874 if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) {
1875 KeyStringToResultString testEntry;
1876 KeyStringToResultString * foundEntry;
1877 char keyValueString[sizeof(inLocaleString)]; // <1.10>
1878 char varKeyValueString[sizeof(inLocaleString)]; // <1.17>
1879
1880 _GetKeyValueString(inLocaleString, keyValueString); // <1.10>
1881 testEntry.result = NULL;
1882
1883 // A. First check if input string matches an old-style Apple string that has a replacement
1884 // (do this before case normalization)
1885 testEntry.key = inLocaleString;
1886 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical,
1887 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1888 if (foundEntry) {
1889 // It does match, so replace old string with new // <1.10>
1890 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1891 varKeyValueString[0] = 0;
1892 } else {
1893 char * langRegSubtag = NULL;
1894 char * regionTag = NULL;
1895
1896 // B. No match with an old-style string, use input string but update codes, normalize case, etc.
1897 _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, &regionTag, varKeyValueString); // <1.10><1.17>
1898
1899
1900 // C. Now strip defaults that are implied by other fields.
1901
1902 // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter.
1903 if ( langRegSubtag && regionTag && strncmp(langRegSubtag+1, regionTag+1, 2) == 0 ) {
1904 _DeleteCharsAtPointer(langRegSubtag, 3);
1905 }
1906
1907 // 2. Strip defaults in input string based on final region tag in locale string
1908 // (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO)
1909 if ( regionTag ) {
1910 testEntry.key = regionTag;
1911 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringRegionToDefaults, kNumLocaleStringRegionToDefaults,
1912 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1913 if (foundEntry) {
1914 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1915 }
1916 }
1917
1918 // 3. Strip defaults in input string based on initial part of locale string
1919 // (mainly to strip default script tag for a language)
1920 testEntry.key = inLocaleString;
1921 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults,
1922 sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey );
1923 if (foundEntry) {
1924 // The input string begins with a character sequence for which
1925 // there are default substrings which should be stripped if present
1926 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1927 }
1928 }
1929
1930 // D. Re-append any key-value strings, now canonical // <1.10><1.17>
1931 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString );
1932 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString );
1933
1934 // Now create the CFString (even if empty!)
1935 outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII);
1936 }
1937
1938 return outStringRef;
1939}
1940
1941// CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on
1942// the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c
1943CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator, LangCode lcode, RegionCode rcode) {
1944 CFStringRef result = NULL;
1945 if (0 <= rcode && rcode < kNumRegionCodeToLocaleString) {
1946 const char *localeString = regionCodeToLocaleString[rcode];
1947 if (localeString != NULL && *localeString != '\0') {
1948 result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull);
1949 }
1950 }
1951 if (result) return result;
1952 if (0 <= lcode && lcode < kNumLangCodeToLocaleString) {
1953 const char *localeString = langCodeToLocaleString[lcode];
1954 if (localeString != NULL && *localeString != '\0') {
1955 result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull);
1956 }
1957 }
1958 return result;
1959}
1960
1961
cf7d2af9
A
/*
SPI:  CFLocaleGetLanguageRegionEncodingForLocaleIdentifier gets the appropriate language and region codes,
 and the default legacy script code and encoding, for the specified locale (or language) string.
 Returns false if CFLocale has no information about the given locale (in which case none of the by-reference return values are set);
 otherwise may set *langCode and/or *regCode to -1 if there is no appropriate legacy value for the locale.
 This is a replacement for the CFBundle SPI CFBundleGetLocalizationInfoForLocalization (which was intended to be temporary and transitional);
 this function is more up-to-date in its handling of locale strings, and is in CFLocale where this functionality should belong. Compared
 to CFBundleGetLocalizationInfoForLocalization, this function does not specially interpret a NULL localeIdentifier to mean use the single most
 preferred localization in the current context (this function returns NO for a NULL localeIdentifier); and in this function
 langCode, regCode, and scriptCode are all SInt16* (not SInt32* like the equivalent parameters in CFBundleGetLocalizationInfoForLocalization).
*/
856091c5 1973#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
cf7d2af9 1974static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 );
856091c5 1975#endif
cf7d2af9
A
1976
1977Boolean CFLocaleGetLanguageRegionEncodingForLocaleIdentifier(CFStringRef localeIdentifier, LangCode *langCode, RegionCode *regCode, ScriptCode *scriptCode, CFStringEncoding *stringEncoding) {
856091c5 1978#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
cf7d2af9
A
1979 Boolean returnValue = false;
1980 CFStringRef canonicalIdentifier = CFLocaleCreateCanonicalLocaleIdentifierFromString(NULL, localeIdentifier);
1981 if (canonicalIdentifier) {
1982 char localeCString[kLocaleIdentifierCStringMax];
1983 if ( CFStringGetCString(canonicalIdentifier, localeCString, sizeof(localeCString), kCFStringEncodingASCII) ) {
1984 UErrorCode icuStatus = U_ZERO_ERROR;
1985 int32_t languagelength;
1986 char searchString[ULOC_LANG_CAPACITY + ULOC_FULLNAME_CAPACITY];
1987
1988 languagelength = uloc_getLanguage( localeCString, searchString, ULOC_LANG_CAPACITY, &icuStatus );
1989 if ( U_SUCCESS(icuStatus) && languagelength > 0 ) {
1990 // OK, here we have at least a language code, check for other components in order
1991 LocaleToLegacyCodes searchEntry = { (const char *)searchString, 0, 0, 0 };
1992 const LocaleToLegacyCodes * foundEntryPtr;
1993 int32_t componentLength;
1994 char componentString[ULOC_FULLNAME_CAPACITY];
1995
1996 languagelength = strlen(searchString); // in case it got truncated
1997 icuStatus = U_ZERO_ERROR;
1998 componentLength = uloc_getScript( localeCString, componentString, sizeof(componentString), &icuStatus );
1999 if ( U_FAILURE(icuStatus) || componentLength == 0 ) {
2000 icuStatus = U_ZERO_ERROR;
2001 componentLength = uloc_getCountry( localeCString, componentString, sizeof(componentString), &icuStatus );
2002 if ( U_FAILURE(icuStatus) || componentLength == 0 ) {
2003 icuStatus = U_ZERO_ERROR;
2004 componentLength = uloc_getVariant( localeCString, componentString, sizeof(componentString), &icuStatus );
2005 if ( U_FAILURE(icuStatus) ) {
2006 componentLength = 0;
2007 }
2008 }
2009 }
2010
2011 // Append whichever other component we first found
2012 if (componentLength > 0) {
2013 strlcat(searchString, "_", sizeof(searchString));
2014 strlcat(searchString, componentString, sizeof(searchString));
2015 }
2016
2017 // Search
2018 foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries );
2019 if (foundEntryPtr == NULL && (int32_t) strlen(searchString) > languagelength) {
2020 // truncate to language al;one and try again
2021 searchString[languagelength] = 0;
2022 foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries );
2023 }
2024
2025 // If found a matching entry, return requested values
2026 if (foundEntryPtr) {
2027 returnValue = true;
2028 if (langCode) *langCode = foundEntryPtr->langCode;
2029 if (regCode) *regCode = foundEntryPtr->regCode;
2030 if (stringEncoding) *stringEncoding = foundEntryPtr->encoding;
2031 if (scriptCode) {
2032 // map CFStringEncoding to ScriptCode
2033 if (foundEntryPtr->encoding < 33/*kCFStringEncodingMacSymbol*/) {
2034 *scriptCode = foundEntryPtr->encoding;
2035 } else {
2036 switch (foundEntryPtr->encoding) {
2037 case 0x8C/*kCFStringEncodingMacFarsi*/: *scriptCode = 4/*smArabic*/; break;
2038 case 0x98/*kCFStringEncodingMacUkrainian*/: *scriptCode = 7/*smCyrillic*/; break;
2039 case 0xEC/*kCFStringEncodingMacInuit*/: *scriptCode = 28/*smEthiopic*/; break;
2040 case 0xFC/*kCFStringEncodingMacVT100*/: *scriptCode = 32/*smUninterp*/; break;
2041 default: *scriptCode = 0/*smRoman*/; break;
2042 }
2043 }
2044 }
2045 }
2046 }
2047 }
2048 CFRelease(canonicalIdentifier);
2049 }
2050 return returnValue;
856091c5
A
2051#else
2052 return false;
2053#endif
cf7d2af9
A
2054}
2055
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
// bsearch comparator for LocaleToLegacyCodes entries: orders two entries by strcmp of their
// locale strings. Both arguments are pointers to LocaleToLegacyCodes.
static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ) {
    const LocaleToLegacyCodes * lhs = (const LocaleToLegacyCodes *)entry1;
    const LocaleToLegacyCodes * rhs = (const LocaleToLegacyCodes *)entry2;
    return strcmp(lhs->locale, rhs->locale);
}
#endif
cf7d2af9 2063
bd5b749c 2064CFDictionaryRef CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator, CFStringRef localeID) {
856091c5
A
2065 CFMutableDictionaryRef working = CFDictionaryCreateMutable(allocator, 10, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
2066#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
bd5b749c
A
2067 char cLocaleID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY];
2068 char buffer[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY];
bd5b749c
A
2069
2070 UErrorCode icuStatus = U_ZERO_ERROR;
2071 int32_t length = 0;
856091c5
A
2072
2073 if (!localeID) goto out;
2074
bd5b749c
A
2075 // Extract the C string locale ID, for ICU
2076 CFIndex outBytes = 0;
2077 CFStringGetBytes(localeID, CFRangeMake(0, CFStringGetLength(localeID)), kCFStringEncodingASCII, (UInt8) '?', true, (unsigned char *)cLocaleID, sizeof(cLocaleID)/sizeof(char) - 1, &outBytes);
2078 cLocaleID[outBytes] = '\0';
2079
2080 // Get the components
2081 length = uloc_getLanguage(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
2082 if (U_SUCCESS(icuStatus) && length > 0)
2083 {
2084 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
cf7d2af9 2085 CFDictionaryAddValue(working, kCFLocaleLanguageCodeKey, string);
bd5b749c
A
2086 CFRelease(string);
2087 }
2088 icuStatus = U_ZERO_ERROR;
2089
2090 length = uloc_getScript(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
2091 if (U_SUCCESS(icuStatus) && length > 0)
2092 {
2093 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
cf7d2af9 2094 CFDictionaryAddValue(working, kCFLocaleScriptCodeKey, string);
bd5b749c
A
2095 CFRelease(string);
2096 }
2097 icuStatus = U_ZERO_ERROR;
2098
2099 length = uloc_getCountry(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
2100 if (U_SUCCESS(icuStatus) && length > 0)
2101 {
2102 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
cf7d2af9 2103 CFDictionaryAddValue(working, kCFLocaleCountryCodeKey, string);
bd5b749c
A
2104 CFRelease(string);
2105 }
2106 icuStatus = U_ZERO_ERROR;
2107
2108 length = uloc_getVariant(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
2109 if (U_SUCCESS(icuStatus) && length > 0)
2110 {
2111 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
cf7d2af9 2112 CFDictionaryAddValue(working, kCFLocaleVariantCodeKey, string);
bd5b749c
A
2113 CFRelease(string);
2114 }
2115 icuStatus = U_ZERO_ERROR;
2116
2117 // Now get the keywords; open an enumerator on them
2118 UEnumeration *iter = uloc_openKeywords(cLocaleID, &icuStatus);
2119 const char *locKey = NULL;
2120 int32_t locKeyLen = 0;
2121 while ((locKey = uenum_next(iter, &locKeyLen, &icuStatus)) && U_SUCCESS(icuStatus))
2122 {
2123 char locValue[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2124
2125 // Get the value for this keyword
2126 if (uloc_getKeywordValue(cLocaleID, locKey, locValue, sizeof(locValue)/sizeof(char), &icuStatus) > 0
2127 && U_SUCCESS(icuStatus))
2128 {
2129 CFStringRef key = CFStringCreateWithBytes(allocator, (UInt8 *)locKey, strlen(locKey), kCFStringEncodingASCII, true);
2130 CFStringRef value = CFStringCreateWithBytes(allocator, (UInt8 *)locValue, strlen(locValue), kCFStringEncodingASCII, true);
2131 if (key && value)
2132 CFDictionaryAddValue(working, key, value);
2133 if (key)
2134 CFRelease(key);
2135 if (value)
2136 CFRelease(value);
2137 }
2138 }
2139 uenum_close(iter);
2140
856091c5
A
2141 out:;
2142#endif
bd5b749c
A
2143 // Convert to an immutable dictionary and return
2144 CFDictionaryRef result = CFDictionaryCreateCopy(allocator, working);
2145 CFRelease(working);
2146 return result;
2147}
2148
cf7d2af9
A
2149static char *__CStringFromString(CFStringRef str) {
2150 if (!str) return NULL;
2151 CFRange rg = CFRangeMake(0, CFStringGetLength(str));
2152 CFIndex neededLength = 0;
2153 CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, NULL, 0, &neededLength);
2154 char *buf = (char *)malloc(neededLength + 1);
2155 CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, (uint8_t *)buf, neededLength, &neededLength);
2156 buf[neededLength] = '\0';
2157 return buf;
bd5b749c
A
2158}
2159
2160CFStringRef CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator, CFDictionaryRef dictionary) {
856091c5
A
2161 if (!dictionary) return NULL;
2162
cf7d2af9
A
2163 CFIndex cnt = CFDictionaryGetCount(dictionary);
2164 STACK_BUFFER_DECL(CFStringRef, values, cnt);
2165 STACK_BUFFER_DECL(CFStringRef, keys, cnt);
2166 CFDictionaryGetKeysAndValues(dictionary, (const void **)keys, (const void **)values);
2167
2168 char *language = NULL, *script = NULL, *country = NULL, *variant = NULL;
2169 for (CFIndex idx = 0; idx < cnt; idx++) {
2170 if (CFEqual(kCFLocaleLanguageCodeKey, keys[idx])) {
2171 language = __CStringFromString(values[idx]);
2172 keys[idx] = NULL;
2173 } else if (CFEqual(kCFLocaleScriptCodeKey, keys[idx])) {
2174 script = __CStringFromString(values[idx]);
2175 keys[idx] = NULL;
2176 } else if (CFEqual(kCFLocaleCountryCodeKey, keys[idx])) {
2177 country = __CStringFromString(values[idx]);
2178 keys[idx] = NULL;
2179 } else if (CFEqual(kCFLocaleVariantCodeKey, keys[idx])) {
2180 variant = __CStringFromString(values[idx]);
2181 keys[idx] = NULL;
2182 }
bd5b749c
A
2183 }
2184
cf7d2af9
A
2185 char *buf1 = NULL; // (|L)(|_S)(|_C|_C_V|__V)
2186 asprintf(&buf1, "%s%s%s%s%s%s%s", language ? language : "", script ? "_" : "", script ? script : "", (country || variant ? "_" : ""), country ? country : "", variant ? "_" : "", variant ? variant : "");
2187
2188 char cLocaleID[2 * ULOC_FULLNAME_CAPACITY + 2 * ULOC_KEYWORD_AND_VALUES_CAPACITY];
2189 strlcpy(cLocaleID, buf1, sizeof(cLocaleID));
2190 free(language);
2191 free(script);
2192 free(country);
2193 free(variant);
2194 free(buf1);
2195
856091c5 2196#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
cf7d2af9
A
2197 for (CFIndex idx = 0; idx < cnt; idx++) {
2198 if (keys[idx]) {
2199 char *key = __CStringFromString(keys[idx]);
8ca704e1
A
2200 char *value;
2201 if (0 == strcmp(key, "kCFLocaleCalendarKey")) {
2202 // For interchangeability convenience, we alternatively allow a
2203 // calendar object to be passed in, with the alternate key, and
2204 // we'll extract the identifier.
2205 CFCalendarRef cal = (CFCalendarRef)values[idx];
2206 CFStringRef ident = CFCalendarGetIdentifier(cal);
2207 value = __CStringFromString(ident);
2208 char *oldkey = key;
2209 key = strdup("calendar");
2210 free(oldkey);
2211 } else {
2212 value = __CStringFromString(values[idx]);
2213 }
cf7d2af9
A
2214 UErrorCode status = U_ZERO_ERROR;
2215 uloc_setKeywordValue(key, value, cLocaleID, sizeof(cLocaleID), &status);
2216 free(key);
2217 free(value);
2218 }
bd5b749c 2219 }
856091c5
A
2220#endif
2221
cf7d2af9 2222 return CFStringCreateWithCString(allocator, cLocaleID, kCFStringEncodingASCII);
bd5b749c
A
2223}
2224